{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4007514088916719, "eval_steps": 500, "global_step": 320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1310.375, "completions/mean_terminated_length": 1120.75, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.0012523481527864746, "frac_reward_zero_std": 0.0, "grad_norm": 2.4675665797659936, "kl": 0.0014476776123046875, "learning_rate": 0.0, "loss": -0.0042, "num_tokens": 47606.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0425715446472168, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020242706942291286, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08320206610241015, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1215.625, "completions/mean_terminated_length": 1120.8333740234375, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.002504696305572949, "frac_reward_zero_std": 0.0, "grad_norm": 3.5220841352987073, "kl": 0.002323150634765625, "learning_rate": 1.25e-08, "loss": -0.0365, "num_tokens": 78984.0, "reward": 0.0, "reward_std": 0.9615500569343567, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019240361081273367, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0375240418925418, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1430.8125, "completions/mean_terminated_length": 1341.857177734375, "completions/min_length": 1171.0, "completions/min_terminated_length": 1171.0, "epoch": 0.003757044458359424, "frac_reward_zero_std": 0.0, "grad_norm": 2.7257956401904653, "kl": 0.0018787384033203125, "learning_rate": 2.5e-08, "loss": -0.014, "num_tokens": 126437.0, "reward": 7.450580596923828e-09, "reward_std": 1.0492231845855713, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09708628067006185, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16724793667635054, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1463.4375, "completions/mean_terminated_length": 1353.75, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.005009392611145898, "frac_reward_zero_std": 0.0, "grad_norm": 3.0068456094040337, "kl": 0.00238037109375, "learning_rate": 3.75e-08, "loss": -0.0103, "num_tokens": 192900.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4076952338218689, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.42554686388976987, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3748667411110748, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.006261740763932373, "frac_reward_zero_std": 0.0, "grad_norm": 3.0250923226839315, "kl": 0.002262115478515625, "learning_rate": 5e-08, "loss": 0.0001, "num_tokens": 257452.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9494391083717346, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0021633155301854353, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04003332867073718, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043475, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1213.4375, "completions/mean_terminated_length": 1041.5, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.007514088916718848, "frac_reward_zero_std": 0.0, "grad_norm": 3.520673181444066, "kl": 0.002166748046875, "learning_rate": 6.25e-08, "loss": -0.0047, "num_tokens": 300227.0, "reward": 1.4901161193847656e-08, "reward_std": 1.000030517578125, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1494053837623106, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21650138601325905, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1441.5, "completions/mean_terminated_length": 1266.0, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.008766437069505322, "frac_reward_zero_std": 0.0, "grad_norm": 3.131914910533151, "kl": 0.002285003662109375, "learning_rate": 7.5e-08, "loss": -0.0115, "num_tokens": 365811.0, "reward": 0.0, "reward_std": 1.021754264831543, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20434821411964987, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13055976557133547, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1037.0, "completions/mean_length": 1197.6875, "completions/mean_terminated_length": 895.375, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.010018785222291797, "frac_reward_zero_std": 0.0, "grad_norm": 3.1709545933290264, "kl": 0.001911163330078125, "learning_rate": 8.75e-08, "loss": 0.0124, "num_tokens": 406590.0, "reward": 0.0, "reward_std": 0.7096362113952637, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011512278889933215, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017023573988747046, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901863, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1424.8125, "completions/mean_terminated_length": 1299.5, "completions/min_length": 1222.0, "completions/min_terminated_length": 1222.0, "epoch": 0.011271133375078271, "frac_reward_zero_std": 0.0, "grad_norm": 2.4985696146916663, "kl": 0.0014972686767578125, "learning_rate": 1e-07, "loss": 0.0017, "num_tokens": 449955.0, "reward": 2.9802322387695312e-08, "reward_std": 0.592147946357727, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09093222668860702, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16366647482965233, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1497.0, "completions/mean_terminated_length": 1452.0, "completions/min_length": 1452.0, "completions/min_terminated_length": 1452.0, "epoch": 0.012523481527864746, "frac_reward_zero_std": 0.0, "grad_norm": 2.994231395858393, "kl": 0.002552032470703125, "learning_rate": 1.125e-07, "loss": 0.0008, "num_tokens": 512611.0, "reward": 0.0, "reward_std": 0.7100945115089417, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.39335439512941156, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.44383620756924225, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1319.0, "completions/mean_terminated_length": 1258.666748046875, "completions/min_length": 1147.0, "completions/min_terminated_length": 1147.0, "epoch": 0.013775829680651221, "frac_reward_zero_std": 0.0, "grad_norm": 3.29188099989823, "kl": 0.002773284912109375, "learning_rate": 1.25e-07, "loss": -0.0193, "num_tokens": 578363.0, "reward": 0.0, "reward_std": 0.7537417411804199, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.006668674614171876, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06272286484055771, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.49583333333333335, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693655, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 856.4375, "completions/mean_terminated_length": 856.4375, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.015028177833437696, "frac_reward_zero_std": 0.0, "grad_norm": 3.02451725178761, "kl": 0.0014524459838867188, "learning_rate": 1.375e-07, "loss": -0.0034, "num_tokens": 624626.0, "reward": 0.0, "reward_std": 0.25382307171821594, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16171540881469407, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04512392405527899, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.17293758240303758, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1088.5625, "completions/mean_terminated_length": 901.5454711914062, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.01628052598622417, "frac_reward_zero_std": 0.0, "grad_norm": 4.015402694119637, "kl": 0.0021724700927734375, "learning_rate": 1.5e-07, "loss": -0.0824, "num_tokens": 681059.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9276120662689209, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.041562779715464626, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1909826248378845, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1496.5625, "completions/mean_terminated_length": 1445.0, "completions/min_length": 1445.0, "completions/min_terminated_length": 1445.0, "epoch": 0.017532874139010644, "frac_reward_zero_std": 0.0, "grad_norm": 2.995346934747371, "kl": 0.002460479736328125, "learning_rate": 1.625e-07, "loss": -0.001, "num_tokens": 745196.0, "reward": 1.4901161193847656e-08, "reward_std": 1.008323073387146, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.057098024958501865, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10812840498160957, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1398411797560202, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1267.8125, "completions/mean_terminated_length": 1128.5, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.01878522229179712, "frac_reward_zero_std": 0.0, "grad_norm": 3.681468707345247, "kl": 0.002552032470703125, "learning_rate": 1.75e-07, "loss": -0.0258, "num_tokens": 802777.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5409140586853027, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.025752634294563932, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1190717918627845, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1465.75, "completions/mean_terminated_length": 1317.3333740234375, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.020037570444583593, "frac_reward_zero_std": 0.0, "grad_norm": 2.952391425850165, "kl": 0.00229644775390625, "learning_rate": 1.875e-07, "loss": 0.0085, "num_tokens": 852397.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9532216191291809, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.009914003172755002, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14279656209744931, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1085254706406647, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1464.1875, "completions/mean_terminated_length": 1356.75, "completions/min_length": 1308.0, "completions/min_terminated_length": 1308.0, "epoch": 0.021289918597370068, "frac_reward_zero_std": 0.0, "grad_norm": 2.6835952376597447, "kl": 0.0016937255859375, "learning_rate": 2e-07, "loss": -0.0182, "num_tokens": 905544.0, "reward": -2.9802322387695312e-08, "reward_std": 0.670647144317627, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0385939635652747, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11140246797780545, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1226.6875, "completions/mean_terminated_length": 1163.615478515625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.022542266750156543, "frac_reward_zero_std": 0.0, "grad_norm": 4.118592346302386, "kl": 0.0031585693359375, "learning_rate": 2.1249999999999998e-07, "loss": -0.014, "num_tokens": 958635.0, "reward": -3.725290298461914e-09, "reward_std": 1.0170769691467285, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004864839675281578, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0373192839130601, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1492.125, "completions/mean_terminated_length": 1374.0, "completions/min_length": 1374.0, "completions/min_terminated_length": 1374.0, "epoch": 0.023794614902943018, "frac_reward_zero_std": 0.0, "grad_norm": 2.9483970425927475, "kl": 0.0020732879638671875, "learning_rate": 2.25e-07, "loss": 0.0003, "num_tokens": 1017653.0, "reward": 0.0, "reward_std": 0.8760651350021362, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014058396075366015, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03110460490345673, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1290.125, "completions/mean_terminated_length": 1164.2000732421875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.025046963055729492, "frac_reward_zero_std": 0.0, "grad_norm": 3.3136168175643763, "kl": 0.002140045166015625, "learning_rate": 2.3749999999999998e-07, "loss": -0.0256, "num_tokens": 1065663.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9537639617919922, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038097750035485885, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1082295867822669, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0850925422157591, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1341.5, "completions/mean_terminated_length": 1183.0, "completions/min_length": 1034.0, "completions/min_terminated_length": 1034.0, "epoch": 0.026299311208515967, "frac_reward_zero_std": 0.0, "grad_norm": 3.306897182610288, "kl": 0.0024871826171875, "learning_rate": 2.5e-07, "loss": -0.0094, "num_tokens": 1117263.0, "reward": 0.0, "reward_std": 0.990053117275238, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011397748892334698, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046758634855771405, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1266.625, "completions/mean_terminated_length": 1033.25, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.027551659361302442, "frac_reward_zero_std": 0.0, "grad_norm": 3.3250640108747564, "kl": 0.002407073974609375, "learning_rate": 2.625e-07, "loss": -0.0385, "num_tokens": 1172489.0, "reward": 0.0, "reward_std": 0.7966146469116211, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020326344256082304, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14616918176802837, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 1460.875, "completions/mean_terminated_length": 1187.0, "completions/min_length": 1138.0, "completions/min_terminated_length": 1138.0, "epoch": 0.028804007514088917, "frac_reward_zero_std": 0.0, "grad_norm": 3.4267377669243926, "kl": 0.0071010589599609375, "learning_rate": 2.75e-07, "loss": -0.0111, "num_tokens": 1234527.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5723245143890381, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02122072131733574, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.157410051166117, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1325.625, "completions/mean_terminated_length": 1151.25, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.03005635566687539, "frac_reward_zero_std": 0.0, "grad_norm": 3.24473326800092, "kl": 0.0023651123046875, "learning_rate": 2.8749999999999995e-07, "loss": -0.0069, "num_tokens": 1269905.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0385103225708008, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04647400767345873, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09647557054247557, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1377.0625, "completions/mean_terminated_length": 1321.181884765625, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.031308703819661866, "frac_reward_zero_std": 0.0, "grad_norm": 2.6845626042385518, "kl": 0.001850128173828125, "learning_rate": 3e-07, "loss": 0.0174, "num_tokens": 1328778.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7787291407585144, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09266568639996468, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0822707712414604, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1169.5625, "completions/mean_terminated_length": 1093.3077392578125, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.03256105197244834, "frac_reward_zero_std": 0.0, "grad_norm": 3.712859953207578, "kl": 0.00261688232421875, "learning_rate": 3.1249999999999997e-07, "loss": 0.0167, "num_tokens": 1363011.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0016117095947266, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1709176314049482, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1600211117254044, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1215.9375, "completions/mean_terminated_length": 1197.0001220703125, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.033813400125234816, "frac_reward_zero_std": 0.0, "grad_norm": 3.399525494329125, "kl": 0.002574920654296875, "learning_rate": 3.25e-07, "loss": 0.0013, "num_tokens": 1407434.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9290227890014648, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03979791227452069, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13950243126020834, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1340.4375, "completions/mean_terminated_length": 1180.875, "completions/min_length": 1034.0, "completions/min_terminated_length": 1034.0, "epoch": 0.03506574827802129, "frac_reward_zero_std": 0.0, "grad_norm": 2.9566553373980344, "kl": 0.0021228790283203125, "learning_rate": 3.375e-07, "loss": -0.0056, "num_tokens": 1458241.0, "reward": 0.0, "reward_std": 0.7809990644454956, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028119487654073606, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1198837710832071, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6083333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0873477511423713, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1401.625, "completions/mean_terminated_length": 1275.1429443359375, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.036318096430807766, "frac_reward_zero_std": 0.0, "grad_norm": 2.9381632846830548, "kl": 0.002338409423828125, "learning_rate": 3.5e-07, "loss": 0.0125, "num_tokens": 1523987.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9863969087600708, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06145046632658874, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08502220502724643, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829063, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1257.625, "completions/mean_terminated_length": 1069.111083984375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.03757044458359424, "frac_reward_zero_std": 0.0, "grad_norm": 3.4028418647289094, "kl": 0.0042324066162109375, "learning_rate": 3.6249999999999997e-07, "loss": -0.0044, "num_tokens": 1582341.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9369316697120667, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.058010557784549034, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06029435215775259, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1480.25, "completions/mean_terminated_length": 1184.0, "completions/min_length": 1184.0, "completions/min_terminated_length": 1184.0, "epoch": 0.038822792736380715, "frac_reward_zero_std": 0.0, "grad_norm": 2.842249981197029, "kl": 0.0021514892578125, "learning_rate": 3.75e-07, "loss": -0.0106, "num_tokens": 1629017.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0243444442749023, "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010824625533504566, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02884739427994731, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1032.0, "completions/mean_terminated_length": 668.0, "completions/min_length": 294.0, "completions/min_terminated_length": 294.0, "epoch": 0.040075140889167186, "frac_reward_zero_std": 0.0, "grad_norm": 3.8067119735652115, "kl": 0.0024871826171875, "learning_rate": 3.875e-07, "loss": 0.0413, "num_tokens": 1666305.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8114193677902222, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05789475536948171, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04045242685812858, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14707015206910487, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1271.25, "completions/mean_terminated_length": 1195.0, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.041327489041953665, "frac_reward_zero_std": 0.0, "grad_norm": 3.1322197700429, "kl": 0.00167083740234375, "learning_rate": 4e-07, "loss": -0.0393, "num_tokens": 1727629.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4495465159416199, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05764457052515048, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11640629412276694, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1438.0625, "completions/mean_terminated_length": 1334.8333740234375, "completions/min_length": 1171.0, "completions/min_terminated_length": 1171.0, "epoch": 0.042579837194740136, "frac_reward_zero_std": 0.0, "grad_norm": 3.291671899271785, "kl": 0.0019550323486328125, "learning_rate": 4.1249999999999997e-07, "loss": 0.0187, "num_tokens": 1794062.0, "reward": 2.60770320892334e-08, "reward_std": 1.0634629726409912, "rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05489684988302594, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2423673289052158, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1106.0, "completions/mean_length": 1253.1875, "completions/mean_terminated_length": 1061.2222900390625, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.043832185347526614, "frac_reward_zero_std": 0.0, "grad_norm": 2.3253132789681, "kl": 0.00135040283203125, "learning_rate": 4.2499999999999995e-07, "loss": 0.0016, "num_tokens": 1847393.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6563782691955566, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072262342914357, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.195641332904443, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746356, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1399.3125, "completions/mean_terminated_length": 1298.625, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.045084533500313086, "frac_reward_zero_std": 0.0, "grad_norm": 2.7548692610807795, "kl": 0.0021877288818359375, "learning_rate": 4.375e-07, "loss": -0.0061, "num_tokens": 1892206.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9828654527664185, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06116004436340469, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10276980263780594, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1352.6875, "completions/mean_terminated_length": 1318.6923828125, "completions/min_length": 1218.0, "completions/min_terminated_length": 1218.0, "epoch": 0.046336881653099564, "frac_reward_zero_std": 0.0, "grad_norm": 2.47319096015074, "kl": 0.0014925003051757812, "learning_rate": 4.5e-07, "loss": -0.0109, "num_tokens": 1940945.0, "reward": 0.0, "reward_std": 0.955802857875824, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.041245323817924374, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19292307241869963, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1375.25, "completions/mean_terminated_length": 1250.5, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.047589229805886035, "frac_reward_zero_std": 0.0, "grad_norm": 3.1878681605362496, "kl": 0.002475738525390625, "learning_rate": 4.625e-07, "loss": -0.0118, "num_tokens": 1985181.0, "reward": -7.450580596923828e-09, "reward_std": 1.054539442062378, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019033803394582376, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10927050985901436, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06831300510639736, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1447.0, "completions/mean_terminated_length": 1288.0, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.048841577958672514, "frac_reward_zero_std": 0.0, "grad_norm": 3.188392715000756, "kl": 0.00237274169921875, "learning_rate": 4.7499999999999995e-07, "loss": 0.0406, "num_tokens": 2034525.0, "reward": -7.450580596923828e-09, "reward_std": 1.0613259077072144, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.019229174460983274, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03385821534786073, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1135.0, "completions/mean_length": 1148.75, "completions/mean_terminated_length": 797.5, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.050093926111458985, "frac_reward_zero_std": 0.0, "grad_norm": 2.6930283084099047, "kl": 0.0015668869018554688, "learning_rate": 4.875e-07, "loss": -0.0288, "num_tokens": 2080937.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7898622751235962, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072289975795826, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19078379794323846, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1416.9375, "completions/mean_terminated_length": 1333.875, "completions/min_length": 1244.0, "completions/min_terminated_length": 1244.0, "epoch": 0.05134627426424546, "frac_reward_zero_std": 0.0, "grad_norm": 2.177282619828498, "kl": 0.0012989044189453125, "learning_rate": 5e-07, "loss": 0.0091, "num_tokens": 2136744.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0509533882141113, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.031295483862865174, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11149225377383207, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686706, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1273.4375, "completions/mean_terminated_length": 1197.916748046875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.052598622417031934, "frac_reward_zero_std": 0.0, "grad_norm": 3.2241520761115305, "kl": 0.002368927001953125, "learning_rate": 5.125e-07, "loss": 0.004, "num_tokens": 2171271.0, "reward": -3.725290298461914e-09, "reward_std": 1.0308477878570557, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12167064883765863, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12965137595029042, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5416666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15177956725803718, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1466.6875, "completions/mean_terminated_length": 1366.75, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.05385097056981841, "frac_reward_zero_std": 0.0, "grad_norm": 2.9471462447348635, "kl": 0.0021038055419921875, "learning_rate": 5.25e-07, "loss": -0.0164, "num_tokens": 2231986.0, "reward": 2.9802322387695312e-08, "reward_std": 0.23251324892044067, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04063102604876061, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.22066858001488113, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.205074512203627, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1422.3125, "completions/mean_terminated_length": 1322.4285888671875, "completions/min_length": 1074.0, "completions/min_terminated_length": 1074.0, "epoch": 0.055103318722604884, "frac_reward_zero_std": 0.0, "grad_norm": 2.4268519840677527, "kl": 0.0010042190551757812, "learning_rate": 5.374999999999999e-07, "loss": -0.0099, "num_tokens": 2288223.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9692014455795288, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.070492449256456, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20832413138507544, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06440611887195309, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1376.0, "completions/mean_terminated_length": 1301.5999755859375, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.056355666875391355, "frac_reward_zero_std": 0.0, "grad_norm": 3.03273915452992, "kl": 0.002422332763671875, "learning_rate": 5.5e-07, "loss": 0.0123, "num_tokens": 2354343.0, "reward": 1.862645149230957e-08, "reward_std": 1.067973256111145, "rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1362560230689488, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16718884747044185, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.058214163988576643, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1453.0625, "completions/mean_terminated_length": 1392.71435546875, "completions/min_length": 1326.0, "completions/min_terminated_length": 1326.0, "epoch": 0.057608015028177834, "frac_reward_zero_std": 0.0, "grad_norm": 2.7636896151627517, "kl": 0.001781463623046875, "learning_rate": 5.625e-07, "loss": -0.0161, "num_tokens": 2410776.0, "reward": -2.9802322387695312e-08, "reward_std": 0.722027599811554, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.175370955230916, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15404241260320187, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14446581038560777, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 1384.8125, "completions/mean_terminated_length": 1039.25, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.058860363180964305, "frac_reward_zero_std": 0.0, "grad_norm": 3.218383433006556, "kl": 0.002429962158203125, "learning_rate": 5.749999999999999e-07, "loss": -0.0616, "num_tokens": 2460181.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8904982209205627, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020496850203242982, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13226975013047063, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08062257748298553, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1472.0, "completions/mean_terminated_length": 1350.666748046875, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.06011271133375078, "frac_reward_zero_std": 0.0, "grad_norm": 3.5256786742208663, "kl": 0.002872467041015625, "learning_rate": 5.875e-07, "loss": -0.0096, "num_tokens": 2524269.0, "reward": -1.4901161193847656e-08, "reward_std": 1.001219630241394, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0854065639247727, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950921206250912, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1430.9375, "completions/mean_terminated_length": 1315.8333740234375, "completions/min_length": 1131.0, "completions/min_terminated_length": 1131.0, "epoch": 0.061365059486537255, "frac_reward_zero_std": 0.0, "grad_norm": 2.961325619079936, "kl": 0.002254486083984375, "learning_rate": 6e-07, "loss": 0.0013, "num_tokens": 2584356.0, "reward": 0.0, "reward_std": 0.9873535633087158, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029950137491573797, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16218750528728998, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505425, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1491.125, "completions/mean_terminated_length": 1429.0, "completions/min_length": 1392.0, "completions/min_terminated_length": 1392.0, "epoch": 0.06261740763932373, "frac_reward_zero_std": 0.0, "grad_norm": 2.6822700139828397, "kl": 0.0020389556884765625, "learning_rate": 6.125000000000001e-07, "loss": -0.0009, "num_tokens": 2648270.0, "reward": -5.960464477539063e-08, "reward_std": 0.7698144912719727, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032020807081585716, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.053695035371207615, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369005, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1282.125, "completions/mean_terminated_length": 1112.6666259765625, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.06386975579211021, "frac_reward_zero_std": 0.0, "grad_norm": 3.07804473575342, "kl": 0.002071380615234375, "learning_rate": 6.249999999999999e-07, "loss": -0.0074, "num_tokens": 2693776.0, "reward": 0.0, "reward_std": 0.5634655952453613, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.013292184885055576, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12085541345993306, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1462.75, "completions/mean_terminated_length": 1301.3333740234375, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.06512210394489668, "frac_reward_zero_std": 0.0, "grad_norm": 3.194495117066712, "kl": 0.002658843994140625, "learning_rate": 6.374999999999999e-07, "loss": 0.0226, "num_tokens": 2758980.0, "reward": -1.4901161193847656e-08, "reward_std": 0.985281229019165, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02226574155778713, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05167870819779757, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1321.3125, "completions/mean_terminated_length": 1295.7857666015625, "completions/min_length": 1123.0, "completions/min_terminated_length": 1123.0, "epoch": 0.06637445209768315, "frac_reward_zero_std": 0.0, "grad_norm": 3.085117182590426, "kl": 0.0022735595703125, "learning_rate": 6.5e-07, "loss": 0.0196, "num_tokens": 2825249.0, "reward": 4.470348358154297e-08, "reward_std": 0.9839984774589539, "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08735912330077701, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14559155866011037, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1498.5625, "completions/mean_terminated_length": 1477.0, "completions/min_length": 1477.0, "completions/min_terminated_length": 1477.0, "epoch": 0.06762680025046963, "frac_reward_zero_std": 0.0, "grad_norm": 2.9348811206443304, "kl": 0.00191497802734375, "learning_rate": 6.624999999999999e-07, "loss": 0.0001, "num_tokens": 2889498.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0318164825439453, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04941181253574712, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06836218150195612, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045227, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1384.875, "completions/mean_terminated_length": 1236.857177734375, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.06887914840325611, "frac_reward_zero_std": 0.0, "grad_norm": 3.0200180369762695, "kl": 0.0021152496337890625, "learning_rate": 6.75e-07, "loss": -0.0343, "num_tokens": 2950200.0, "reward": 7.450580596923828e-09, "reward_std": 1.0550494194030762, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038887574815180403, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06912072840442107, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1435.0625, "completions/mean_terminated_length": 1370.125, "completions/min_length": 1161.0, "completions/min_terminated_length": 1161.0, "epoch": 0.07013149655604257, "frac_reward_zero_std": 0.0, "grad_norm": 2.9170742885205607, "kl": 0.0019893646240234375, "learning_rate": 6.875e-07, "loss": 0.0029, "num_tokens": 3019673.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9821785688400269, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016992912073662925, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.105336871629235, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1360.75, "completions/mean_terminated_length": 1221.5, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.07138384470882905, "frac_reward_zero_std": 0.0, "grad_norm": 2.3821452004991794, "kl": 0.0014314651489257812, "learning_rate": 7e-07, "loss": 0.029, "num_tokens": 3075477.0, "reward": 2.60770320892334e-08, "reward_std": 1.0472596883773804, "rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010678083797130186, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11394385265661125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.045338235029118164, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 970.625, "completions/mean_terminated_length": 970.625, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.07263619286161553, "frac_reward_zero_std": 0.0, "grad_norm": 3.5943049280036243, "kl": 0.0017871856689453125, "learning_rate": 7.125e-07, "loss": -0.0489, "num_tokens": 3103423.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8579948544502258, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11955284309699343, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1294140259487627, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1484.75, "completions/mean_terminated_length": 1378.0, "completions/min_length": 1305.0, "completions/min_terminated_length": 1305.0, "epoch": 0.07388854101440201, "frac_reward_zero_std": 0.0, "grad_norm": 2.914915162479294, "kl": 0.0023479461669921875, "learning_rate": 7.249999999999999e-07, "loss": -0.0133, "num_tokens": 3170979.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0570372343063354, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004701879619984315, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950367185128266, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1359.9375, "completions/mean_terminated_length": 1179.857177734375, "completions/min_length": 406.0, "completions/min_terminated_length": 406.0, "epoch": 0.07514088916718847, "frac_reward_zero_std": 0.0, "grad_norm": 3.283089786479577, "kl": 0.002674102783203125, "learning_rate": 7.375e-07, "loss": -0.0525, "num_tokens": 3233802.0, "reward": -1.6763806343078613e-08, "reward_std": 1.050881028175354, "rewards/wordcountpos_reward_ecommerce/mean": -1.6763806343078613e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02996931362982372, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07266154836265915, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.07639323731997495, "frac_reward_zero_std": 0.0, "grad_norm": 3.311519312396822, "kl": 0.00279998779296875, "learning_rate": 7.5e-07, "loss": 0.0001, "num_tokens": 3293354.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0101943016052246, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04793344228148064, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12274932480508612, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1410.1875, "completions/mean_terminated_length": 1294.71435546875, "completions/min_length": 1137.0, "completions/min_terminated_length": 1137.0, "epoch": 0.07764558547276143, "frac_reward_zero_std": 0.0, "grad_norm": 3.0789417256546874, "kl": 0.0022792816162109375, "learning_rate": 7.624999999999999e-07, "loss": -0.0109, "num_tokens": 3334909.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9900147914886475, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004903461451645089, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03771048515625185, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693658, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1133.0, "completions/mean_terminated_length": 1108.533447265625, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.07889793362554791, "frac_reward_zero_std": 0.0, "grad_norm": 3.259053190740638, "kl": 0.0018634796142578125, "learning_rate": 7.75e-07, "loss": -0.0194, "num_tokens": 3383333.0, "reward": 2.9802322387695312e-08, "reward_std": 0.671829104423523, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09142372399409204, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09598955648379433, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1463.0, "completions/mean_terminated_length": 1352.0, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.08015028177833437, "frac_reward_zero_std": 0.0, "grad_norm": 3.187716368550353, "kl": 0.002471923828125, "learning_rate": 7.875e-07, "loss": 0.0106, "num_tokens": 3442269.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0351850986480713, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12370484162737726, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1619343847332339, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1410.625, "completions/mean_terminated_length": 1261.666748046875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.08140262993112085, "frac_reward_zero_std": 0.0, "grad_norm": 3.218117740066407, "kl": 0.002559661865234375, "learning_rate": 8e-07, "loss": -0.0443, "num_tokens": 3489911.0, "reward": 5.960464477539063e-08, "reward_std": 0.5395079851150513, "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061171909778282046, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06618755934392026, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1316.6875, "completions/mean_terminated_length": 1206.7000732421875, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.08265497808390733, "frac_reward_zero_std": 0.0, "grad_norm": 3.223646819331395, "kl": 0.002506256103515625, "learning_rate": 8.125e-07, "loss": -0.0004, "num_tokens": 3531330.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9571313858032227, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.027972706586888517, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1908156027057365, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1478.5625, "completions/mean_terminated_length": 1328.5, "completions/min_length": 1327.0, "completions/min_terminated_length": 1327.0, "epoch": 0.08390732623669381, "frac_reward_zero_std": 0.0, "grad_norm": 3.411248138087788, "kl": 0.00255584716796875, "learning_rate": 8.249999999999999e-07, "loss": 0.0085, "num_tokens": 3591331.0, "reward": -5.960464477539063e-08, "reward_std": 0.6705090403556824, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3499282464198203, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3060898603663511, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1495.125, "completions/mean_terminated_length": 1422.0, "completions/min_length": 1422.0, "completions/min_terminated_length": 1422.0, "epoch": 0.08515967438948027, "frac_reward_zero_std": 0.0, "grad_norm": 3.025267713589772, "kl": 0.002880096435546875, "learning_rate": 8.375e-07, "loss": -0.0014, "num_tokens": 3658421.0, "reward": 0.0, "reward_std": 0.9633276462554932, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07580010422442789, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17700501480681413, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533113, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1469.875, "completions/mean_terminated_length": 1259.0, "completions/min_length": 1209.0, "completions/min_terminated_length": 1209.0, "epoch": 0.08641202254226675, "frac_reward_zero_std": 0.0, "grad_norm": 3.0466734222423546, "kl": 0.002544403076171875, "learning_rate": 8.499999999999999e-07, "loss": 0.0044, "num_tokens": 3724899.0, "reward": 0.0, "reward_std": 1.0227458477020264, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.002677645774302454, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11990711113827299, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457552, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1410.8125, "completions/mean_terminated_length": 1321.625, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.08766437069505323, "frac_reward_zero_std": 0.0, "grad_norm": 3.318934314260283, "kl": 0.002834320068359375, "learning_rate": 8.625e-07, "loss": 0.0072, "num_tokens": 3777184.0, "reward": 7.450580596923828e-09, "reward_std": 1.0494259595870972, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024827264373621732, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.036366284403351476, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0859586463881842, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1399.25, "completions/mean_terminated_length": 1231.3333740234375, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.08891671884783969, "frac_reward_zero_std": 0.0, "grad_norm": 2.8509264779724033, "kl": 0.002223968505859375, "learning_rate": 8.75e-07, "loss": 0.0037, "num_tokens": 3836428.0, "reward": 0.0, "reward_std": 1.0668516159057617, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053166199498163626, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12647299276011556, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1199.625, "completions/mean_terminated_length": 1130.3077392578125, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.09016906700062617, "frac_reward_zero_std": 0.0, "grad_norm": 2.886201838693034, "kl": 0.001605987548828125, "learning_rate": 8.874999999999999e-07, "loss": -0.0027, "num_tokens": 3881094.0, "reward": 0.0, "reward_std": 0.8761758804321289, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02026371657719268, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04408943383486411, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.09142141515341265, "frac_reward_zero_std": 0.0, "grad_norm": 2.60284832797847, "kl": 0.00217437744140625, "learning_rate": 9e-07, "loss": 0.0001, "num_tokens": 3940518.0, "reward": 0.0, "reward_std": 0.5877071619033813, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027393406712592036, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0844493241747004, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1269.3125, "completions/mean_terminated_length": 1192.416748046875, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.09267376330619913, "frac_reward_zero_std": 0.0, "grad_norm": 3.2612995556206354, "kl": 0.002330780029296875, "learning_rate": 9.124999999999999e-07, "loss": -0.0066, "num_tokens": 3982827.0, "reward": -1.4901161193847656e-08, "reward_std": 0.924209713935852, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021465279786927867, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03289535545475229, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804345, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1346.4375, "completions/mean_terminated_length": 1227.0, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.09392611145898559, "frac_reward_zero_std": 0.0, "grad_norm": 2.6195723075826596, "kl": 0.00183868408203125, "learning_rate": 9.25e-07, "loss": -0.0361, "num_tokens": 4041194.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0540246963500977, "rewards/wordcountpos_reward_ecommerce/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03654417489517675, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055054088822312976, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1492.0625, "completions/mean_terminated_length": 1436.5, "completions/min_length": 1381.0, "completions/min_terminated_length": 1381.0, "epoch": 0.09517845961177207, "frac_reward_zero_std": 0.0, "grad_norm": 3.1883337396909632, "kl": 0.0028228759765625, "learning_rate": 9.374999999999999e-07, "loss": -0.0004, "num_tokens": 4102531.0, "reward": 0.0, "reward_std": 0.7272332906723022, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657008296291109, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08174957503379145, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437976, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1347.875, "completions/mean_terminated_length": 1152.2857666015625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.09643080776455855, "frac_reward_zero_std": 0.0, "grad_norm": 3.323504050782515, "kl": 0.002685546875, "learning_rate": 9.499999999999999e-07, "loss": -0.012, "num_tokens": 4154537.0, "reward": 0.0, "reward_std": 0.9932632446289062, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12575056940966298, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15133213208857665, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1434.4375, "completions/mean_terminated_length": 1290.2000732421875, "completions/min_length": 1178.0, "completions/min_terminated_length": 1178.0, "epoch": 0.09768315591734503, "frac_reward_zero_std": 0.0, "grad_norm": 3.4382048596496553, "kl": 0.002773284912109375, "learning_rate": 9.624999999999999e-07, "loss": -0.0322, "num_tokens": 4221464.0, "reward": -2.60770320892334e-08, "reward_std": 1.0265973806381226, "rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08170559900334663, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10185399685140464, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.161245154965971, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1248.3125, "completions/mean_terminated_length": 1097.300048828125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.09893550407013149, "frac_reward_zero_std": 0.0, "grad_norm": 2.648370216175046, "kl": 0.0019168853759765625, "learning_rate": 9.75e-07, "loss": -0.027, "num_tokens": 4267669.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9588196873664856, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07500714246624458, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993198507995109, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05288001793018134, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1436.75, "completions/mean_terminated_length": 1247.0, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.10018785222291797, "frac_reward_zero_std": 0.0, "grad_norm": 2.879418147641467, "kl": 0.0019855499267578125, "learning_rate": 9.875e-07, "loss": -0.0127, "num_tokens": 4328465.0, "reward": 0.0, "reward_std": 0.9200654029846191, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1453335125370645, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1827536027247548, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1422.1875, "completions/mean_terminated_length": 1344.375, "completions/min_length": 1237.0, "completions/min_terminated_length": 1237.0, "epoch": 0.10144020037570445, "frac_reward_zero_std": 0.0, "grad_norm": 2.635617733673008, "kl": 0.0017528533935546875, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 4373324.0, "reward": -3.725290298461914e-09, "reward_std": 1.0682477951049805, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053201182409366166, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.044798463974146746, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1170.5, "completions/mean_terminated_length": 1020.727294921875, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.10269254852849093, "frac_reward_zero_std": 0.0, "grad_norm": 2.8578219249339107, "kl": 0.0019054412841796875, "learning_rate": 9.999957044004145e-07, "loss": -0.0353, "num_tokens": 4419844.0, "reward": 0.0, "reward_std": 0.4868781566619873, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1691690312178033, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1856850439917278, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1202.4375, "completions/mean_terminated_length": 1182.60009765625, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.10394489668127739, "frac_reward_zero_std": 0.0, "grad_norm": 3.206094248867889, "kl": 0.0022640228271484375, "learning_rate": 9.999828176836682e-07, "loss": -0.0042, "num_tokens": 4464763.0, "reward": 7.450580596923828e-09, "reward_std": 0.9854896068572998, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11969234946420118, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3068885267137289, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1347.25, "completions/mean_terminated_length": 1228.4444580078125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.10519724483406387, "frac_reward_zero_std": 0.0, "grad_norm": 3.3502401935196273, "kl": 0.0025177001953125, "learning_rate": 9.99961340095788e-07, "loss": -0.0232, "num_tokens": 4520295.0, "reward": -7.450580596923828e-09, "reward_std": 1.0421638488769531, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04940475583906399, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10190244243958202, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1309.6875, "completions/mean_terminated_length": 1265.769287109375, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.10644959298685035, "frac_reward_zero_std": 0.0, "grad_norm": 2.728411933718049, "kl": 0.001689910888671875, "learning_rate": 9.99931272046815e-07, "loss": -0.0142, "num_tokens": 4576338.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8622345924377441, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.016984465370970727, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040579939841277814, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1341.5625, "completions/mean_terminated_length": 1269.5455322265625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.10770194113963683, "frac_reward_zero_std": 0.0, "grad_norm": 3.200552135647029, "kl": 0.002315521240234375, "learning_rate": 9.998926141107945e-07, "loss": 0.0351, "num_tokens": 4618667.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8471476435661316, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22087111411084098, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24091025740898386, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1394.6875, "completions/mean_terminated_length": 1259.2857666015625, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.10895428929242329, "frac_reward_zero_std": 0.0, "grad_norm": 3.177667386837352, "kl": 0.002468109130859375, "learning_rate": 9.998453670257666e-07, "loss": 0.0024, "num_tokens": 4675550.0, "reward": 0.0, "reward_std": 0.3878336548805237, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06448512648276508, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0842294519714606, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1469.6875, "completions/mean_terminated_length": 1338.3333740234375, "completions/min_length": 1296.0, "completions/min_terminated_length": 1296.0, "epoch": 0.11020663744520977, "frac_reward_zero_std": 0.0, "grad_norm": 3.074898456526424, "kl": 0.00238037109375, "learning_rate": 9.997895316937517e-07, "loss": 0.0066, "num_tokens": 4734649.0, "reward": -4.470348358154297e-08, "reward_std": 0.9637711048126221, "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09676546074924117, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06959776462437538, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1303.1875, "completions/mean_terminated_length": 1237.5833740234375, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.11145898559799625, "frac_reward_zero_std": 0.0, "grad_norm": 2.3833185192510284, "kl": 0.001354217529296875, "learning_rate": 9.997251091807332e-07, "loss": 0.0171, "num_tokens": 4789676.0, "reward": 0.0, "reward_std": 1.016492486000061, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12777237426683458, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21498123308224262, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1453.0625, "completions/mean_terminated_length": 1249.666748046875, "completions/min_length": 1149.0, "completions/min_terminated_length": 1149.0, "epoch": 0.11271133375078271, "frac_reward_zero_std": 0.0, "grad_norm": 3.1689450227648854, "kl": 0.002933502197265625, "learning_rate": 9.99652100716637e-07, "loss": -0.0062, "num_tokens": 4847781.0, "reward": 0.0, "reward_std": 0.64935302734375, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16229754855451553, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20151739444607794, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.18373692949230228, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1450.0, "completions/mean_terminated_length": 1300.0, "completions/min_length": 1049.0, "completions/min_terminated_length": 1049.0, "epoch": 0.11396368190356919, "frac_reward_zero_std": 0.0, "grad_norm": 3.0146226006623476, "kl": 0.002593994140625, "learning_rate": 9.995705076953075e-07, "loss": -0.0291, "num_tokens": 4905421.0, "reward": 0.0, "reward_std": 1.0383461713790894, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06052119205813296, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12160618129006116, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 910.0, "completions/mean_length": 1144.0625, "completions/mean_terminated_length": 788.125, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.11521603005635567, "frac_reward_zero_std": 0.0, "grad_norm": 2.228817256723133, "kl": 0.0014410018920898438, "learning_rate": 9.994803316744828e-07, "loss": 0.0105, "num_tokens": 4950462.0, "reward": -4.470348358154297e-08, "reward_std": 0.9390549659729004, "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07564319510568883, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1514996148617109, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15770342536029575, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1093.8125, "completions/mean_terminated_length": 1093.8125, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.11646837820914215, "frac_reward_zero_std": 0.0, "grad_norm": 3.6363277397384617, "kl": 0.002498626708984375, "learning_rate": 9.993815743757633e-07, "loss": -0.0484, "num_tokens": 4983835.0, "reward": 0.0, "reward_std": 0.8996579647064209, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0037569304970198007, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07736656048737343, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.2014760347847669, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1417.75, "completions/mean_terminated_length": 1335.5, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.11772072636192861, "frac_reward_zero_std": 0.0, "grad_norm": 3.20990587817039, "kl": 0.002735137939453125, "learning_rate": 9.99274237684579e-07, "loss": 0.004, "num_tokens": 5030407.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6368776559829712, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029770016601004534, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0349532410691535, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 926.8125, "completions/mean_terminated_length": 926.8125, "completions/min_length": 631.0, "completions/min_terminated_length": 631.0, "epoch": 0.11897307451471509, "frac_reward_zero_std": 0.0, "grad_norm": 2.7699870834508333, "kl": 0.0008082389831542969, "learning_rate": 9.99158323650154e-07, "loss": -0.0527, "num_tokens": 5074556.0, "reward": -3.725290298461914e-09, "reward_std": 1.0668668746948242, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01722883909028131, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19517428674960768, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0843274042711568, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1283.5625, "completions/mean_terminated_length": 1115.2222900390625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.12022542266750157, "frac_reward_zero_std": 0.0, "grad_norm": 3.2021886227663034, "kl": 0.002685546875, "learning_rate": 9.990338344854676e-07, "loss": -0.0074, "num_tokens": 5120597.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9720104336738586, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024841432663237503, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17561297504079998, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1415.3125, "completions/mean_terminated_length": 1364.5, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.12147777082028804, "frac_reward_zero_std": 0.0, "grad_norm": 3.2246665277704185, "kl": 0.002559661865234375, "learning_rate": 9.989007725672113e-07, "loss": 0.0063, "num_tokens": 5158170.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7684129476547241, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020625, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0825, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1586400537905439, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1175.3125, "completions/mean_terminated_length": 922.7777709960938, "completions/min_length": 596.0, "completions/min_terminated_length": 596.0, "epoch": 0.12273011897307451, "frac_reward_zero_std": 0.0, "grad_norm": 3.169149502657088, "kl": 0.00231170654296875, "learning_rate": 9.987591404357437e-07, "loss": -0.0811, "num_tokens": 5215647.0, "reward": 0.0, "reward_std": 0.9120274782180786, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005036444545787546, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10234315753446507, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1387777332977422, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1264.5, "completions/mean_terminated_length": 1123.2000732421875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.12398246712586099, "frac_reward_zero_std": 0.0, "grad_norm": 2.9409598040312765, "kl": 0.002063751220703125, "learning_rate": 9.986089407950426e-07, "loss": -0.0453, "num_tokens": 5250879.0, "reward": 7.450580596923828e-09, "reward_std": 1.0199556350708008, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11830339701018143, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.25916185560707883, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1406.9375, "completions/mean_terminated_length": 1251.8333740234375, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.12523481527864747, "frac_reward_zero_std": 0.0, "grad_norm": 3.236489502184281, "kl": 0.0029754638671875, "learning_rate": 9.98450176512652e-07, "loss": 0.0261, "num_tokens": 5303030.0, "reward": 0.0, "reward_std": 0.8868198990821838, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.14501472660672157, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15004116932595393, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1374.0625, "completions/mean_terminated_length": 1332.0833740234375, "completions/min_length": 1208.0, "completions/min_terminated_length": 1208.0, "epoch": 0.12648716343143393, "frac_reward_zero_std": 0.0, "grad_norm": 2.7393115321098898, "kl": 0.0021686553955078125, "learning_rate": 9.982828506196295e-07, "loss": 0.0475, "num_tokens": 5348991.0, "reward": -2.9802322387695312e-08, "reward_std": 0.744665265083313, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16115596269847898, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19475646493041288, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1392.25, "completions/mean_terminated_length": 1284.5, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.12773951158422042, "frac_reward_zero_std": 0.0, "grad_norm": 2.465229932517055, "kl": 0.00170135498046875, "learning_rate": 9.981069663104853e-07, "loss": -0.0292, "num_tokens": 5393291.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9994131326675415, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010671914654693294, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027094219261353553, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1388.5625, "completions/mean_terminated_length": 1277.125, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.1289918597370069, "frac_reward_zero_std": 0.0, "grad_norm": 2.97886098445916, "kl": 0.00238800048828125, "learning_rate": 9.979225269431252e-07, "loss": 0.0455, "num_tokens": 5437588.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0143799781799316, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0018910121903646018, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21804038685357507, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057183, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1159.8125, "completions/mean_terminated_length": 955.7000122070312, "completions/min_length": 402.0, "completions/min_terminated_length": 402.0, "epoch": 0.13024420788979335, "frac_reward_zero_std": 0.0, "grad_norm": 3.2070297326725687, "kl": 0.0024261474609375, "learning_rate": 9.977295360387827e-07, "loss": -0.0325, "num_tokens": 5469273.0, "reward": 0.0, "reward_std": 0.848124623298645, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0002889221914715882, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03991849505429317, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.4875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1495053572680653, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1405.4375, "completions/mean_terminated_length": 1247.8333740234375, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.13149655604257984, "frac_reward_zero_std": 0.0, "grad_norm": 3.1693981909983457, "kl": 0.00269317626953125, "learning_rate": 9.97527997281954e-07, "loss": -0.0085, "num_tokens": 5527744.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0289491415023804, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07601873282977642, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2329329780235847, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1258.5625, "completions/mean_terminated_length": 1070.77783203125, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.1327489041953663, "frac_reward_zero_std": 0.0, "grad_norm": 2.4731519538264903, "kl": 0.0015192031860351562, "learning_rate": 9.973179145203272e-07, "loss": -0.0122, "num_tokens": 5571305.0, "reward": 0.0, "reward_std": 1.046633243560791, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027299266065874364, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09683294681842305, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1361.875, "completions/mean_terminated_length": 1223.75, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.1340012523481528, "frac_reward_zero_std": 0.0, "grad_norm": 2.781242946832243, "kl": 0.0024871826171875, "learning_rate": 9.970992917647088e-07, "loss": -0.0163, "num_tokens": 5617855.0, "reward": 0.0, "reward_std": 0.9318596124649048, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19798356691808755, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.29651415192877617, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11021863793455328, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1146.0625, "completions/mean_terminated_length": 1122.4666748046875, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.13525360050093926, "frac_reward_zero_std": 0.0, "grad_norm": 2.9997009952780855, "kl": 0.0022125244140625, "learning_rate": 9.968721331889465e-07, "loss": 0.0235, "num_tokens": 5654992.0, "reward": 7.450580596923828e-09, "reward_std": 1.0186116695404053, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0558045951815816, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.029030233660680062, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242312, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1410.4375, "completions/mean_terminated_length": 1261.166748046875, "completions/min_length": 1123.0, "completions/min_terminated_length": 1123.0, "epoch": 0.13650594865372573, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682648437410637, "kl": 0.002681732177734375, "learning_rate": 9.966364431298509e-07, "loss": -0.022, "num_tokens": 5711927.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0176870822906494, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.26425948065238597, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.28899722395436095, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1212.4375, "completions/mean_terminated_length": 1081.727294921875, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.13775829680651222, "frac_reward_zero_std": 0.0, "grad_norm": 3.323352542220494, "kl": 0.002532958984375, "learning_rate": 9.963922260871115e-07, "loss": -0.0134, "num_tokens": 5754094.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9666612148284912, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.051175618061779164, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.039320213077717464, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5833333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516195, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1485.25, "completions/mean_terminated_length": 1421.3333740234375, "completions/min_length": 1380.0, "completions/min_terminated_length": 1380.0, "epoch": 0.13901064495929868, "frac_reward_zero_std": 0.0, "grad_norm": 2.8360270019245446, "kl": 0.0024871826171875, "learning_rate": 9.9613948672321e-07, "loss": -0.0014, "num_tokens": 5814162.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0610442161560059, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009639880768854782, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045421738289270215, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057184, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 1434.75, "completions/mean_terminated_length": 1239.0, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.14026299311208515, "frac_reward_zero_std": 0.0, "grad_norm": 2.999336260576351, "kl": 0.0024852752685546875, "learning_rate": 9.958782298633351e-07, "loss": -0.0196, "num_tokens": 5879078.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7917496562004089, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03413289340922598, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05688585018947227, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1377.4375, "completions/mean_terminated_length": 1173.166748046875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.14151534126487164, "frac_reward_zero_std": 0.0, "grad_norm": 2.661753098948472, "kl": 0.0021266937255859375, "learning_rate": 9.95608460495285e-07, "loss": -0.0087, "num_tokens": 5933045.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0039006471633911, "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05284198848548562, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05437266883758088, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 1322.25, "completions/mean_terminated_length": 1093.71435546875, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.1427676894176581, "frac_reward_zero_std": 0.0, "grad_norm": 2.8780144010263284, "kl": 0.0020542144775390625, "learning_rate": 9.953301837693767e-07, "loss": 0.003, "num_tokens": 5979113.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8175742626190186, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005502994719066203, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06974582191643876, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1401.1875, "completions/mean_terminated_length": 1324.3333740234375, "completions/min_length": 1118.0, "completions/min_terminated_length": 1118.0, "epoch": 0.14402003757044457, "frac_reward_zero_std": 0.0, "grad_norm": 3.139329899307579, "kl": 0.002716064453125, "learning_rate": 9.95043404998345e-07, "loss": 0.0292, "num_tokens": 6040452.0, "reward": 0.0, "reward_std": 1.0616416931152344, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03748903917915849, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14395002297286164, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1185.375, "completions/mean_terminated_length": 1140.4285888671875, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.14527238572323106, "frac_reward_zero_std": 0.0, "grad_norm": 3.3086990939983667, "kl": 0.0029144287109375, "learning_rate": 9.947481296572423e-07, "loss": -0.014, "num_tokens": 6090810.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0066075325012207, "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05014218857813404, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09276403913432626, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1351.9375, "completions/mean_terminated_length": 1263.0999755859375, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.14652473387601753, "frac_reward_zero_std": 0.0, "grad_norm": 3.035226656450535, "kl": 0.002376556396484375, "learning_rate": 9.944443633833335e-07, "loss": 0.0179, "num_tokens": 6148881.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7348309755325317, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0762897874284947, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12841725021840134, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1474.6875, "completions/mean_terminated_length": 1297.5, "completions/min_length": 1203.0, "completions/min_terminated_length": 1203.0, "epoch": 0.14777708202880402, "frac_reward_zero_std": 0.0, "grad_norm": 2.9234616863737957, "kl": 0.0024566650390625, "learning_rate": 9.94132111975989e-07, "loss": 0.0031, "num_tokens": 6213916.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5194555521011353, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.018562499999999996, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.024749999999999994, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1220.0625, "completions/mean_terminated_length": 940.125, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.14902943018159048, "frac_reward_zero_std": 0.0, "grad_norm": 2.7331417657136603, "kl": 0.002094268798828125, "learning_rate": 9.93811381396573e-07, "loss": -0.0031, "num_tokens": 6257485.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7746272087097168, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02009986693954008, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07362867807980181, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1400.375, "completions/mean_terminated_length": 1340.5999755859375, "completions/min_length": 1181.0, "completions/min_terminated_length": 1181.0, "epoch": 0.15028177833437695, "frac_reward_zero_std": 0.0, "grad_norm": 3.834637825788323, "kl": 0.003093719482421875, "learning_rate": 9.934821777683306e-07, "loss": 0.0269, "num_tokens": 6319963.0, "reward": 0.0, "reward_std": 1.0544224977493286, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.18103321643586406, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14394672405121658, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1325.5, "completions/mean_terminated_length": 1189.77783203125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.15153412648716344, "frac_reward_zero_std": 0.0, "grad_norm": 3.2642836490036453, "kl": 0.0030364990234375, "learning_rate": 9.93144507376271e-07, "loss": -0.005, "num_tokens": 6385427.0, "reward": 0.0, "reward_std": 0.8268899917602539, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1112911236291226, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1569615458099141, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6124999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568493, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1305.875, "completions/mean_terminated_length": 1217.6363525390625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.1527864746399499, "frac_reward_zero_std": 0.0, "grad_norm": 2.6641398914857923, "kl": 0.002033233642578125, "learning_rate": 9.927983766670462e-07, "loss": -0.0098, "num_tokens": 6440177.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0115642547607422, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06872988161057395, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1025211626906069, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1239.6875, "completions/mean_terminated_length": 1083.5, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.15403882279273637, "frac_reward_zero_std": 0.0, "grad_norm": 2.9543701078797575, "kl": 0.0018558502197265625, "learning_rate": 9.924437922488291e-07, "loss": 0.0245, "num_tokens": 6498212.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6738491654396057, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038590091343060344, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09510012784467493, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1403.625, "completions/mean_terminated_length": 1345.800048828125, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.15529117094552286, "frac_reward_zero_std": 0.0, "grad_norm": 3.5178341976839556, "kl": 0.0033111572265625, "learning_rate": 9.920807608911876e-07, "loss": 0.0022, "num_tokens": 6553902.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8796525597572327, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.013190710670885862, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1480868926971966, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 1112.5625, "completions/mean_terminated_length": 725.125, "completions/min_length": 613.0, "completions/min_terminated_length": 613.0, "epoch": 0.15654351909830932, "frac_reward_zero_std": 0.0, "grad_norm": 2.378105432163651, "kl": 0.0008687973022460938, "learning_rate": 9.917092895249543e-07, "loss": -0.0272, "num_tokens": 6589311.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9441956877708435, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014329624416098018, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.112902138916422, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12758439472669758, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1137.0625, "completions/mean_terminated_length": 1053.3077392578125, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.15779586725109582, "frac_reward_zero_std": 0.0, "grad_norm": 3.2527212324187826, "kl": 0.002117156982421875, "learning_rate": 9.913293852420946e-07, "loss": -0.0249, "num_tokens": 6618304.0, "reward": 2.2351741790771484e-08, "reward_std": 1.035041093826294, "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007633954846541112, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.032194935573291575, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1462.125, "completions/mean_terminated_length": 1348.5, "completions/min_length": 1255.0, "completions/min_terminated_length": 1255.0, "epoch": 0.15904821540388228, "frac_reward_zero_std": 0.0, "grad_norm": 3.091983329824593, "kl": 0.00301361083984375, "learning_rate": 9.909410552955712e-07, "loss": 0.0155, "num_tokens": 6681314.0, "reward": 2.9802322387695312e-08, "reward_std": 0.784981369972229, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12737730164130195, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21747166290242714, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1378.875, "completions/mean_terminated_length": 1223.1429443359375, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.16030056355666875, "frac_reward_zero_std": 0.0, "grad_norm": 3.2503169455658982, "kl": 0.002620697021484375, "learning_rate": 9.905443070992068e-07, "loss": -0.0039, "num_tokens": 6723448.0, "reward": -3.3527612686157227e-08, "reward_std": 1.06490159034729, "rewards/wordcountpos_reward_ecommerce/mean": -3.3527612686157227e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07877405649297206, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0705921273253386, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1418.0, "completions/mean_terminated_length": 1281.3333740234375, "completions/min_length": 1167.0, "completions/min_terminated_length": 1167.0, "epoch": 0.16155291170945524, "frac_reward_zero_std": 0.0, "grad_norm": 3.419069025864104, "kl": 0.003154754638671875, "learning_rate": 9.901391482275403e-07, "loss": -0.0084, "num_tokens": 6774208.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9308052062988281, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06996807244867725, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1266299752409378, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1098.0, "completions/mean_length": 1249.3125, "completions/mean_terminated_length": 998.625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.1628052598622417, "frac_reward_zero_std": 0.0, "grad_norm": 2.593196585544164, "kl": 0.001987457275390625, "learning_rate": 9.897255864156847e-07, "loss": 0.0036, "num_tokens": 6807421.0, "reward": 0.0, "reward_std": 0.4564354419708252, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0429616858320893, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07600285040401121, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1382.375, "completions/mean_terminated_length": 1231.1429443359375, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.16405760801502817, "frac_reward_zero_std": 0.0, "grad_norm": 3.0914996686420104, "kl": 0.002330780029296875, "learning_rate": 9.893036295591768e-07, "loss": -0.0116, "num_tokens": 6866379.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9815191626548767, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04217953361323695, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06871670933278229, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1347.125, "completions/mean_terminated_length": 1150.571533203125, "completions/min_length": 371.0, "completions/min_terminated_length": 371.0, "epoch": 0.16530995616781466, "frac_reward_zero_std": 0.0, "grad_norm": 3.001502145266533, "kl": 0.0022430419921875, "learning_rate": 9.888732857138291e-07, "loss": -0.04, "num_tokens": 6912533.0, "reward": 0.0, "reward_std": 0.8428164720535278, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020130872057838745, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04873657297962695, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369003, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 1177.75, "completions/mean_terminated_length": 855.5, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.16656230432060112, "frac_reward_zero_std": 0.0, "grad_norm": 2.849369584207102, "kl": 0.00208282470703125, "learning_rate": 9.884345630955742e-07, "loss": -0.0097, "num_tokens": 6966273.0, "reward": 7.450580596923828e-09, "reward_std": 1.0472090244293213, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08003635148497827, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09874522821696813, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1488.0, "completions/mean_terminated_length": 1404.0, "completions/min_length": 1317.0, "completions/min_terminated_length": 1317.0, "epoch": 0.16781465247338762, "frac_reward_zero_std": 0.0, "grad_norm": 2.688722282572705, "kl": 0.0026092529296875, "learning_rate": 9.879874700803082e-07, "loss": 0.0158, "num_tokens": 7027657.0, "reward": 3.166496753692627e-08, "reward_std": 1.0543937683105469, "rewards/wordcountpos_reward_ecommerce/mean": 3.166496753692627e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11540214745824308, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23102363071615145, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1064.769287109375, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.16906700062617408, "frac_reward_zero_std": 0.0, "grad_norm": 3.664759257998968, "kl": 0.0027923583984375, "learning_rate": 9.875320152037318e-07, "loss": -0.0535, "num_tokens": 7084095.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8985534906387329, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23834962043700852, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.27030996076033054, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1274.875, "completions/mean_terminated_length": 1049.75, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.17031934877896054, "frac_reward_zero_std": 0.0, "grad_norm": 2.4849191974864953, "kl": 0.0015964508056640625, "learning_rate": 9.870682071611862e-07, "loss": 0.0064, "num_tokens": 7133293.0, "reward": 0.0, "reward_std": 0.6105766892433167, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02324001170505371, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08829030406958045, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 955.6875, "completions/mean_terminated_length": 955.6875, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.17157169693174704, "frac_reward_zero_std": 0.0, "grad_norm": 2.620633996964256, "kl": 0.0014123916625976562, "learning_rate": 9.865960548074874e-07, "loss": 0.0103, "num_tokens": 7187688.0, "reward": 5.960464477539063e-08, "reward_std": 0.6596803069114685, "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0038102094327885448, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12227248665731598, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1322.0, "completions/mean_terminated_length": 1093.1429443359375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.1728240450845335, "frac_reward_zero_std": 0.0, "grad_norm": 2.8032617883799893, "kl": 0.0021915435791015625, "learning_rate": 9.861155671567572e-07, "loss": 0.0513, "num_tokens": 7236832.0, "reward": 1.4901161193847656e-08, "reward_std": 0.938301682472229, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06466602322499601, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05158824252677371, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5666666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820632, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1335.8125, "completions/mean_terminated_length": 1237.300048828125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.17407639323731997, "frac_reward_zero_std": 0.0, "grad_norm": 3.6291703245010103, "kl": 0.00284576416015625, "learning_rate": 9.856267533822519e-07, "loss": -0.021, "num_tokens": 7293301.0, "reward": 0.0, "reward_std": 0.7662729024887085, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08970693607829759, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16534434492549577, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1970147578604578, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1120.8125, "completions/mean_terminated_length": 994.4166870117188, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.17532874139010646, "frac_reward_zero_std": 0.0, "grad_norm": 2.6820256510644604, "kl": 0.0022602081298828125, "learning_rate": 9.851296228161857e-07, "loss": 0.019, "num_tokens": 7341130.0, "reward": 0.0, "reward_std": 0.8969849348068237, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026873742767844065, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0656536955300479, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1399.75, "completions/mean_terminated_length": 1232.666748046875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.17658108954289292, "frac_reward_zero_std": 0.0, "grad_norm": 3.372829326628183, "kl": 0.003108978271484375, "learning_rate": 9.846241849495535e-07, "loss": 0.0153, "num_tokens": 7410982.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9778778553009033, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05033218082218886, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.025136378125956142, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1449.5625, "completions/mean_terminated_length": 1399.125, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.17783343769567939, "frac_reward_zero_std": 0.0, "grad_norm": 2.396651196247218, "kl": 0.001613616943359375, "learning_rate": 9.841104494319492e-07, "loss": -0.0053, "num_tokens": 7468879.0, "reward": 0.0, "reward_std": 0.6796972155570984, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021608644866332537, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11519923314511032, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0739118594202782, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1434.4375, "completions/mean_terminated_length": 1350.1429443359375, "completions/min_length": 1215.0, "completions/min_terminated_length": 1215.0, "epoch": 0.17908578584846588, "frac_reward_zero_std": 0.0, "grad_norm": 2.6795502864911453, "kl": 0.00296783447265625, "learning_rate": 9.835884260713826e-07, "loss": 0.0053, "num_tokens": 7526334.0, "reward": 7.450580596923828e-09, "reward_std": 0.9401005506515503, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05878136743445916, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15259208491300538, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1081.1875, "completions/mean_terminated_length": 1021.357177734375, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.18033813400125234, "frac_reward_zero_std": 0.0, "grad_norm": 2.4443025787306487, "kl": 0.0014491081237792969, "learning_rate": 9.830581248340904e-07, "loss": 0.0523, "num_tokens": 7560449.0, "reward": 0.0, "reward_std": 0.6386822462081909, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030072721096349574, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07933031547923879, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1464.875, "completions/mean_terminated_length": 1312.666748046875, "completions/min_length": 1143.0, "completions/min_terminated_length": 1143.0, "epoch": 0.18159048215403883, "frac_reward_zero_std": 0.0, "grad_norm": 3.281062477288285, "kl": 0.00322723388671875, "learning_rate": 9.82519555844347e-07, "loss": 0.0292, "num_tokens": 7621295.0, "reward": 0.0, "reward_std": 0.8289343118667603, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12500933186269494, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10441096539901965, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1436.875, "completions/mean_terminated_length": 1298.0, "completions/min_length": 1137.0, "completions/min_terminated_length": 1137.0, "epoch": 0.1828428303068253, "frac_reward_zero_std": 0.0, "grad_norm": 2.852746135375484, "kl": 0.0022106170654296875, "learning_rate": 9.819727293842715e-07, "loss": -0.0099, "num_tokens": 7663125.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9735676646232605, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008435227123041298, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08786012223776958, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1447.0625, "completions/mean_terminated_length": 1288.25, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.18409517845961176, "frac_reward_zero_std": 0.0, "grad_norm": 2.7900001366496268, "kl": 0.002574920654296875, "learning_rate": 9.814176558936306e-07, "loss": 0.0107, "num_tokens": 7727518.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3397839367389679, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09743503994599206, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16748018946124937, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 863.0, "completions/mean_length": 1135.0625, "completions/mean_terminated_length": 770.125, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.18534752661239826, "frac_reward_zero_std": 0.0, "grad_norm": 2.560665055023378, "kl": 0.00197601318359375, "learning_rate": 9.808543459696394e-07, "loss": -0.0149, "num_tokens": 7771327.0, "reward": 0.0, "reward_std": 0.9778045415878296, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.20066201620356428, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3214780108822807, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1307.375, "completions/mean_terminated_length": 1157.5555419921875, "completions/min_length": 336.0, "completions/min_terminated_length": 336.0, "epoch": 0.18659987476518472, "frac_reward_zero_std": 0.0, "grad_norm": 3.698308959281013, "kl": 0.003589630126953125, "learning_rate": 9.802828103667598e-07, "loss": 0.0049, "num_tokens": 7824917.0, "reward": -9.313225746154785e-09, "reward_std": 0.929603099822998, "rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0017376960374372932, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03411053398366144, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1954576775256058, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1350.8125, "completions/mean_terminated_length": 1234.77783203125, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.18785222291797118, "frac_reward_zero_std": 0.0, "grad_norm": 3.37227831535049, "kl": 0.003437042236328125, "learning_rate": 9.797030599964946e-07, "loss": -0.0282, "num_tokens": 7879658.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6427962779998779, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0803417321639054, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11525098223680169, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1446.0625, "completions/mean_terminated_length": 1327.4000244140625, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.18910457107075768, "frac_reward_zero_std": 0.0, "grad_norm": 2.6387419512111028, "kl": 0.002285003662109375, "learning_rate": 9.791151059271787e-07, "loss": -0.0106, "num_tokens": 7927819.0, "reward": 0.0, "reward_std": 0.9979233145713806, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657694240337725, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20181152584757237, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1422.1875, "completions/mean_terminated_length": 1344.375, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.19035691922354414, "frac_reward_zero_std": 0.0, "grad_norm": 2.8499814649815485, "kl": 0.00269317626953125, "learning_rate": 9.78518959383769e-07, "loss": -0.0267, "num_tokens": 7979030.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6457971334457397, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032289559957375875, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03678022720872768, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.19160926737633063, "frac_reward_zero_std": 0.0, "grad_norm": 2.7439239666738633, "kl": 0.00275421142578125, "learning_rate": 9.779146317476294e-07, "loss": 0.0001, "num_tokens": 8039006.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0521876811981201, "rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09760563861386369, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10390475856290554, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 1500.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 1500.0, "epoch": 0.1928616155291171, "frac_reward_zero_std": 0.0, "grad_norm": 3.0303320592221237, "kl": 0.003154754638671875, "learning_rate": 9.773021345563133e-07, "loss": 0.0001, "num_tokens": 8103454.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0458917617797852, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.25916260149601344, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18093382728997642, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1404.1875, "completions/mean_terminated_length": 1346.7000732421875, "completions/min_length": 1252.0, "completions/min_terminated_length": 1252.0, "epoch": 0.19411396368190356, "frac_reward_zero_std": 0.0, "grad_norm": 2.7184034168824684, "kl": 0.002166748046875, "learning_rate": 9.766814795033438e-07, "loss": 0.0074, "num_tokens": 8157473.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9921345710754395, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05048016331986036, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1222984521625515, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07187952884282611, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1382.875, "completions/mean_terminated_length": 1031.5, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.19536631183469005, "frac_reward_zero_std": 0.5, "grad_norm": 1.7493199389788998, "kl": 0.002361297607421875, "learning_rate": 9.7605267843799e-07, "loss": -0.0294, "num_tokens": 8204367.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7406100630760193, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4396175531814227, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.42217210131772864, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1252.6875, "completions/mean_terminated_length": 1104.300048828125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.19661865998747652, "frac_reward_zero_std": 0.0, "grad_norm": 3.1812238222319373, "kl": 0.002895355224609375, "learning_rate": 9.754157433650416e-07, "loss": 0.0099, "num_tokens": 8250426.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7503967881202698, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09038614901064657, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10393207102574, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1485.9375, "completions/mean_terminated_length": 1425.0, "completions/min_length": 1367.0, "completions/min_terminated_length": 1367.0, "epoch": 0.19787100814026298, "frac_reward_zero_std": 0.0, "grad_norm": 2.274381341557373, "kl": 0.0019855499267578125, "learning_rate": 9.74770686444578e-07, "loss": -0.0039, "num_tokens": 8312649.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0463612079620361, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01854492153050523, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07355929227115507, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1468.6875, "completions/mean_terminated_length": 1249.5, "completions/min_length": 1203.0, "completions/min_terminated_length": 1203.0, "epoch": 0.19912335629304947, "frac_reward_zero_std": 0.0, "grad_norm": 3.037789984063529, "kl": 0.00295257568359375, "learning_rate": 9.74117519991739e-07, "loss": 0.0195, "num_tokens": 8372460.0, "reward": 5.960464477539063e-08, "reward_std": 0.6518849730491638, "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010805361779511215, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10954072593469087, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12041594578792295, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1422.5625, "completions/mean_terminated_length": 1293.5, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.20037570444583594, "frac_reward_zero_std": 0.0, "grad_norm": 2.4914861954275853, "kl": 0.0020751953125, "learning_rate": 9.734562564764863e-07, "loss": -0.0084, "num_tokens": 8441477.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0050157308578491, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20082839440532127, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24021378555176306, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1496.5, "completions/mean_terminated_length": 1444.0, "completions/min_length": 1444.0, "completions/min_terminated_length": 1444.0, "epoch": 0.2016280525986224, "frac_reward_zero_std": 0.0, "grad_norm": 2.96733321823591, "kl": 0.003032684326171875, "learning_rate": 9.727869085233683e-07, "loss": 0.0008, "num_tokens": 8500525.0, "reward": 0.0, "reward_std": 1.0511749982833862, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.30725599890646893, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11503663852918616, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1405.9375, "completions/mean_terminated_length": 1199.0, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.2028804007514089, "frac_reward_zero_std": 0.0, "grad_norm": 3.1286840232588795, "kl": 0.003131866455078125, "learning_rate": 9.721094889112769e-07, "loss": -0.0017, "num_tokens": 8561668.0, "reward": 0.0, "reward_std": 1.0658842325210571, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3530029462031852, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3684803684710799, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1408.4375, "completions/mean_terminated_length": 1290.71435546875, "completions/min_length": 1074.0, "completions/min_terminated_length": 1074.0, "epoch": 0.20413274890419536, "frac_reward_zero_std": 0.0, "grad_norm": 2.9421524856257575, "kl": 0.003437042236328125, "learning_rate": 9.714240105732056e-07, "loss": -0.0217, "num_tokens": 8611395.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8545268774032593, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004406093333840853, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07521193600811737, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1383.4375, "completions/mean_terminated_length": 878.3333740234375, "completions/min_length": 209.0, "completions/min_terminated_length": 209.0, "epoch": 0.20538509705698185, "frac_reward_zero_std": 0.0, "grad_norm": 2.8733824694411965, "kl": 0.002685546875, "learning_rate": 9.707304865960003e-07, "loss": 0.0086, "num_tokens": 8668282.0, "reward": -2.9802322387695312e-08, "reward_std": 1.019072413444519, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09355282337201007, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11627823063016991, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12619796324000607, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1461.0625, "completions/mean_terminated_length": 1292.3333740234375, "completions/min_length": 1182.0, "completions/min_terminated_length": 1182.0, "epoch": 0.20663744520976832, "frac_reward_zero_std": 0.0, "grad_norm": 3.0251334252111324, "kl": 0.00315093994140625, "learning_rate": 9.700289302201118e-07, "loss": -0.0054, "num_tokens": 8726843.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9717680215835571, "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03285324398900216, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1305907322232915, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1479.9375, "completions/mean_terminated_length": 1339.5, "completions/min_length": 1187.0, "completions/min_terminated_length": 1187.0, "epoch": 0.20788979336255478, "frac_reward_zero_std": 0.0, "grad_norm": 3.1164633460783593, "kl": 0.00360107421875, "learning_rate": 9.69319354839341e-07, "loss": -0.01, "num_tokens": 8774074.0, "reward": 0.0, "reward_std": 0.6172374486923218, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010136480012205995, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05376440319397317, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797316, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1443.6875, "completions/mean_terminated_length": 1371.2857666015625, "completions/min_length": 1221.0, "completions/min_terminated_length": 1221.0, "epoch": 0.20914214151534127, "frac_reward_zero_std": 0.0, "grad_norm": 2.8681254932066893, "kl": 0.003143310546875, "learning_rate": 9.686017740005845e-07, "loss": -0.0029, "num_tokens": 8833421.0, "reward": 0.0, "reward_std": 1.049817442893982, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23814174262345672, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24595173419132288, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1498.9375, "completions/mean_terminated_length": 1483.0, "completions/min_length": 1483.0, "completions/min_terminated_length": 1483.0, "epoch": 0.21039448966812774, "frac_reward_zero_std": 0.0, "grad_norm": 2.699748602413328, "kl": 0.002422332763671875, "learning_rate": 9.678762014035755e-07, "loss": 0.001, "num_tokens": 8896332.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8306390047073364, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01676756574749607, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03592053954406261, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1463.25, "completions/mean_terminated_length": 1304.0, "completions/min_length": 1117.0, "completions/min_terminated_length": 1117.0, "epoch": 0.2116468378209142, "frac_reward_zero_std": 0.0, "grad_norm": 2.9281066984860664, "kl": 0.0030670166015625, "learning_rate": 9.67142650900622e-07, "loss": 0.0284, "num_tokens": 8960800.0, "reward": -1.4901161193847656e-08, "reward_std": 1.051703691482544, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02864644527108891, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12265684181148895, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1492.8125, "completions/mean_terminated_length": 1385.0, "completions/min_length": 1385.0, "completions/min_terminated_length": 1385.0, "epoch": 0.2128991859737007, "frac_reward_zero_std": 0.0, "grad_norm": 2.719524959962169, "kl": 0.002513885498046875, "learning_rate": 9.664011364963427e-07, "loss": -0.0014, "num_tokens": 9014901.0, "reward": 0.0, "reward_std": 0.6419066190719604, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026585625959977408, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774533640389131, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1398.875, "completions/mean_terminated_length": 1268.857177734375, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.21415153412648716, "frac_reward_zero_std": 0.0, "grad_norm": 3.2711757628292637, "kl": 0.003734588623046875, "learning_rate": 9.656516723474003e-07, "loss": 0.0199, "num_tokens": 9082635.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7075515985488892, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04754440907840732, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19258567827157586, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1372.0, "completions/mean_terminated_length": 1244.0, "completions/min_length": 414.0, "completions/min_terminated_length": 414.0, "epoch": 0.21540388227927365, "frac_reward_zero_std": 0.0, "grad_norm": 3.130031481410935, "kl": 0.003208160400390625, "learning_rate": 9.648942727622293e-07, "loss": -0.0004, "num_tokens": 9139131.0, "reward": 4.470348358154297e-08, "reward_std": 0.8231313824653625, "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.15172830287547154, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10156016936265624, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.825, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1439.125, "completions/mean_terminated_length": 1337.666748046875, "completions/min_length": 1231.0, "completions/min_terminated_length": 1231.0, "epoch": 0.21665623043206012, "frac_reward_zero_std": 0.0, "grad_norm": 2.8174656495384003, "kl": 0.003330230712890625, "learning_rate": 9.641289522007648e-07, "loss": 0.0184, "num_tokens": 9189589.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9120515584945679, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08079485341203167, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.404800820644525, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059004080210452274, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1416.4375, "completions/mean_terminated_length": 1232.5999755859375, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.21790857858484658, "frac_reward_zero_std": 0.0, "grad_norm": 2.883462614618569, "kl": 0.003101348876953125, "learning_rate": 9.633557252741655e-07, "loss": -0.0209, "num_tokens": 9242428.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7592308521270752, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09250187361454984, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24709362304891008, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1630723538573985, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1153.5625, "completions/mean_terminated_length": 884.1111450195312, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.21916092673763307, "frac_reward_zero_std": 0.0, "grad_norm": 3.067205627199215, "kl": 0.003124237060546875, "learning_rate": 9.625746067445344e-07, "loss": 0.0267, "num_tokens": 9286885.0, "reward": 0.0, "reward_std": 0.8734534978866577, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0040830023789233914, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.007219286680192259, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242308, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1478.625, "completions/mean_terminated_length": 1329.0, "completions/min_length": 1183.0, "completions/min_terminated_length": 1183.0, "epoch": 0.22041327489041954, "frac_reward_zero_std": 0.0, "grad_norm": 3.1276319460691004, "kl": 0.003200531005859375, "learning_rate": 9.61785611524638e-07, "loss": -0.0146, "num_tokens": 9345695.0, "reward": 0.0, "reward_std": 0.7759820222854614, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008401002667427777, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08187937939788012, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1251.3125, "completions/mean_terminated_length": 1215.7857666015625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.221665623043206, "frac_reward_zero_std": 0.0, "grad_norm": 2.6069318364125738, "kl": 0.0021648406982421875, "learning_rate": 9.609887546776213e-07, "loss": -0.0061, "num_tokens": 9382804.0, "reward": 0.0, "reward_std": 0.8300349712371826, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04134925667146179, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05624626120552443, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07698003589195014, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1388.0, "completions/mean_terminated_length": 1244.0, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.2229179711959925, "frac_reward_zero_std": 0.0, "grad_norm": 3.156831352479139, "kl": 0.00347137451171875, "learning_rate": 9.601840514167194e-07, "loss": -0.0001, "num_tokens": 9443532.0, "reward": 0.0, "reward_std": 0.9561296701431274, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02300302439349743, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06503983162022253, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1296.75, "completions/mean_terminated_length": 1204.3636474609375, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.22417031934877896, "frac_reward_zero_std": 0.0, "grad_norm": 3.2566872824431337, "kl": 0.003185272216796875, "learning_rate": 9.593715171049677e-07, "loss": -0.0019, "num_tokens": 9493936.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9979840517044067, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04123772744400983, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055545285602727666, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1462.0625, "completions/mean_terminated_length": 1348.25, "completions/min_length": 1185.0, "completions/min_terminated_length": 1185.0, "epoch": 0.22542266750156542, "frac_reward_zero_std": 0.0, "grad_norm": 3.118655152417983, "kl": 0.003711700439453125, "learning_rate": 9.585511672549087e-07, "loss": -0.0119, "num_tokens": 9547913.0, "reward": 0.0, "reward_std": 0.6055276393890381, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.2990419990496254, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.5212506601592531, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1253.875, "completions/mean_terminated_length": 1062.4444580078125, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.2266750156543519, "frac_reward_zero_std": 0.0, "grad_norm": 3.453588710121719, "kl": 0.003208160400390625, "learning_rate": 9.577230175282956e-07, "loss": -0.0189, "num_tokens": 9590383.0, "reward": -2.9802322387695312e-08, "reward_std": 1.026740550994873, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22982623849797099, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35491751307206737, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1397.3125, "completions/mean_terminated_length": 1294.625, "completions/min_length": 1209.0, "completions/min_terminated_length": 1209.0, "epoch": 0.22792736380713838, "frac_reward_zero_std": 0.0, "grad_norm": 2.8117171390856717, "kl": 0.00273895263671875, "learning_rate": 9.568870837357933e-07, "loss": 0.0049, "num_tokens": 9635180.0, "reward": 0.0, "reward_std": 0.9024027585983276, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015236533423952495, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05515104905405319, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1389.4375, "completions/mean_terminated_length": 1247.2857666015625, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.22917971195992487, "frac_reward_zero_std": 0.0, "grad_norm": 3.146143558726643, "kl": 0.003173828125, "learning_rate": 9.56043381836677e-07, "loss": 0.0244, "num_tokens": 9691707.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6937527656555176, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06475936323780643, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07867382027532054, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1331.75, "completions/mean_terminated_length": 1275.666748046875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.23043206011271133, "frac_reward_zero_std": 0.0, "grad_norm": 3.445265358440665, "kl": 0.003719329833984375, "learning_rate": 9.551919279385267e-07, "loss": 0.0321, "num_tokens": 9741247.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9354739785194397, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011286604414356131, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06370003732540648, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09878896324620107, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1312.6875, "completions/mean_terminated_length": 1269.4615478515625, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.2316844082654978, "frac_reward_zero_std": 0.0, "grad_norm": 3.2176769971519983, "kl": 0.0028533935546875, "learning_rate": 9.543327382969203e-07, "loss": 0.0001, "num_tokens": 9800986.0, "reward": 0.0, "reward_std": 0.8514897227287292, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06189917187460071, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09461702207527528, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1251.9375, "completions/mean_terminated_length": 1251.9375, "completions/min_length": 1139.0, "completions/min_terminated_length": 1139.0, "epoch": 0.2329367564182843, "frac_reward_zero_std": 0.0, "grad_norm": 1.8078703137230523, "kl": 0.0009489059448242188, "learning_rate": 9.534658293151226e-07, "loss": 0.0206, "num_tokens": 9844961.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0031490325927734, "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1837486103073024, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2121586351571871, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.23418910457107076, "frac_reward_zero_std": 0.0, "grad_norm": 2.341222957754127, "kl": 0.0020427703857421875, "learning_rate": 9.525912175437733e-07, "loss": 0.0001, "num_tokens": 9904889.0, "reward": 0.0, "reward_std": 0.8993015289306641, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0626094048175301, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14524912930313416, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1401.75, "completions/mean_terminated_length": 1303.5, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.23544145272385722, "frac_reward_zero_std": 0.0, "grad_norm": 2.7109926581212966, "kl": 0.0028514862060546875, "learning_rate": 9.5170891968057e-07, "loss": 0.0103, "num_tokens": 9960061.0, "reward": 0.0, "reward_std": 0.4977339506149292, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007690022648520695, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11369344635650466, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1399.3125, "completions/mean_terminated_length": 1321.0, "completions/min_length": 1270.0, "completions/min_terminated_length": 1270.0, "epoch": 0.2366938008766437, "frac_reward_zero_std": 0.0, "grad_norm": 2.2019750861669105, "kl": 0.0019474029541015625, "learning_rate": 9.508189525699498e-07, "loss": 0.0016, "num_tokens": 10018474.0, "reward": 0.0, "reward_std": 0.40811485052108765, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09626746004308685, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11868608664564458, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1067.0, "completions/mean_length": 1176.1875, "completions/mean_terminated_length": 852.375, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.23794614902943018, "frac_reward_zero_std": 0.0, "grad_norm": 3.0536777064675973, "kl": 0.00238037109375, "learning_rate": 9.499213332027676e-07, "loss": -0.0079, "num_tokens": 10055509.0, "reward": -5.960464477539063e-08, "reward_std": 0.5494594573974609, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09604975311367514, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10254148239725733, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686702, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 980.5, "completions/mean_terminated_length": 980.5, "completions/min_length": 598.0, "completions/min_terminated_length": 598.0, "epoch": 0.23919849718221667, "frac_reward_zero_std": 0.0, "grad_norm": 3.5775077184845583, "kl": 0.003173828125, "learning_rate": 9.490160787159716e-07, "loss": -0.0435, "num_tokens": 10088493.0, "reward": 0.0, "reward_std": 0.7993010878562927, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017021331786918385, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08925782815695868, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5791666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625447, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1205.125, "completions/mean_terminated_length": 1028.2000732421875, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.24045084533500313, "frac_reward_zero_std": 0.0, "grad_norm": 3.472937551782796, "kl": 0.003765106201171875, "learning_rate": 9.481032063922764e-07, "loss": 0.0801, "num_tokens": 10134447.0, "reward": 0.0, "reward_std": 0.9049590826034546, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028540941769550358, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.047797358383350766, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346314, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1342.1875, "completions/mean_terminated_length": 1219.4444580078125, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.2417031934877896, "frac_reward_zero_std": 0.0, "grad_norm": 3.0654183912224107, "kl": 0.00345611572265625, "learning_rate": 9.471827336598332e-07, "loss": -0.0116, "num_tokens": 10182434.0, "reward": -2.9802322387695312e-08, "reward_std": 0.894692599773407, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.10903944916375954, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15533453332102554, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06540472290116196, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.2429555416405761, "frac_reward_zero_std": 0.0, "grad_norm": 2.7779664849113415, "kl": 0.003116607666015625, "learning_rate": 9.462546780918966e-07, "loss": 0.0001, "num_tokens": 10244530.0, "reward": 0.0, "reward_std": 0.967013955116272, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.053086024723834134, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06887877561253418, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1454.9375, "completions/mean_terminated_length": 1397.0001220703125, "completions/min_length": 1265.0, "completions/min_terminated_length": 1265.0, "epoch": 0.24420788979336255, "frac_reward_zero_std": 0.0, "grad_norm": 3.1642061803468913, "kl": 0.003826141357421875, "learning_rate": 9.453190574064893e-07, "loss": -0.0047, "num_tokens": 10299345.0, "reward": 1.862645149230957e-08, "reward_std": 1.04762601852417, "rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04354357070732585, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08903133853741613, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1337.0625, "completions/mean_terminated_length": 1282.75, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.24546023794614902, "frac_reward_zero_std": 0.0, "grad_norm": 3.0019183290119758, "kl": 0.003467559814453125, "learning_rate": 9.443758894660638e-07, "loss": 0.0284, "num_tokens": 10358514.0, "reward": 0.0, "reward_std": 0.6315692067146301, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07697389081957594, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12645427286420413, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10671873729054746, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1494.5625, "completions/mean_terminated_length": 1413.0, "completions/min_length": 1413.0, "completions/min_terminated_length": 1413.0, "epoch": 0.2467125860989355, "frac_reward_zero_std": 0.0, "grad_norm": 2.8267138784190755, "kl": 0.002933502197265625, "learning_rate": 9.434251922771616e-07, "loss": 0.0078, "num_tokens": 10411171.0, "reward": 2.9802322387695312e-08, "reward_std": 0.884939432144165, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01747490695405262, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06365932956310252, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1410.375, "completions/mean_terminated_length": 1261.0, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.24796493425172197, "frac_reward_zero_std": 0.0, "grad_norm": 3.1021608965378076, "kl": 0.003887176513671875, "learning_rate": 9.424669839900691e-07, "loss": 0.0143, "num_tokens": 10469257.0, "reward": -5.21540641784668e-08, "reward_std": 1.061091661453247, "rewards/wordcountpos_reward_ecommerce/mean": -5.21540641784668e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0551289409217747, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2097823559795121, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1370.9375, "completions/mean_terminated_length": 1205.0, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.24921728240450847, "frac_reward_zero_std": 0.0, "grad_norm": 2.6919185602949653, "kl": 0.002506256103515625, "learning_rate": 9.415012828984714e-07, "loss": 0.0067, "num_tokens": 10523624.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7187443971633911, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03211836693332174, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13737955494238535, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1390.8125, "completions/mean_terminated_length": 1281.625, "completions/min_length": 1177.0, "completions/min_terminated_length": 1177.0, "epoch": 0.25046963055729493, "frac_reward_zero_std": 0.0, "grad_norm": 2.450252869715986, "kl": 0.00223541259765625, "learning_rate": 9.405281074391022e-07, "loss": -0.0098, "num_tokens": 10579429.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4103597402572632, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1406289464666968, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15985873234433481, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1295.8125, "completions/mean_terminated_length": 1266.6429443359375, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.2517219787100814, "frac_reward_zero_std": 0.0, "grad_norm": 2.642709475322054, "kl": 0.0023345947265625, "learning_rate": 9.395474761913939e-07, "loss": 0.014, "num_tokens": 10628866.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7710261940956116, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04348868814755175, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0830759853911682, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516198, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1357.125, "completions/mean_terminated_length": 1324.1539306640625, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.25297432686286786, "frac_reward_zero_std": 0.0, "grad_norm": 3.216143536482822, "kl": 0.004241943359375, "learning_rate": 9.3855940787712e-07, "loss": -0.0086, "num_tokens": 10670092.0, "reward": 0.0, "reward_std": 0.6420686841011047, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03687807737633173, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16934247164490465, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14343665526661611, "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1278.625, "completions/mean_terminated_length": 1227.5384521484375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.2542266750156543, "frac_reward_zero_std": 0.0, "grad_norm": 3.4244156852349814, "kl": 0.0052642822265625, "learning_rate": 9.375639213600401e-07, "loss": -0.0436, "num_tokens": 10728350.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9011333584785461, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09818030402455966, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07523729893672071, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13080944580232393, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1443.9375, "completions/mean_terminated_length": 1371.857177734375, "completions/min_length": 1252.0, "completions/min_terminated_length": 1252.0, "epoch": 0.25547902316844084, "frac_reward_zero_std": 0.0, "grad_norm": 3.5077076210411278, "kl": 0.00457000732421875, "learning_rate": 9.365610356455384e-07, "loss": 0.0019, "num_tokens": 10791365.0, "reward": 4.470348358154297e-08, "reward_std": 0.8847507238388062, "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015580215905333485, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06751943458738671, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.135263802609184, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1391.8125, "completions/mean_terminated_length": 1067.25, "completions/min_length": 264.0, "completions/min_terminated_length": 264.0, "epoch": 0.2567313713212273, "frac_reward_zero_std": 0.0, "grad_norm": 2.827913375094597, "kl": 0.003643035888671875, "learning_rate": 9.355507698802613e-07, "loss": -0.0786, "num_tokens": 10852330.0, "reward": 7.450580596923828e-09, "reward_std": 1.0562589168548584, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09460135777577211, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12653992925605045, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1426.6875, "completions/mean_terminated_length": 1265.4000244140625, "completions/min_length": 1092.0, "completions/min_terminated_length": 1092.0, "epoch": 0.2579837194740138, "frac_reward_zero_std": 0.0, "grad_norm": 4.109416757035146, "kl": 0.00577545166015625, "learning_rate": 9.345331433517522e-07, "loss": 0.0289, "num_tokens": 10918837.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9729784727096558, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04706903609226349, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08099201475868337, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1464.1875, "completions/mean_terminated_length": 1356.75, "completions/min_length": 1188.0, "completions/min_terminated_length": 1188.0, "epoch": 0.25923606762680024, "frac_reward_zero_std": 0.0, "grad_norm": 3.1385572152974905, "kl": 0.003864288330078125, "learning_rate": 9.335081754880825e-07, "loss": 0.0082, "num_tokens": 10974608.0, "reward": -5.960464477539063e-08, "reward_std": 0.5515385270118713, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06366384054522155, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10778487016156474, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1448.625, "completions/mean_terminated_length": 1363.0, "completions/min_length": 1103.0, "completions/min_terminated_length": 1103.0, "epoch": 0.2604884157795867, "frac_reward_zero_std": 0.0, "grad_norm": 3.2758893957680097, "kl": 0.004230499267578125, "learning_rate": 9.32475885857481e-07, "loss": -0.0053, "num_tokens": 11033482.0, "reward": 0.0, "reward_std": 0.5894155502319336, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05038277241462744, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07116397984833597, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1455.75, "completions/mean_terminated_length": 1264.0, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.2617407639323732, "frac_reward_zero_std": 0.0, "grad_norm": 2.6461488967254634, "kl": 0.0025310516357421875, "learning_rate": 9.31436294167961e-07, "loss": -0.0132, "num_tokens": 11098902.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6484573483467102, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1468978313797672, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24420746920563674, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1237.1875, "completions/mean_terminated_length": 1079.5, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.2629931120851597, "frac_reward_zero_std": 0.0, "grad_norm": 3.313685913897086, "kl": 0.003513336181640625, "learning_rate": 9.303894202669428e-07, "loss": 0.0531, "num_tokens": 11148649.0, "reward": -1.1175870895385742e-08, "reward_std": 0.990402102470398, "rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0009552414586071921, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0038209658344287682, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1327.125, "completions/mean_terminated_length": 1154.25, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.26424546023794615, "frac_reward_zero_std": 0.0, "grad_norm": 3.0546042142648298, "kl": 0.0033416748046875, "learning_rate": 9.293352841408759e-07, "loss": -0.0213, "num_tokens": 11207483.0, "reward": 0.0, "reward_std": 0.5952367186546326, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.035691884267146166, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07657424493915134, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672248, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1306.25, "completions/mean_terminated_length": 1190.0, "completions/min_length": 1071.0, "completions/min_terminated_length": 1071.0, "epoch": 0.2654978083907326, "frac_reward_zero_std": 0.0, "grad_norm": 2.123766104646228, "kl": 0.0015621185302734375, "learning_rate": 9.282739059148566e-07, "loss": -0.0237, "num_tokens": 11255703.0, "reward": 0.0, "reward_std": 0.9929344654083252, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.025706850415670862, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11233922174981649, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1126.125, "completions/mean_terminated_length": 956.1818237304688, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.2667501565435191, "frac_reward_zero_std": 0.0, "grad_norm": 3.488984850056713, "kl": 0.0038604736328125, "learning_rate": 9.272053058522444e-07, "loss": -0.0253, "num_tokens": 11294505.0, "reward": 0.0, "reward_std": 0.5554646253585815, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23003407087469527, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20616326736471785, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15000000000000002, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 883.5, "completions/mean_terminated_length": 883.5, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.2680025046963056, "frac_reward_zero_std": 0.0, "grad_norm": 3.2969967424976456, "kl": 0.002185821533203125, "learning_rate": 9.261295043542747e-07, "loss": 0.0085, "num_tokens": 11325305.0, "reward": 2.421438694000244e-08, "reward_std": 1.039635419845581, "rewards/wordcountpos_reward_ecommerce/mean": 2.421438694000244e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06031083797758491, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16643314604295306, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11603000888978231, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1423.8125, "completions/mean_terminated_length": 1296.8333740234375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.26925485284909206, "frac_reward_zero_std": 0.0, "grad_norm": 2.683342974010468, "kl": 0.0028839111328125, "learning_rate": 9.250465219596699e-07, "loss": 0.0, "num_tokens": 11384166.0, "reward": 0.0, "reward_std": 0.6987115144729614, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015736024702926166, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06158481768754947, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1057600358603626, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1160.6875, "completions/mean_terminated_length": 1160.6875, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.27050720100187853, "frac_reward_zero_std": 0.0, "grad_norm": 3.800146584559223, "kl": 0.004093170166015625, "learning_rate": 9.239563793442462e-07, "loss": 0.0174, "num_tokens": 11441313.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8806728720664978, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0032805949907051112, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0510781770746922, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509009, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1307.0625, "completions/mean_terminated_length": 1279.5, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.271759549154665, "frac_reward_zero_std": 0.0, "grad_norm": 2.806350656696486, "kl": 0.002574920654296875, "learning_rate": 9.228590973205201e-07, "loss": -0.0377, "num_tokens": 11499258.0, "reward": -7.450580596923828e-09, "reward_std": 1.0440177917480469, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0614237528104428, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06944213481803516, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.27301189730745146, "frac_reward_zero_std": 0.0, "grad_norm": 2.4434394608429915, "kl": 0.002834320068359375, "learning_rate": 9.2175469683731e-07, "loss": 0.0001, "num_tokens": 11554162.0, "reward": 0.0, "reward_std": 0.9512232542037964, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006459758393578777, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09841534495892398, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1139.4375, "completions/mean_terminated_length": 1115.4000244140625, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.2742642454602379, "frac_reward_zero_std": 0.0, "grad_norm": 2.183780267914974, "kl": 0.0016641616821289062, "learning_rate": 9.206431989793374e-07, "loss": 0.0171, "num_tokens": 11599913.0, "reward": 0.0, "reward_std": 0.7830429077148438, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006944415247763777, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03283008559006156, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1346.5, "completions/mean_terminated_length": 1149.1429443359375, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.27551659361302444, "frac_reward_zero_std": 0.0, "grad_norm": 3.3703074221103817, "kl": 0.00464630126953125, "learning_rate": 9.195246249668232e-07, "loss": -0.0007, "num_tokens": 11664265.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7190686464309692, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.3476598454237376, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.4301665677025463, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1309.5625, "completions/mean_terminated_length": 1119.125, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.2767689417658109, "frac_reward_zero_std": 0.0, "grad_norm": 2.622730015507812, "kl": 0.0024242401123046875, "learning_rate": 9.183989961550832e-07, "loss": -0.0219, "num_tokens": 11719922.0, "reward": 0.0, "reward_std": 0.72877037525177, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0952471076969717, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12586824643040787, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1485.875, "completions/mean_terminated_length": 1387.0, "completions/min_length": 1382.0, "completions/min_terminated_length": 1382.0, "epoch": 0.27802128991859737, "frac_reward_zero_std": 0.0, "grad_norm": 2.782237650167012, "kl": 0.004253387451171875, "learning_rate": 9.172663340341204e-07, "loss": -0.0028, "num_tokens": 11778680.0, "reward": 0.0, "reward_std": 1.0009106397628784, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.012701224890322388, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02815604341593864, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1443.0625, "completions/mean_terminated_length": 1317.800048828125, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.27927363807138383, "frac_reward_zero_std": 0.0, "grad_norm": 2.998694336626448, "kl": 0.003566741943359375, "learning_rate": 9.161266602282147e-07, "loss": -0.0055, "num_tokens": 11838169.0, "reward": 0.0, "reward_std": 0.9211122989654541, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04490957636365446, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09152261044011904, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1488.875, "completions/mean_terminated_length": 1411.0, "completions/min_length": 1372.0, "completions/min_terminated_length": 1372.0, "epoch": 0.2805259862241703, "frac_reward_zero_std": 0.0, "grad_norm": 2.9658149533875187, "kl": 0.004146575927734375, "learning_rate": 9.149799964955093e-07, "loss": 0.008, "num_tokens": 11899975.0, "reward": -3.725290298461914e-09, "reward_std": 1.0432794094085693, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012174573886332358, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04195711207506097, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1409.8125, "completions/mean_terminated_length": 1139.25, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.2817783343769568, "frac_reward_zero_std": 0.0, "grad_norm": 2.936718458392348, "kl": 0.00342559814453125, "learning_rate": 9.138263647275969e-07, "loss": -0.0033, "num_tokens": 11941164.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0000627040863037, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15080494449355206, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08565387051258783, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1183.3125, "completions/mean_terminated_length": 1110.2308349609375, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.2830306825297433, "frac_reward_zero_std": 0.0, "grad_norm": 3.658827402816175, "kl": 0.00414276123046875, "learning_rate": 9.126657869491e-07, "loss": 0.0126, "num_tokens": 11992657.0, "reward": 0.0, "reward_std": 0.9479507207870483, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017415102975537073, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031912571116253466, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1499.0, "completions/mean_terminated_length": 1484.0, "completions/min_length": 1484.0, "completions/min_terminated_length": 1484.0, "epoch": 0.28428303068252975, "frac_reward_zero_std": 0.0, "grad_norm": 3.1261193198966652, "kl": 0.0041351318359375, "learning_rate": 9.114982853172521e-07, "loss": 0.0009, "num_tokens": 12054529.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0018526315689087, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012434236974245455, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045472914513713596, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.2855353788353162, "frac_reward_zero_std": 0.0, "grad_norm": 2.4904650551200485, "kl": 0.00292205810546875, "learning_rate": 9.103238821214727e-07, "loss": 0.0001, "num_tokens": 12114017.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4001474976539612, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19495499044861478, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26273237351903383, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059628479399994425, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1472.3125, "completions/mean_terminated_length": 1278.5, "completions/min_length": 1228.0, "completions/min_terminated_length": 1228.0, "epoch": 0.2867877269881027, "frac_reward_zero_std": 0.0, "grad_norm": 2.9738846657010085, "kl": 0.00360107421875, "learning_rate": 9.09142599782944e-07, "loss": -0.0048, "num_tokens": 12167838.0, "reward": 0.0, "reward_std": 0.981914758682251, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.2657549523204851, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31826899071497716, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6333333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725108, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1397.75, "completions/mean_terminated_length": 1266.2857666015625, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.28804007514088914, "frac_reward_zero_std": 0.0, "grad_norm": 2.9008571656686524, "kl": 0.003810882568359375, "learning_rate": 9.07954460854181e-07, "loss": -0.0435, "num_tokens": 12219114.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9752408266067505, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 1.7226310978600795e-05, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017137695280743562, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1381.0625, "completions/mean_terminated_length": 1262.125, "completions/min_length": 1118.0, "completions/min_terminated_length": 1118.0, "epoch": 0.28929242329367566, "frac_reward_zero_std": 0.0, "grad_norm": 2.579522085731086, "kl": 0.00286102294921875, "learning_rate": 9.067594880186016e-07, "loss": 0.0118, "num_tokens": 12283627.0, "reward": 0.0, "reward_std": 0.8155025839805603, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4072348540230938, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33458166056964905, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1432.375, "completions/mean_terminated_length": 1229.5, "completions/min_length": 1085.0, "completions/min_terminated_length": 1085.0, "epoch": 0.2905447714464621, "frac_reward_zero_std": 0.0, "grad_norm": 3.3504781665775463, "kl": 0.00449371337890625, "learning_rate": 9.055577040900944e-07, "loss": 0.0198, "num_tokens": 12334705.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0079009532928467, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007641388631451263, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1447140199531734, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1411.4375, "completions/mean_terminated_length": 1322.875, "completions/min_length": 1169.0, "completions/min_terminated_length": 1169.0, "epoch": 0.2917971195992486, "frac_reward_zero_std": 0.0, "grad_norm": 3.3485586422127005, "kl": 0.004638671875, "learning_rate": 9.043491320125814e-07, "loss": 0.0213, "num_tokens": 12389648.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8743376731872559, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002227354544120855, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08926616854117143, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1474.0, "completions/mean_terminated_length": 1430.666748046875, "completions/min_length": 1263.0, "completions/min_terminated_length": 1263.0, "epoch": 0.29304946775203505, "frac_reward_zero_std": 0.0, "grad_norm": 2.8494643618437947, "kl": 0.00304412841796875, "learning_rate": 9.031337948595817e-07, "loss": 0.0093, "num_tokens": 12456272.0, "reward": 0.0, "reward_std": 0.7424121499061584, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04108305878098174, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1161369232371233, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408158, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1141.4375, "completions/mean_terminated_length": 1117.533447265625, "completions/min_length": 557.0, "completions/min_terminated_length": 557.0, "epoch": 0.2943018159048215, "frac_reward_zero_std": 0.0, "grad_norm": 2.2293142678990754, "kl": 0.0016422271728515625, "learning_rate": 9.019117158337695e-07, "loss": 0.0038, "num_tokens": 12498031.0, "reward": -5.960464477539063e-08, "reward_std": 0.6336873769760132, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009688556708469433, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05746171503021093, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1529342632927262, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1298.0, "completions/mean_terminated_length": 1140.888916015625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.29555416405760804, "frac_reward_zero_std": 0.0, "grad_norm": 2.714021525456579, "kl": 0.00262451171875, "learning_rate": 9.006829182665325e-07, "loss": -0.0167, "num_tokens": 12548119.0, "reward": 0.0, "reward_std": 0.6711900234222412, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07748680022506171, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09355524405080126, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316063, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1300.5625, "completions/mean_terminated_length": 1254.5384521484375, "completions/min_length": 1067.0, "completions/min_terminated_length": 1067.0, "epoch": 0.2968065122103945, "frac_reward_zero_std": 0.0, "grad_norm": 2.982742616917611, "kl": 0.003223419189453125, "learning_rate": 8.99447425617525e-07, "loss": 0.0208, "num_tokens": 12596288.0, "reward": 7.450580596923828e-09, "reward_std": 1.049065351486206, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.24719836974150322, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26811306631065646, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1029.3125, "completions/mean_terminated_length": 997.9334106445312, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.29805886036318097, "frac_reward_zero_std": 0.0, "grad_norm": 2.8416489290807947, "kl": 0.002620697021484375, "learning_rate": 8.982052614742218e-07, "loss": 0.011, "num_tokens": 12642901.0, "reward": 0.0, "reward_std": 1.0193631649017334, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07904007503321656, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05617218071571685, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1096.1875, "completions/mean_terminated_length": 782.1111450195312, "completions/min_length": 444.0, "completions/min_terminated_length": 444.0, "epoch": 0.29931120851596743, "frac_reward_zero_std": 0.0, "grad_norm": 3.052640590300802, "kl": 0.003078460693359375, "learning_rate": 8.96956449551466e-07, "loss": 0.0293, "num_tokens": 12685520.0, "reward": 3.725290298461914e-08, "reward_std": 1.0355110168457031, "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0007223476637822487, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045707258037314374, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1176.375, "completions/mean_terminated_length": 1101.6923828125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.3005635566687539, "frac_reward_zero_std": 0.0, "grad_norm": 3.409137882904874, "kl": 0.00447845458984375, "learning_rate": 8.957010136910177e-07, "loss": 0.0027, "num_tokens": 12732478.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0500978231430054, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03744221002235665, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07008909373099989, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1412.4375, "completions/mean_terminated_length": 799.5, "completions/min_length": 364.0, "completions/min_terminated_length": 364.0, "epoch": 0.3018159048215404, "frac_reward_zero_std": 0.0, "grad_norm": 2.773019820684676, "kl": 0.003604888916015625, "learning_rate": 8.944389778610978e-07, "loss": -0.0118, "num_tokens": 12801637.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0457574129104614, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0662282436201746, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07940471297587236, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.22273551829717486, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 1244.8125, "completions/mean_terminated_length": 989.625, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.3030682529743269, "frac_reward_zero_std": 0.0, "grad_norm": 3.2644500704934876, "kl": 0.00386810302734375, "learning_rate": 8.931703661559313e-07, "loss": -0.0143, "num_tokens": 12856914.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8334095478057861, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.056274055481427915, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06634909249021953, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1456.9375, "completions/mean_terminated_length": 1327.75, "completions/min_length": 1033.0, "completions/min_terminated_length": 1033.0, "epoch": 0.30432060112711334, "frac_reward_zero_std": 0.0, "grad_norm": 2.6637299937398455, "kl": 0.003570556640625, "learning_rate": 8.918952027952867e-07, "loss": 0.0284, "num_tokens": 12917977.0, "reward": 0.0, "reward_std": 0.6500852704048157, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.022227592869964712, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.035807130460280175, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672246, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1087.0, "completions/mean_length": 1474.1875, "completions/mean_terminated_length": 1087.0, "completions/min_length": 1087.0, "completions/min_terminated_length": 1087.0, "epoch": 0.3055729492798998, "frac_reward_zero_std": 0.0, "grad_norm": 2.9975900171544794, "kl": 0.003841400146484375, "learning_rate": 8.906135121240139e-07, "loss": -0.0025, "num_tokens": 12975724.0, "reward": 7.450580596923828e-09, "reward_std": 1.0467472076416016, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.042561575382490995, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12106724719901756, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1228.9375, "completions/mean_terminated_length": 1066.300048828125, "completions/min_length": 227.0, "completions/min_terminated_length": 227.0, "epoch": 0.3068252974326863, "frac_reward_zero_std": 0.0, "grad_norm": 2.9921353466243374, "kl": 0.00344085693359375, "learning_rate": 8.89325318611579e-07, "loss": -0.1088, "num_tokens": 13028715.0, "reward": 0.0, "reward_std": 0.8063486218452454, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04616985070885913, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17900764914436168, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0718795288428261, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1367.3125, "completions/mean_terminated_length": 1307.0, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.30807764558547274, "frac_reward_zero_std": 0.0, "grad_norm": 3.02852662262937, "kl": 0.00337982177734375, "learning_rate": 8.880306468515979e-07, "loss": 0.0285, "num_tokens": 13077528.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4837535619735718, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010209232644034694, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1481102929172379, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16947631758514883, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1327.5625, "completions/mean_terminated_length": 1270.0833740234375, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.30932999373825926, "frac_reward_zero_std": 0.0, "grad_norm": 5.433838129064804, "kl": 0.009929656982421875, "learning_rate": 8.867295215613659e-07, "loss": 0.0288, "num_tokens": 13145409.0, "reward": 2.9802322387695312e-08, "reward_std": 0.691638708114624, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06467589999789795, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0938792951394418, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0824396524513313, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1467.9375, "completions/mean_terminated_length": 1397.4000244140625, "completions/min_length": 1284.0, "completions/min_terminated_length": 1284.0, "epoch": 0.3105823418910457, "frac_reward_zero_std": 0.0, "grad_norm": 2.9888443937256404, "kl": 0.0040283203125, "learning_rate": 8.85421967581386e-07, "loss": 0.0184, "num_tokens": 13198848.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9693495035171509, "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.032938770819161474, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.162768145864506, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1803289175881631, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1498.9375, "completions/mean_terminated_length": 1483.0, "completions/min_length": 1483.0, "completions/min_terminated_length": 1483.0, "epoch": 0.3118346900438322, "frac_reward_zero_std": 0.0, "grad_norm": 2.721188390535492, "kl": 0.003574371337890625, "learning_rate": 8.841080098748959e-07, "loss": 0.0006, "num_tokens": 13257207.0, "reward": 0.0, "reward_std": 0.9934348464012146, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.054418946541605284, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14760181642272932, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12345339501504504, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1359.3125, "completions/mean_terminated_length": 1178.4285888671875, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.31308703819661865, "frac_reward_zero_std": 0.0, "grad_norm": 3.2668088351359708, "kl": 0.005401611328125, "learning_rate": 8.827876735273893e-07, "loss": -0.03, "num_tokens": 13314820.0, "reward": -3.725290298461914e-09, "reward_std": 1.0606722831726074, "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1087294165966756, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07396732734066605, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1409.3125, "completions/mean_terminated_length": 1258.166748046875, "completions/min_length": 1036.0, "completions/min_terminated_length": 1036.0, "epoch": 0.3143393863494051, "frac_reward_zero_std": 0.0, "grad_norm": 3.0088889020769733, "kl": 0.003604888916015625, "learning_rate": 8.814609837461385e-07, "loss": 0.0432, "num_tokens": 13381449.0, "reward": 0.0, "reward_std": 0.6518675088882446, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005307542092858496, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017828779266863094, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1289.9375, "completions/mean_terminated_length": 1126.5555419921875, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.31559173450219163, "frac_reward_zero_std": 0.0, "grad_norm": 2.2400933230709956, "kl": 0.0020122528076171875, "learning_rate": 8.801279658597131e-07, "loss": 0.0011, "num_tokens": 13430872.0, "reward": 0.0, "reward_std": 0.8595645427703857, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12188488436675578, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.32049499716061297, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1483.375, "completions/mean_terminated_length": 1234.0, "completions/min_length": 1234.0, "completions/min_terminated_length": 1234.0, "epoch": 0.3168440826549781, "frac_reward_zero_std": 0.0, "grad_norm": 3.0575565927809905, "kl": 0.004093170166015625, "learning_rate": 8.787886453174951e-07, "loss": -0.0053, "num_tokens": 13479446.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9997775554656982, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.051598953607968394, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06761287588738078, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12583057392117916, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1068.0625, "completions/mean_terminated_length": 1068.0625, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.31809643080776456, "frac_reward_zero_std": 0.0, "grad_norm": 3.56780625703575, "kl": 0.00376129150390625, "learning_rate": 8.77443047689195e-07, "loss": -0.0249, "num_tokens": 13534791.0, "reward": 3.725290298461914e-09, "reward_std": 1.0647456645965576, "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12037176129735677, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15720532676467985, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 1264.8125, "completions/mean_terminated_length": 1029.625, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.319348778960551, "frac_reward_zero_std": 0.0, "grad_norm": 1.9002914944914675, "kl": 0.001689910888671875, "learning_rate": 8.760911986643621e-07, "loss": 0.0079, "num_tokens": 13585044.0, "reward": 7.450580596923828e-09, "reward_std": 1.0633113384246826, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11099520216632296, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11289406797895053, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1039.625, "completions/mean_terminated_length": 1039.625, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.3206011271133375, "frac_reward_zero_std": 0.0, "grad_norm": 1.7437212883045385, "kl": 0.0007447004318237305, "learning_rate": 8.747331240518946e-07, "loss": -0.0359, "num_tokens": 13622654.0, "reward": 0.0, "reward_std": 0.80560302734375, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05283560581406991, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1274858045865064, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1147.25, "completions/mean_terminated_length": 1123.7333984375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.32185347526612396, "frac_reward_zero_std": 0.0, "grad_norm": 3.829288365048447, "kl": 0.00440216064453125, "learning_rate": 8.73368849779547e-07, "loss": -0.0586, "num_tokens": 13666658.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0113918781280518, "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004399012913845209, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.022002579783276802, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1205.0, "completions/mean_terminated_length": 1136.923095703125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.3231058234189105, "frac_reward_zero_std": 0.0, "grad_norm": 3.5508475650699274, "kl": 0.00417327880859375, "learning_rate": 8.719984018934348e-07, "loss": -0.0198, "num_tokens": 13713002.0, "reward": -7.450580596923828e-09, "reward_std": 0.9411071538925171, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.049397690395078006, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14628546425305664, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13381856152046848, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1393.9375, "completions/mean_terminated_length": 1287.875, "completions/min_length": 1155.0, "completions/min_terminated_length": 1155.0, "epoch": 0.32435817157169694, "frac_reward_zero_std": 0.0, "grad_norm": 3.1259216947519164, "kl": 0.004150390625, "learning_rate": 8.706218065575374e-07, "loss": 0.0051, "num_tokens": 13765289.0, "reward": -5.960464477539063e-08, "reward_std": 0.7700310945510864, "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011427243535616135, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12374645217812205, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13045504405165223, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1161.25, "completions/mean_terminated_length": 1007.2727661132812, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.3256105197244834, "frac_reward_zero_std": 0.0, "grad_norm": 2.3286130814036103, "kl": 0.00212058424949646, "learning_rate": 8.692390900531985e-07, "loss": 0.0569, "num_tokens": 13819269.0, "reward": 0.0, "reward_std": 0.45210930705070496, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23343450769100488, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33023521153193414, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 974.1875, "completions/mean_terminated_length": 974.1875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.32686286787726987, "frac_reward_zero_std": 0.0, "grad_norm": 3.6973290112944848, "kl": 0.0045013427734375, "learning_rate": 8.678502787786249e-07, "loss": -0.0481, "num_tokens": 13849256.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8221656084060669, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01926574676180823, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.028234090328970243, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1254.3125, "completions/mean_terminated_length": 1106.9000244140625, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.32811521603005633, "frac_reward_zero_std": 0.0, "grad_norm": 2.8513068805319826, "kl": 0.003444671630859375, "learning_rate": 8.664553992483812e-07, "loss": -0.0294, "num_tokens": 13886621.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8134012818336487, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04343925396813008, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08880475360320686, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1267.5, "completions/mean_terminated_length": 1161.8182373046875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.32936756418284285, "frac_reward_zero_std": 0.0, "grad_norm": 2.8893558213524178, "kl": 0.003467559814453125, "learning_rate": 8.650544780928851e-07, "loss": -0.0196, "num_tokens": 13935477.0, "reward": -2.9802322387695312e-08, "reward_std": 0.748847246170044, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03399978669526769, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0913917502530681, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1393.75, "completions/mean_terminated_length": 1287.5, "completions/min_length": 1085.0, "completions/min_terminated_length": 1085.0, "epoch": 0.3306199123356293, "frac_reward_zero_std": 0.0, "grad_norm": 3.2549999172638953, "kl": 0.00475311279296875, "learning_rate": 8.63647542057898e-07, "loss": -0.0273, "num_tokens": 13998809.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9957271814346313, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032845331546287986, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1426354161680431, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1465.625, "completions/mean_terminated_length": 1316.666748046875, "completions/min_length": 1156.0, "completions/min_terminated_length": 1156.0, "epoch": 0.3318722604884158, "frac_reward_zero_std": 0.0, "grad_norm": 2.761811279986975, "kl": 0.004451751708984375, "learning_rate": 8.622346180040149e-07, "loss": 0.0022, "num_tokens": 14063899.0, "reward": 0.0, "reward_std": 0.9594628810882568, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003469042362222641, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06436545386363138, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1490.6875, "completions/mean_terminated_length": 1351.0, "completions/min_length": 1351.0, "completions/min_terminated_length": 1351.0, "epoch": 0.33312460864120225, "frac_reward_zero_std": 0.0, "grad_norm": 2.812294289039839, "kl": 0.00394439697265625, "learning_rate": 8.608157329061513e-07, "loss": -0.0088, "num_tokens": 14117462.0, "reward": 0.0, "reward_std": 0.8838216066360474, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.040579408270268943, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06117251081342495, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625451, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1466.375, "completions/mean_terminated_length": 1410.3333740234375, "completions/min_length": 1160.0, "completions/min_terminated_length": 1160.0, "epoch": 0.3343769567939887, "frac_reward_zero_std": 0.0, "grad_norm": 3.1377512293638716, "kl": 0.00469207763671875, "learning_rate": 8.59390913853028e-07, "loss": 0.0227, "num_tokens": 14167892.0, "reward": 0.0, "reward_std": 0.5932345390319824, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.095152127303474, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13689770081097544, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.33562930494677523, "frac_reward_zero_std": 0.0, "grad_norm": 3.006613104134553, "kl": 0.004669189453125, "learning_rate": 8.579601880466547e-07, "loss": 0.0002, "num_tokens": 14229372.0, "reward": 0.0, "reward_std": 1.0337638854980469, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04680772992368523, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14029739799038618, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1392.9375, "completions/mean_terminated_length": 1214.5, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.3368816530995617, "frac_reward_zero_std": 0.0, "grad_norm": 2.7703650077113675, "kl": 0.0036773681640625, "learning_rate": 8.565235828018099e-07, "loss": 0.0013, "num_tokens": 14289123.0, "reward": 0.0, "reward_std": 1.0367697477340698, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07296543210522512, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07812168004547569, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1464.5625, "completions/mean_terminated_length": 1358.25, "completions/min_length": 1214.0, "completions/min_terminated_length": 1214.0, "epoch": 0.33813400125234816, "frac_reward_zero_std": 0.0, "grad_norm": 2.773518375849927, "kl": 0.0041961669921875, "learning_rate": 8.550811255455198e-07, "loss": -0.0021, "num_tokens": 14352892.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9712283611297607, "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06869379781464208, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0930651391561654, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.3393863494051346, "frac_reward_zero_std": 0.0, "grad_norm": 2.79098774129514, "kl": 0.003692626953125, "learning_rate": 8.536328438165346e-07, "loss": 0.0001, "num_tokens": 14414740.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0314404964447021, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0126400376983615, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11352147882865961, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258102, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1491.625, "completions/mean_terminated_length": 1366.0, "completions/min_length": 1366.0, "completions/min_terminated_length": 1366.0, "epoch": 0.3406386975579211, "frac_reward_zero_std": 0.0, "grad_norm": 2.3328071582670127, "kl": 0.002582550048828125, "learning_rate": 8.521787652648026e-07, "loss": -0.0005, "num_tokens": 14475390.0, "reward": -2.9802322387695312e-08, "reward_std": 1.066014051437378, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04816321266725149, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23143656867957818, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 966.0, "completions/mean_terminated_length": 966.0, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.34189104571070755, "frac_reward_zero_std": 0.0, "grad_norm": 2.623716761400387, "kl": 0.00237274169921875, "learning_rate": 8.507189176509429e-07, "loss": 0.0118, "num_tokens": 14519830.0, "reward": 0.0, "reward_std": 0.8626605868339539, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09591776756938776, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0561472451616448, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1441.8125, "completions/mean_terminated_length": 1267.25, "completions/min_length": 1188.0, "completions/min_terminated_length": 1188.0, "epoch": 0.3431433938634941, "frac_reward_zero_std": 0.0, "grad_norm": 2.6860149670271087, "kl": 0.003993988037109375, "learning_rate": 8.492533288457142e-07, "loss": 0.0176, "num_tokens": 14562059.0, "reward": 0.0, "reward_std": 0.7108601331710815, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08617312005850387, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09760563193409819, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1495.3125, "completions/mean_terminated_length": 1462.5, "completions/min_length": 1440.0, "completions/min_terminated_length": 1440.0, "epoch": 0.34439574201628054, "frac_reward_zero_std": 0.0, "grad_norm": 3.40909669375685, "kl": 0.00632476806640625, "learning_rate": 8.477820268294844e-07, "loss": 0.0006, "num_tokens": 14626280.0, "reward": 0.0, "reward_std": 0.9581431150436401, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0518540297916247, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05711745364940273, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1498.8125, "completions/mean_terminated_length": 1490.5, "completions/min_length": 1487.0, "completions/min_terminated_length": 1487.0, "epoch": 0.345648090169067, "frac_reward_zero_std": 0.0, "grad_norm": 2.606304176627287, "kl": 0.003498077392578125, "learning_rate": 8.463050396916945e-07, "loss": 0.0, "num_tokens": 14686461.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8919962048530579, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.16232941024284467, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.38333692394397534, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1296.25, "completions/mean_terminated_length": 1034.2857666015625, "completions/min_length": 430.0, "completions/min_terminated_length": 430.0, "epoch": 0.34690043832185347, "frac_reward_zero_std": 0.0, "grad_norm": 3.4727706531128004, "kl": 0.0047149658203125, "learning_rate": 8.44822395630324e-07, "loss": -0.0713, "num_tokens": 14729641.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0214866399765015, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006892221922202982, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027501536576714, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1469.6875, "completions/mean_terminated_length": 1338.3333740234375, "completions/min_length": 1234.0, "completions/min_terminated_length": 1234.0, "epoch": 0.34815278647463993, "frac_reward_zero_std": 0.0, "grad_norm": 3.079175222753427, "kl": 0.004638671875, "learning_rate": 8.433341229513516e-07, "loss": 0.0011, "num_tokens": 14784988.0, "reward": 2.9802322387695312e-08, "reward_std": 1.011260986328125, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016744175612928278, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10326622112744127, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 926.75, "completions/mean_terminated_length": 926.75, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.34940513462742645, "frac_reward_zero_std": 0.0, "grad_norm": 2.3928033221665683, "kl": 0.0011532902717590332, "learning_rate": 8.41840250068215e-07, "loss": 0.0325, "num_tokens": 14819992.0, "reward": 0.0, "reward_std": 1.027898907661438, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0026503222290372498, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993382935974904, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059472994182545084, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1439.4375, "completions/mean_terminated_length": 1338.5, "completions/min_length": 1219.0, "completions/min_terminated_length": 1219.0, "epoch": 0.3506574827802129, "frac_reward_zero_std": 0.0, "grad_norm": 2.958647342200965, "kl": 0.00386810302734375, "learning_rate": 8.403408055012688e-07, "loss": 0.0226, "num_tokens": 14868223.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9957724213600159, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010003602936438873, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10122225063918935, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.575, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1458.3125, "completions/mean_terminated_length": 1366.5999755859375, "completions/min_length": 1197.0, "completions/min_terminated_length": 1197.0, "epoch": 0.3519098309329994, "frac_reward_zero_std": 0.0, "grad_norm": 3.2796328783487394, "kl": 0.0066375732421875, "learning_rate": 8.388358178772394e-07, "loss": -0.0218, "num_tokens": 14927820.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8733463287353516, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014896438499357663, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0518786397936184, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1253.0625, "completions/mean_terminated_length": 1140.8182373046875, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.35316217908578584, "frac_reward_zero_std": 0.0, "grad_norm": 3.1237440148175604, "kl": 0.004863739013671875, "learning_rate": 8.373253159286788e-07, "loss": -0.0073, "num_tokens": 14982213.0, "reward": 0.0, "reward_std": 0.7831696271896362, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06306545956559828, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774599513752542, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1441.875, "completions/mean_terminated_length": 1345.0, "completions/min_length": 1205.0, "completions/min_terminated_length": 1205.0, "epoch": 0.3544145272385723, "frac_reward_zero_std": 0.0, "grad_norm": 2.904795417720074, "kl": 0.00449371337890625, "learning_rate": 8.35809328493416e-07, "loss": -0.0205, "num_tokens": 15040715.0, "reward": 3.725290298461914e-09, "reward_std": 1.0330736637115479, "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012980308714010343, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0673415334549809, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 1267.125, "completions/mean_terminated_length": 1034.25, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.35566687539135877, "frac_reward_zero_std": 0.0, "grad_norm": 1.9530736863410463, "kl": 0.0021953582763671875, "learning_rate": 8.342878845140067e-07, "loss": 0.0243, "num_tokens": 15099253.0, "reward": -1.4901161193847656e-08, "reward_std": 1.060163974761963, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030749215825924263, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045245562410845486, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1273.375, "completions/mean_terminated_length": 1046.75, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.3569192235441453, "frac_reward_zero_std": 0.0, "grad_norm": 2.9643132058260195, "kl": 0.00377655029296875, "learning_rate": 8.327610130371804e-07, "loss": -0.0085, "num_tokens": 15156899.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9131340980529785, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05994073836967858, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16156243225331035, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 1196.4375, "completions/mean_terminated_length": 892.875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.35817157169693176, "frac_reward_zero_std": 0.0, "grad_norm": 3.4448564572392772, "kl": 0.005279541015625, "learning_rate": 8.312287432132857e-07, "loss": -0.0008, "num_tokens": 15210234.0, "reward": 0.0, "reward_std": 0.6276436448097229, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08401960696737835, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31657785119011167, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1118.0, "completions/mean_terminated_length": 1029.84619140625, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.3594239198497182, "frac_reward_zero_std": 0.0, "grad_norm": 3.649351006275311, "kl": 0.004192352294921875, "learning_rate": 8.296911042957347e-07, "loss": 0.0474, "num_tokens": 15254266.0, "reward": 7.450580596923828e-09, "reward_std": 1.0014917850494385, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05039245601276097, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0698277819618762, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1408.8125, "completions/mean_terminated_length": 1317.625, "completions/min_length": 1145.0, "completions/min_terminated_length": 1145.0, "epoch": 0.3606762680025047, "frac_reward_zero_std": 0.0, "grad_norm": 2.3753456567317306, "kl": 0.003170013427734375, "learning_rate": 8.281481256404427e-07, "loss": -0.0065, "num_tokens": 15310551.0, "reward": -7.450580596923828e-09, "reward_std": 1.0467666387557983, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.011685861651235842, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.020940553119970465, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1308.5, "completions/mean_terminated_length": 1117.0, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.36192861615529115, "frac_reward_zero_std": 0.0, "grad_norm": 2.7116355558289365, "kl": 0.0031452178955078125, "learning_rate": 8.265998367052699e-07, "loss": -0.0148, "num_tokens": 15357047.0, "reward": 0.0, "reward_std": 0.7932579517364502, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05102504905151101, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046974298933007336, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1303.375, "completions/mean_terminated_length": 1150.4444580078125, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.36318096430807767, "frac_reward_zero_std": 0.0, "grad_norm": 2.6471576640483025, "kl": 0.002368927001953125, "learning_rate": 8.25046267049458e-07, "loss": -0.0155, "num_tokens": 15419477.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9311500191688538, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.17596829941789516, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18219217687822756, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12412657816683506, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.36443331246086413, "frac_reward_zero_std": 0.0, "grad_norm": 2.972210343638841, "kl": 0.004058837890625, "learning_rate": 8.234874463330651e-07, "loss": 0.0002, "num_tokens": 15481293.0, "reward": 0.0, "reward_std": 0.6159095764160156, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06449275539626861, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07616565949841655, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1140.125, "completions/mean_terminated_length": 1020.1666870117188, "completions/min_length": 215.0, "completions/min_terminated_length": 215.0, "epoch": 0.3656856606136506, "frac_reward_zero_std": 0.0, "grad_norm": 3.7273022702751355, "kl": 0.00434112548828125, "learning_rate": 8.219234043164007e-07, "loss": -0.0148, "num_tokens": 15538271.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8317296504974365, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002159562349982134, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040954201238117, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1346.875, "completions/mean_terminated_length": 1227.77783203125, "completions/min_length": 1055.0, "completions/min_terminated_length": 1055.0, "epoch": 0.36693800876643706, "frac_reward_zero_std": 0.0, "grad_norm": 3.479167145868108, "kl": 0.005218505859375, "learning_rate": 8.203541708594571e-07, "loss": -0.0154, "num_tokens": 15584509.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0486056804656982, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005770089344222506, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07441253794038902, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1284.5625, "completions/mean_terminated_length": 1186.6363525390625, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.3681903569192235, "frac_reward_zero_std": 0.0, "grad_norm": 3.139340219060384, "kl": 0.00438690185546875, "learning_rate": 8.18779775921339e-07, "loss": 0.0201, "num_tokens": 15631742.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0337092876434326, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061225139692727595, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0882517727987926, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1493.0, "completions/mean_terminated_length": 1388.0, "completions/min_length": 1388.0, "completions/min_terminated_length": 1388.0, "epoch": 0.36944270507201, "frac_reward_zero_std": 0.0, "grad_norm": 2.4977483724240614, "kl": 0.0029296875, "learning_rate": 8.17200249559692e-07, "loss": -0.0007, "num_tokens": 15698798.0, "reward": 0.0, "reward_std": 0.4475941061973572, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.24246809612484624, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35195872278638696, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1458.625, "completions/mean_terminated_length": 1367.5999755859375, "completions/min_length": 1330.0, "completions/min_terminated_length": 1330.0, "epoch": 0.3706950532247965, "frac_reward_zero_std": 0.0, "grad_norm": 2.867561636937004, "kl": 0.004486083984375, "learning_rate": 8.156156219301287e-07, "loss": -0.0096, "num_tokens": 15766096.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9567909240722656, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09166855392489899, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11839819598536988, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1323.6875, "completions/mean_terminated_length": 1217.9000244140625, "completions/min_length": 1037.0, "completions/min_terminated_length": 1037.0, "epoch": 0.371947401377583, "frac_reward_zero_std": 0.0, "grad_norm": 3.38013590229778, "kl": 0.00470733642578125, "learning_rate": 8.140259232856521e-07, "loss": -0.0394, "num_tokens": 15817547.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9704372882843018, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05488740961091947, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10481500155411475, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1495.1875, "completions/mean_terminated_length": 1423.0, "completions/min_length": 1423.0, "completions/min_terminated_length": 1423.0, "epoch": 0.37319974953036944, "frac_reward_zero_std": 0.0, "grad_norm": 3.0382889306606327, "kl": 0.004367828369140625, "learning_rate": 8.124311839760797e-07, "loss": -0.0027, "num_tokens": 15868646.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8351828455924988, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03419630895774928, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1367852358309971, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1399.875, "completions/mean_terminated_length": 1179.5999755859375, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.3744520976831559, "frac_reward_zero_std": 0.0, "grad_norm": 2.362094019703377, "kl": 0.003170013427734375, "learning_rate": 8.108314344474623e-07, "loss": 0.0162, "num_tokens": 15934516.0, "reward": 7.450580596923828e-09, "reward_std": 0.9300060868263245, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026608295676684646, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05700271984957867, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1310.1875, "completions/mean_terminated_length": 1120.375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.37570444583594237, "frac_reward_zero_std": 0.0, "grad_norm": 3.2267030639366325, "kl": 0.004962921142578125, "learning_rate": 8.092267052415044e-07, "loss": 0.0104, "num_tokens": 15981759.0, "reward": 0.0, "reward_std": 0.9144766330718994, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.13077711907103481, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1576724545552638, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999157, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1499.6875, "completions/mean_terminated_length": 1495.0, "completions/min_length": 1495.0, "completions/min_terminated_length": 1495.0, "epoch": 0.3769567939887289, "frac_reward_zero_std": 0.0, "grad_norm": 2.857067953077971, "kl": 0.004589080810546875, "learning_rate": 8.076170269949795e-07, "loss": 0.0005, "num_tokens": 16032986.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8725603818893433, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01691320115670981, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.057867471716033625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1402.5, "completions/mean_terminated_length": 1305.0, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.37820914214151535, "frac_reward_zero_std": 0.0, "grad_norm": 3.0631024256482773, "kl": 0.00476837158203125, "learning_rate": 8.060024304391464e-07, "loss": -0.0059, "num_tokens": 16075122.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0385990142822266, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014002892068640102, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04032032793331211, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1234.625, "completions/mean_terminated_length": 1196.71435546875, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.3794614902943018, "frac_reward_zero_std": 0.0, "grad_norm": 3.625794290754689, "kl": 0.0052490234375, "learning_rate": 8.043829463991619e-07, "loss": -0.0729, "num_tokens": 16137860.0, "reward": 0.0, "reward_std": 0.7281184196472168, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15255108490634472, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0853071621433351, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999162, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1229.0, "completions/mean_terminated_length": 1105.8182373046875, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.3807138384470883, "frac_reward_zero_std": 0.0, "grad_norm": 3.260598284534238, "kl": 0.00495147705078125, "learning_rate": 8.027586057934928e-07, "loss": -0.0588, "num_tokens": 16193676.0, "reward": 7.450580596923828e-09, "reward_std": 1.0218051671981812, "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.00276089248932003, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03710765345598682, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1335.75, "completions/mean_terminated_length": 1208.0, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.38196618659987475, "frac_reward_zero_std": 0.0, "grad_norm": 3.305649016415018, "kl": 0.00536346435546875, "learning_rate": 8.011294396333247e-07, "loss": 0.035, "num_tokens": 16241520.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0677435398101807, "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0046395341039948005, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04123648809292501, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12995725793078622, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1287.0, "completions/mean_terminated_length": 1159.2000732421875, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.38321853475266127, "frac_reward_zero_std": 0.0, "grad_norm": 3.727887770111367, "kl": 0.00641632080078125, "learning_rate": 7.99495479021971e-07, "loss": -0.022, "num_tokens": 16295288.0, "reward": -4.470348358154297e-08, "reward_std": 1.053145408630371, "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038974548522257506, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10020848772744548, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12224747213928168, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1242.0625, "completions/mean_terminated_length": 1124.8182373046875, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.38447088290544773, "frac_reward_zero_std": 0.0, "grad_norm": 2.5550785119927446, "kl": 0.0034637451171875, "learning_rate": 7.978567551542785e-07, "loss": -0.0756, "num_tokens": 16333129.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6722694635391235, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06133805044031608, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07932499651372282, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.8125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1466.0625, "completions/mean_terminated_length": 1319.0, "completions/min_length": 1256.0, "completions/min_terminated_length": 1256.0, "epoch": 0.3857232310582342, "frac_reward_zero_std": 0.0, "grad_norm": 2.914391532004015, "kl": 0.00472259521484375, "learning_rate": 7.962132993160318e-07, "loss": -0.0031, "num_tokens": 16393066.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5695419311523438, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03688578385137459, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05735193102645086, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863462, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1468.125, "completions/mean_terminated_length": 1245.0, "completions/min_length": 1190.0, "completions/min_terminated_length": 1190.0, "epoch": 0.38697557921102066, "frac_reward_zero_std": 0.0, "grad_norm": 2.462122287718944, "kl": 0.003597259521484375, "learning_rate": 7.945651428833566e-07, "loss": -0.0086, "num_tokens": 16455300.0, "reward": 0.0, "reward_std": 0.9045326113700867, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07094748829476913, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07518616664712767, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1250.0625, "completions/mean_terminated_length": 1055.6666259765625, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.3882279273638071, "frac_reward_zero_std": 0.0, "grad_norm": 2.9451996655491754, "kl": 0.003414154052734375, "learning_rate": 7.929123173221197e-07, "loss": 0.016, "num_tokens": 16510829.0, "reward": -2.60770320892334e-08, "reward_std": 0.9780210256576538, "rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11998443212330577, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.273643343882272, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725535, "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1477.3125, "completions/mean_terminated_length": 1409.25, "completions/min_length": 1344.0, "completions/min_terminated_length": 1344.0, "epoch": 0.3894802755165936, "frac_reward_zero_std": 0.0, "grad_norm": 2.5088718729093884, "kl": 0.0039215087890625, "learning_rate": 7.91254854187329e-07, "loss": 0.0109, "num_tokens": 16557338.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8606460094451904, "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11084663306324073, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10033388109681571, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081414, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1070.625, "completions/mean_terminated_length": 1042.0, "completions/min_length": 692.0, "completions/min_terminated_length": 692.0, "epoch": 0.3907326236693801, "frac_reward_zero_std": 0.0, "grad_norm": 3.633658673586784, "kl": 0.004913330078125, "learning_rate": 7.895927851225315e-07, "loss": -0.0045, "num_tokens": 16585492.0, "reward": 0.0, "reward_std": 0.8763086795806885, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010857263566407607, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06826631403415188, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1316.9375, "completions/mean_terminated_length": 1207.0999755859375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.3919849718221666, "frac_reward_zero_std": 0.0, "grad_norm": 3.5743987456607456, "kl": 0.00490570068359375, "learning_rate": 7.879261418592072e-07, "loss": -0.0521, "num_tokens": 16629555.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9046754240989685, "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07072576648968745, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14285699045244268, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590966, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1382.4375, "completions/mean_terminated_length": 1329.0, "completions/min_length": 1093.0, "completions/min_terminated_length": 1093.0, "epoch": 0.39323731997495304, "frac_reward_zero_std": 0.0, "grad_norm": 2.1526667079717625, "kl": 0.002288818359375, "learning_rate": 7.862549562161661e-07, "loss": -0.0277, "num_tokens": 16682250.0, "reward": -7.450580596923828e-09, "reward_std": 1.0446007251739502, "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1297401874034389, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1781696946469639, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06871842709362772, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.9375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1497.6875, "completions/mean_terminated_length": 1463.0, "completions/min_length": 1463.0, "completions/min_terminated_length": 1463.0, "epoch": 0.3944896681277395, "frac_reward_zero_std": 0.0, "grad_norm": 3.0308641968752266, "kl": 0.005645751953125, "learning_rate": 7.845792600989385e-07, "loss": -0.0009, "num_tokens": 16736925.0, "reward": 0.0, "reward_std": 1.0489060878753662, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0036366895025502417, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.014546758010200967, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1448.0, "completions/mean_terminated_length": 1292.0, "completions/min_length": 1213.0, "completions/min_terminated_length": 1213.0, "epoch": 0.39574201628052597, "frac_reward_zero_std": 0.0, "grad_norm": 2.5606215816363824, "kl": 0.003444671630859375, "learning_rate": 7.828990854991669e-07, "loss": -0.0016, "num_tokens": 16805501.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0115642547607422, "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021560930387654664, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031239915717000032, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.3969943644333125, "frac_reward_zero_std": 0.0, "grad_norm": 2.8884496894175316, "kl": 0.00472259521484375, "learning_rate": 7.812144644939948e-07, "loss": 0.0002, "num_tokens": 16868629.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9699341058731079, "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.024920589109913467, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09830967886668995, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1401.8125, "completions/mean_terminated_length": 1342.9000244140625, "completions/min_length": 1196.0, "completions/min_terminated_length": 1196.0, "epoch": 0.39824671258609895, "frac_reward_zero_std": 0.0, "grad_norm": 3.4263971987387944, "kl": 0.00539398193359375, "learning_rate": 7.795254292454546e-07, "loss": -0.0029, "num_tokens": 16930194.0, "reward": 3.725290298461914e-09, "reward_std": 1.058499813079834, "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003584071693735027, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06412609719118169, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1266.875, "completions/mean_terminated_length": 1085.5555419921875, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.3994990607388854, "frac_reward_zero_std": 0.0, "grad_norm": 2.6400441912231263, "kl": 0.00444793701171875, "learning_rate": 7.778320119998535e-07, "loss": -0.121, "num_tokens": 16979440.0, "reward": -9.313225746154785e-09, "reward_std": 1.0413284301757812, "rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005981072426200435, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04425931042175955, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0926962382871743, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1350.1875, "completions/mean_terminated_length": 1260.300048828125, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.4007514088916719, "frac_reward_zero_std": 0.0, "grad_norm": 3.2125264454232823, "kl": 0.00494384765625, "learning_rate": 7.761342450871578e-07, "loss": -0.0401, "num_tokens": 17023723.0, "reward": 0.0, "reward_std": 0.9411365389823914, "rewards/wordcountpos_reward_ecommerce/mean": 0.0, "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08138630489162721, "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09236477000312811, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, "step": 320 } ], "logging_steps": 1, "max_steps": 799, "num_input_tokens_seen": 17023723, "num_train_epochs": 1, "save_steps": 80, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }