AutoGEO_mini_Qwen1.7B_Ecommerce / trainer_state.json
yujiangw's picture
Upload folder using huggingface_hub
13f8fb1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4007514088916719,
"eval_steps": 500,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1427.0,
"completions/mean_length": 1310.375,
"completions/mean_terminated_length": 1120.75,
"completions/min_length": 941.0,
"completions/min_terminated_length": 941.0,
"epoch": 0.0012523481527864746,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4675665797659936,
"kl": 0.0014476776123046875,
"learning_rate": 0.0,
"loss": -0.0042,
"num_tokens": 47606.0,
"reward": 2.9802322387695312e-08,
"reward_std": 1.0425715446472168,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020242706942291286,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08320206610241015,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1128748897706693,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 1
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1493.0,
"completions/mean_length": 1215.625,
"completions/mean_terminated_length": 1120.8333740234375,
"completions/min_length": 920.0,
"completions/min_terminated_length": 920.0,
"epoch": 0.002504696305572949,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5220841352987073,
"kl": 0.002323150634765625,
"learning_rate": 1.25e-08,
"loss": -0.0365,
"num_tokens": 78984.0,
"reward": 0.0,
"reward_std": 0.9615500569343567,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019240361081273367,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0375240418925418,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1430.8125,
"completions/mean_terminated_length": 1341.857177734375,
"completions/min_length": 1171.0,
"completions/min_terminated_length": 1171.0,
"epoch": 0.003757044458359424,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7257956401904653,
"kl": 0.0018787384033203125,
"learning_rate": 2.5e-08,
"loss": -0.014,
"num_tokens": 126437.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0492231845855713,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09708628067006185,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16724793667635054,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476838,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 3
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1490.0,
"completions/mean_length": 1463.4375,
"completions/mean_terminated_length": 1353.75,
"completions/min_length": 1084.0,
"completions/min_terminated_length": 1084.0,
"epoch": 0.005009392611145898,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0068456094040337,
"kl": 0.00238037109375,
"learning_rate": 3.75e-08,
"loss": -0.0103,
"num_tokens": 192900.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.4076952338218689,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.42554686388976987,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3748667411110748,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.006261740763932373,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0250923226839315,
"kl": 0.002262115478515625,
"learning_rate": 5e-08,
"loss": 0.0001,
"num_tokens": 257452.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9494391083717346,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0021633155301854353,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04003332867073718,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043475,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 5
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1443.0,
"completions/mean_length": 1213.4375,
"completions/mean_terminated_length": 1041.5,
"completions/min_length": 749.0,
"completions/min_terminated_length": 749.0,
"epoch": 0.007514088916718848,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.520673181444066,
"kl": 0.002166748046875,
"learning_rate": 6.25e-08,
"loss": -0.0047,
"num_tokens": 300227.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.000030517578125,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1494053837623106,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21650138601325905,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1475.0,
"completions/mean_length": 1441.5,
"completions/mean_terminated_length": 1266.0,
"completions/min_length": 868.0,
"completions/min_terminated_length": 868.0,
"epoch": 0.008766437069505322,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.131914910533151,
"kl": 0.002285003662109375,
"learning_rate": 7.5e-08,
"loss": -0.0115,
"num_tokens": 365811.0,
"reward": 0.0,
"reward_std": 1.021754264831543,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20434821411964987,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13055976557133547,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 7
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1037.0,
"completions/mean_length": 1197.6875,
"completions/mean_terminated_length": 895.375,
"completions/min_length": 718.0,
"completions/min_terminated_length": 718.0,
"epoch": 0.010018785222291797,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1709545933290264,
"kl": 0.001911163330078125,
"learning_rate": 8.75e-08,
"loss": 0.0124,
"num_tokens": 406590.0,
"reward": 0.0,
"reward_std": 0.7096362113952637,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011512278889933215,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017023573988747046,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901863,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1368.0,
"completions/mean_length": 1424.8125,
"completions/mean_terminated_length": 1299.5,
"completions/min_length": 1222.0,
"completions/min_terminated_length": 1222.0,
"epoch": 0.011271133375078271,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4985696146916663,
"kl": 0.0014972686767578125,
"learning_rate": 1e-07,
"loss": 0.0017,
"num_tokens": 449955.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.592147946357727,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09093222668860702,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16366647482965233,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 9
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1452.0,
"completions/mean_length": 1497.0,
"completions/mean_terminated_length": 1452.0,
"completions/min_length": 1452.0,
"completions/min_terminated_length": 1452.0,
"epoch": 0.012523481527864746,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.994231395858393,
"kl": 0.002552032470703125,
"learning_rate": 1.125e-07,
"loss": 0.0008,
"num_tokens": 512611.0,
"reward": 0.0,
"reward_std": 0.7100945115089417,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.39335439512941156,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.44383620756924225,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1319.0,
"completions/mean_terminated_length": 1258.666748046875,
"completions/min_length": 1147.0,
"completions/min_terminated_length": 1147.0,
"epoch": 0.013775829680651221,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.29188099989823,
"kl": 0.002773284912109375,
"learning_rate": 1.25e-07,
"loss": -0.0193,
"num_tokens": 578363.0,
"reward": 0.0,
"reward_std": 0.7537417411804199,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.006668674614171876,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06272286484055771,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.49583333333333335,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693655,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 11
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1362.0,
"completions/max_terminated_length": 1362.0,
"completions/mean_length": 856.4375,
"completions/mean_terminated_length": 856.4375,
"completions/min_length": 689.0,
"completions/min_terminated_length": 689.0,
"epoch": 0.015028177833437696,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.02451725178761,
"kl": 0.0014524459838867188,
"learning_rate": 1.375e-07,
"loss": -0.0034,
"num_tokens": 624626.0,
"reward": 0.0,
"reward_std": 0.25382307171821594,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16171540881469407,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04512392405527899,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.17293758240303758,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1386.0,
"completions/mean_length": 1088.5625,
"completions/mean_terminated_length": 901.5454711914062,
"completions/min_length": 674.0,
"completions/min_terminated_length": 674.0,
"epoch": 0.01628052598622417,
"frac_reward_zero_std": 0.0,
"grad_norm": 4.015402694119637,
"kl": 0.0021724700927734375,
"learning_rate": 1.5e-07,
"loss": -0.0824,
"num_tokens": 681059.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9276120662689209,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.041562779715464626,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1909826248378845,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11409872268574492,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 13
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1445.0,
"completions/mean_length": 1496.5625,
"completions/mean_terminated_length": 1445.0,
"completions/min_length": 1445.0,
"completions/min_terminated_length": 1445.0,
"epoch": 0.017532874139010644,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.995346934747371,
"kl": 0.002460479736328125,
"learning_rate": 1.625e-07,
"loss": -0.001,
"num_tokens": 745196.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.008323073387146,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.057098024958501865,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10812840498160957,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1398411797560202,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 14
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1481.0,
"completions/mean_length": 1267.8125,
"completions/mean_terminated_length": 1128.5,
"completions/min_length": 842.0,
"completions/min_terminated_length": 842.0,
"epoch": 0.01878522229179712,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.681468707345247,
"kl": 0.002552032470703125,
"learning_rate": 1.75e-07,
"loss": -0.0258,
"num_tokens": 802777.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.5409140586853027,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.025752634294563932,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1190717918627845,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10318986456114838,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 15
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1424.0,
"completions/mean_length": 1465.75,
"completions/mean_terminated_length": 1317.3333740234375,
"completions/min_length": 1240.0,
"completions/min_terminated_length": 1240.0,
"epoch": 0.020037570444583593,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.952391425850165,
"kl": 0.00229644775390625,
"learning_rate": 1.875e-07,
"loss": 0.0085,
"num_tokens": 852397.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9532216191291809,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.009914003172755002,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14279656209744931,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1085254706406647,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 16
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1415.0,
"completions/mean_length": 1464.1875,
"completions/mean_terminated_length": 1356.75,
"completions/min_length": 1308.0,
"completions/min_terminated_length": 1308.0,
"epoch": 0.021289918597370068,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6835952376597447,
"kl": 0.0016937255859375,
"learning_rate": 2e-07,
"loss": -0.0182,
"num_tokens": 905544.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.670647144317627,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0385939635652747,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11140246797780545,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14950535726806533,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 17
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1456.0,
"completions/mean_length": 1226.6875,
"completions/mean_terminated_length": 1163.615478515625,
"completions/min_length": 833.0,
"completions/min_terminated_length": 833.0,
"epoch": 0.022542266750156543,
"frac_reward_zero_std": 0.0,
"grad_norm": 4.118592346302386,
"kl": 0.0031585693359375,
"learning_rate": 2.1249999999999998e-07,
"loss": -0.014,
"num_tokens": 958635.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0170769691467285,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004864839675281578,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0373192839130601,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1192569587999888,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1374.0,
"completions/mean_length": 1492.125,
"completions/mean_terminated_length": 1374.0,
"completions/min_length": 1374.0,
"completions/min_terminated_length": 1374.0,
"epoch": 0.023794614902943018,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9483970425927475,
"kl": 0.0020732879638671875,
"learning_rate": 2.25e-07,
"loss": 0.0003,
"num_tokens": 1017653.0,
"reward": 0.0,
"reward_std": 0.8760651350021362,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014058396075366015,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03110460490345673,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 19
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1462.0,
"completions/mean_length": 1290.125,
"completions/mean_terminated_length": 1164.2000732421875,
"completions/min_length": 987.0,
"completions/min_terminated_length": 987.0,
"epoch": 0.025046963055729492,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3136168175643763,
"kl": 0.002140045166015625,
"learning_rate": 2.3749999999999998e-07,
"loss": -0.0256,
"num_tokens": 1065663.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9537639617919922,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038097750035485885,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1082295867822669,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0850925422157591,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 20
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1263.0,
"completions/mean_length": 1341.5,
"completions/mean_terminated_length": 1183.0,
"completions/min_length": 1034.0,
"completions/min_terminated_length": 1034.0,
"epoch": 0.026299311208515967,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.306897182610288,
"kl": 0.0024871826171875,
"learning_rate": 2.5e-07,
"loss": -0.0094,
"num_tokens": 1117263.0,
"reward": 0.0,
"reward_std": 0.990053117275238,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011397748892334698,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046758634855771405,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725538,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 21
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1342.0,
"completions/mean_length": 1266.625,
"completions/mean_terminated_length": 1033.25,
"completions/min_length": 774.0,
"completions/min_terminated_length": 774.0,
"epoch": 0.027551659361302442,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3250640108747564,
"kl": 0.002407073974609375,
"learning_rate": 2.625e-07,
"loss": -0.0385,
"num_tokens": 1172489.0,
"reward": 0.0,
"reward_std": 0.7966146469116211,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020326344256082304,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14616918176802837,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 22
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1236.0,
"completions/mean_length": 1460.875,
"completions/mean_terminated_length": 1187.0,
"completions/min_length": 1138.0,
"completions/min_terminated_length": 1138.0,
"epoch": 0.028804007514088917,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4267377669243926,
"kl": 0.0071010589599609375,
"learning_rate": 2.75e-07,
"loss": -0.0111,
"num_tokens": 1234527.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.5723245143890381,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02122072131733574,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.157410051166117,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 23
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1325.625,
"completions/mean_terminated_length": 1151.25,
"completions/min_length": 1018.0,
"completions/min_terminated_length": 1018.0,
"epoch": 0.03005635566687539,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.24473326800092,
"kl": 0.0023651123046875,
"learning_rate": 2.8749999999999995e-07,
"loss": -0.0069,
"num_tokens": 1269905.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.0385103225708008,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04647400767345873,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09647557054247557,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15371932093796678,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 24
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1429.0,
"completions/mean_length": 1377.0625,
"completions/mean_terminated_length": 1321.181884765625,
"completions/min_length": 1206.0,
"completions/min_terminated_length": 1206.0,
"epoch": 0.031308703819661866,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6845626042385518,
"kl": 0.001850128173828125,
"learning_rate": 3e-07,
"loss": 0.0174,
"num_tokens": 1328778.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7787291407585144,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09266568639996468,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0822707712414604,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12382783747337808,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 25
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1352.0,
"completions/mean_length": 1169.5625,
"completions/mean_terminated_length": 1093.3077392578125,
"completions/min_length": 721.0,
"completions/min_terminated_length": 721.0,
"epoch": 0.03256105197244834,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.712859953207578,
"kl": 0.00261688232421875,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0167,
"num_tokens": 1363011.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0016117095947266,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1709176314049482,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1600211117254044,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 26
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1413.0,
"completions/mean_length": 1215.9375,
"completions/mean_terminated_length": 1197.0001220703125,
"completions/min_length": 950.0,
"completions/min_terminated_length": 950.0,
"epoch": 0.033813400125234816,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.399525494329125,
"kl": 0.002574920654296875,
"learning_rate": 3.25e-07,
"loss": 0.0013,
"num_tokens": 1407434.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9290227890014648,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03979791227452069,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13950243126020834,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593314,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 27
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1368.0,
"completions/mean_length": 1340.4375,
"completions/mean_terminated_length": 1180.875,
"completions/min_length": 1034.0,
"completions/min_terminated_length": 1034.0,
"epoch": 0.03506574827802129,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9566553373980344,
"kl": 0.0021228790283203125,
"learning_rate": 3.375e-07,
"loss": -0.0056,
"num_tokens": 1458241.0,
"reward": 0.0,
"reward_std": 0.7809990644454956,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028119487654073606,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1198837710832071,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6083333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0873477511423713,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 28
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1401.625,
"completions/mean_terminated_length": 1275.1429443359375,
"completions/min_length": 1054.0,
"completions/min_terminated_length": 1054.0,
"epoch": 0.036318096430807766,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9381632846830548,
"kl": 0.002338409423828125,
"learning_rate": 3.5e-07,
"loss": 0.0125,
"num_tokens": 1523987.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9863969087600708,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06145046632658874,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08502220502724643,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829063,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 29
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1394.0,
"completions/mean_length": 1257.625,
"completions/mean_terminated_length": 1069.111083984375,
"completions/min_length": 922.0,
"completions/min_terminated_length": 922.0,
"epoch": 0.03757044458359424,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4028418647289094,
"kl": 0.0042324066162109375,
"learning_rate": 3.6249999999999997e-07,
"loss": -0.0044,
"num_tokens": 1582341.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9369316697120667,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.058010557784549034,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06029435215775259,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1184.0,
"completions/mean_length": 1480.25,
"completions/mean_terminated_length": 1184.0,
"completions/min_length": 1184.0,
"completions/min_terminated_length": 1184.0,
"epoch": 0.038822792736380715,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.842249981197029,
"kl": 0.0021514892578125,
"learning_rate": 3.75e-07,
"loss": -0.0106,
"num_tokens": 1629017.0,
"reward": -2.2351741790771484e-08,
"reward_std": 1.0243444442749023,
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010824625533504566,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02884739427994731,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 31
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1032.0,
"completions/mean_terminated_length": 668.0,
"completions/min_length": 294.0,
"completions/min_terminated_length": 294.0,
"epoch": 0.040075140889167186,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.8067119735652115,
"kl": 0.0024871826171875,
"learning_rate": 3.875e-07,
"loss": 0.0413,
"num_tokens": 1666305.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8114193677902222,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05789475536948171,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04045242685812858,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14707015206910487,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 32
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1495.0,
"completions/mean_length": 1271.25,
"completions/mean_terminated_length": 1195.0,
"completions/min_length": 1030.0,
"completions/min_terminated_length": 1030.0,
"epoch": 0.041327489041953665,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1322197700429,
"kl": 0.00167083740234375,
"learning_rate": 4e-07,
"loss": -0.0393,
"num_tokens": 1727629.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.4495465159416199,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05764457052515048,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11640629412276694,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0906764700582363,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 33
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1438.0625,
"completions/mean_terminated_length": 1334.8333740234375,
"completions/min_length": 1171.0,
"completions/min_terminated_length": 1171.0,
"epoch": 0.042579837194740136,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.291671899271785,
"kl": 0.0019550323486328125,
"learning_rate": 4.1249999999999997e-07,
"loss": 0.0187,
"num_tokens": 1794062.0,
"reward": 2.60770320892334e-08,
"reward_std": 1.0634629726409912,
"rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05489684988302594,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2423673289052158,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829065,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 34
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1106.0,
"completions/mean_length": 1253.1875,
"completions/mean_terminated_length": 1061.2222900390625,
"completions/min_length": 977.0,
"completions/min_terminated_length": 977.0,
"epoch": 0.043832185347526614,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3253132789681,
"kl": 0.00135040283203125,
"learning_rate": 4.2499999999999995e-07,
"loss": 0.0016,
"num_tokens": 1847393.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.6563782691955566,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072262342914357,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.195641332904443,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746356,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 35
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1399.3125,
"completions/mean_terminated_length": 1298.625,
"completions/min_length": 1031.0,
"completions/min_terminated_length": 1031.0,
"epoch": 0.045084533500313086,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7548692610807795,
"kl": 0.0021877288818359375,
"learning_rate": 4.375e-07,
"loss": -0.0061,
"num_tokens": 1892206.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9828654527664185,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06116004436340469,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10276980263780594,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 36
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1352.6875,
"completions/mean_terminated_length": 1318.6923828125,
"completions/min_length": 1218.0,
"completions/min_terminated_length": 1218.0,
"epoch": 0.046336881653099564,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.47319096015074,
"kl": 0.0014925003051757812,
"learning_rate": 4.5e-07,
"loss": -0.0109,
"num_tokens": 1940945.0,
"reward": 0.0,
"reward_std": 0.955802857875824,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.041245323817924374,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19292307241869963,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 37
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1468.0,
"completions/mean_length": 1375.25,
"completions/mean_terminated_length": 1250.5,
"completions/min_length": 959.0,
"completions/min_terminated_length": 959.0,
"epoch": 0.047589229805886035,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1878681605362496,
"kl": 0.002475738525390625,
"learning_rate": 4.625e-07,
"loss": -0.0118,
"num_tokens": 1985181.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.054539442062378,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019033803394582376,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10927050985901436,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06831300510639736,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 38
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1421.0,
"completions/mean_length": 1447.0,
"completions/mean_terminated_length": 1288.0,
"completions/min_length": 1065.0,
"completions/min_terminated_length": 1065.0,
"epoch": 0.048841577958672514,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.188392715000756,
"kl": 0.00237274169921875,
"learning_rate": 4.7499999999999995e-07,
"loss": 0.0406,
"num_tokens": 2034525.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.0613259077072144,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.019229174460983274,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03385821534786073,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09651328828101764,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 39
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1135.0,
"completions/mean_length": 1148.75,
"completions/mean_terminated_length": 797.5,
"completions/min_length": 735.0,
"completions/min_terminated_length": 735.0,
"epoch": 0.050093926111458985,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6930283084099047,
"kl": 0.0015668869018554688,
"learning_rate": 4.875e-07,
"loss": -0.0288,
"num_tokens": 2080937.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7898622751235962,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072289975795826,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19078379794323846,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 40
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1433.0,
"completions/mean_length": 1416.9375,
"completions/mean_terminated_length": 1333.875,
"completions/min_length": 1244.0,
"completions/min_terminated_length": 1244.0,
"epoch": 0.05134627426424546,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.177282619828498,
"kl": 0.0012989044189453125,
"learning_rate": 5e-07,
"loss": 0.0091,
"num_tokens": 2136744.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0509533882141113,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.031295483862865174,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11149225377383207,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686706,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 41
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1442.0,
"completions/mean_length": 1273.4375,
"completions/mean_terminated_length": 1197.916748046875,
"completions/min_length": 943.0,
"completions/min_terminated_length": 943.0,
"epoch": 0.052598622417031934,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2241520761115305,
"kl": 0.002368927001953125,
"learning_rate": 5.125e-07,
"loss": 0.004,
"num_tokens": 2171271.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0308477878570557,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12167064883765863,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12965137595029042,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5416666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15177956725803718,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 42
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1497.0,
"completions/mean_length": 1466.6875,
"completions/mean_terminated_length": 1366.75,
"completions/min_length": 1132.0,
"completions/min_terminated_length": 1132.0,
"epoch": 0.05385097056981841,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9471462447348635,
"kl": 0.0021038055419921875,
"learning_rate": 5.25e-07,
"loss": -0.0164,
"num_tokens": 2231986.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.23251324892044067,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04063102604876061,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.22066858001488113,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.205074512203627,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 43
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1422.3125,
"completions/mean_terminated_length": 1322.4285888671875,
"completions/min_length": 1074.0,
"completions/min_terminated_length": 1074.0,
"epoch": 0.055103318722604884,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4268519840677527,
"kl": 0.0010042190551757812,
"learning_rate": 5.374999999999999e-07,
"loss": -0.0099,
"num_tokens": 2288223.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9692014455795288,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.070492449256456,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20832413138507544,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06440611887195309,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 44
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1448.0,
"completions/mean_length": 1376.0,
"completions/mean_terminated_length": 1301.5999755859375,
"completions/min_length": 954.0,
"completions/min_terminated_length": 954.0,
"epoch": 0.056355666875391355,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.03273915452992,
"kl": 0.002422332763671875,
"learning_rate": 5.5e-07,
"loss": 0.0123,
"num_tokens": 2354343.0,
"reward": 1.862645149230957e-08,
"reward_std": 1.067973256111145,
"rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1362560230689488,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16718884747044185,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.058214163988576643,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 45
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1460.0,
"completions/mean_length": 1453.0625,
"completions/mean_terminated_length": 1392.71435546875,
"completions/min_length": 1326.0,
"completions/min_terminated_length": 1326.0,
"epoch": 0.057608015028177834,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7636896151627517,
"kl": 0.001781463623046875,
"learning_rate": 5.625e-07,
"loss": -0.0161,
"num_tokens": 2410776.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.722027599811554,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.175370955230916,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15404241260320187,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14446581038560777,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 46
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1185.0,
"completions/mean_length": 1384.8125,
"completions/mean_terminated_length": 1039.25,
"completions/min_length": 797.0,
"completions/min_terminated_length": 797.0,
"epoch": 0.058860363180964305,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.218383433006556,
"kl": 0.002429962158203125,
"learning_rate": 5.749999999999999e-07,
"loss": -0.0616,
"num_tokens": 2460181.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8904982209205627,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020496850203242982,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13226975013047063,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08062257748298553,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 47
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1438.0,
"completions/mean_length": 1472.0,
"completions/mean_terminated_length": 1350.666748046875,
"completions/min_length": 1240.0,
"completions/min_terminated_length": 1240.0,
"epoch": 0.06011271133375078,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5256786742208663,
"kl": 0.002872467041015625,
"learning_rate": 5.875e-07,
"loss": -0.0096,
"num_tokens": 2524269.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.001219630241394,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0854065639247727,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950921206250912,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10327955589886446,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 48
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1449.0,
"completions/mean_length": 1430.9375,
"completions/mean_terminated_length": 1315.8333740234375,
"completions/min_length": 1131.0,
"completions/min_terminated_length": 1131.0,
"epoch": 0.061365059486537255,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.961325619079936,
"kl": 0.002254486083984375,
"learning_rate": 6e-07,
"loss": 0.0013,
"num_tokens": 2584356.0,
"reward": 0.0,
"reward_std": 0.9873535633087158,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029950137491573797,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16218750528728998,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505425,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 49
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1466.0,
"completions/mean_length": 1491.125,
"completions/mean_terminated_length": 1429.0,
"completions/min_length": 1392.0,
"completions/min_terminated_length": 1392.0,
"epoch": 0.06261740763932373,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6822700139828397,
"kl": 0.0020389556884765625,
"learning_rate": 6.125000000000001e-07,
"loss": -0.0009,
"num_tokens": 2648270.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.7698144912719727,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032020807081585716,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.053695035371207615,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369005,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 50
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1457.0,
"completions/mean_length": 1282.125,
"completions/mean_terminated_length": 1112.6666259765625,
"completions/min_length": 802.0,
"completions/min_terminated_length": 802.0,
"epoch": 0.06386975579211021,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.07804473575342,
"kl": 0.002071380615234375,
"learning_rate": 6.249999999999999e-07,
"loss": -0.0074,
"num_tokens": 2693776.0,
"reward": 0.0,
"reward_std": 0.5634655952453613,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.013292184885055576,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12085541345993306,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 51
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1462.75,
"completions/mean_terminated_length": 1301.3333740234375,
"completions/min_length": 998.0,
"completions/min_terminated_length": 998.0,
"epoch": 0.06512210394489668,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.194495117066712,
"kl": 0.002658843994140625,
"learning_rate": 6.374999999999999e-07,
"loss": 0.0226,
"num_tokens": 2758980.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.985281229019165,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02226574155778713,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05167870819779757,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746353,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 52
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1426.0,
"completions/mean_length": 1321.3125,
"completions/mean_terminated_length": 1295.7857666015625,
"completions/min_length": 1123.0,
"completions/min_terminated_length": 1123.0,
"epoch": 0.06637445209768315,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.085117182590426,
"kl": 0.0022735595703125,
"learning_rate": 6.5e-07,
"loss": 0.0196,
"num_tokens": 2825249.0,
"reward": 4.470348358154297e-08,
"reward_std": 0.9839984774589539,
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08735912330077701,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14559155866011037,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06978803887752093,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 53
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 1498.5625,
"completions/mean_terminated_length": 1477.0,
"completions/min_length": 1477.0,
"completions/min_terminated_length": 1477.0,
"epoch": 0.06762680025046963,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9348811206443304,
"kl": 0.00191497802734375,
"learning_rate": 6.624999999999999e-07,
"loss": 0.0001,
"num_tokens": 2889498.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0318164825439453,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04941181253574712,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06836218150195612,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045227,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 54
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1472.0,
"completions/mean_length": 1384.875,
"completions/mean_terminated_length": 1236.857177734375,
"completions/min_length": 913.0,
"completions/min_terminated_length": 913.0,
"epoch": 0.06887914840325611,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0200180369762695,
"kl": 0.0021152496337890625,
"learning_rate": 6.75e-07,
"loss": -0.0343,
"num_tokens": 2950200.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0550494194030762,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038887574815180403,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06912072840442107,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258099,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 55
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1450.0,
"completions/mean_length": 1435.0625,
"completions/mean_terminated_length": 1370.125,
"completions/min_length": 1161.0,
"completions/min_terminated_length": 1161.0,
"epoch": 0.07013149655604257,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9170742885205607,
"kl": 0.0019893646240234375,
"learning_rate": 6.875e-07,
"loss": 0.0029,
"num_tokens": 3019673.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9821785688400269,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016992912073662925,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.105336871629235,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 56
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1499.0,
"completions/mean_length": 1360.75,
"completions/mean_terminated_length": 1221.5,
"completions/min_length": 1081.0,
"completions/min_terminated_length": 1081.0,
"epoch": 0.07138384470882905,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3821452004991794,
"kl": 0.0014314651489257812,
"learning_rate": 7e-07,
"loss": 0.029,
"num_tokens": 3075477.0,
"reward": 2.60770320892334e-08,
"reward_std": 1.0472596883773804,
"rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010678083797130186,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11394385265661125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.045338235029118164,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 57
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1392.0,
"completions/max_terminated_length": 1392.0,
"completions/mean_length": 970.625,
"completions/mean_terminated_length": 970.625,
"completions/min_length": 715.0,
"completions/min_terminated_length": 715.0,
"epoch": 0.07263619286161553,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5943049280036243,
"kl": 0.0017871856689453125,
"learning_rate": 7.125e-07,
"loss": -0.0489,
"num_tokens": 3103423.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8579948544502258,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11955284309699343,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1294140259487627,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 58
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1451.0,
"completions/mean_length": 1484.75,
"completions/mean_terminated_length": 1378.0,
"completions/min_length": 1305.0,
"completions/min_terminated_length": 1305.0,
"epoch": 0.07388854101440201,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.914915162479294,
"kl": 0.0023479461669921875,
"learning_rate": 7.249999999999999e-07,
"loss": -0.0133,
"num_tokens": 3170979.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0570372343063354,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004701879619984315,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950367185128266,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 59
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1359.9375,
"completions/mean_terminated_length": 1179.857177734375,
"completions/min_length": 406.0,
"completions/min_terminated_length": 406.0,
"epoch": 0.07514088916718847,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.283089786479577,
"kl": 0.002674102783203125,
"learning_rate": 7.375e-07,
"loss": -0.0525,
"num_tokens": 3233802.0,
"reward": -1.6763806343078613e-08,
"reward_std": 1.050881028175354,
"rewards/wordcountpos_reward_ecommerce/mean": -1.6763806343078613e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02996931362982372,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07266154836265915,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238706,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.07639323731997495,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.311519312396822,
"kl": 0.00279998779296875,
"learning_rate": 7.5e-07,
"loss": 0.0001,
"num_tokens": 3293354.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.0101943016052246,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04793344228148064,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12274932480508612,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 61
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1454.0,
"completions/mean_length": 1410.1875,
"completions/mean_terminated_length": 1294.71435546875,
"completions/min_length": 1137.0,
"completions/min_terminated_length": 1137.0,
"epoch": 0.07764558547276143,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0789417256546874,
"kl": 0.0022792816162109375,
"learning_rate": 7.624999999999999e-07,
"loss": -0.0109,
"num_tokens": 3334909.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9900147914886475,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004903461451645089,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03771048515625185,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693658,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 62
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1316.0,
"completions/mean_length": 1133.0,
"completions/mean_terminated_length": 1108.533447265625,
"completions/min_length": 957.0,
"completions/min_terminated_length": 957.0,
"epoch": 0.07889793362554791,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.259053190740638,
"kl": 0.0018634796142578125,
"learning_rate": 7.75e-07,
"loss": -0.0194,
"num_tokens": 3383333.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.671829104423523,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09142372399409204,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09598955648379433,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 63
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1484.0,
"completions/mean_length": 1463.0,
"completions/mean_terminated_length": 1352.0,
"completions/min_length": 1206.0,
"completions/min_terminated_length": 1206.0,
"epoch": 0.08015028177833437,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.187716368550353,
"kl": 0.002471923828125,
"learning_rate": 7.875e-07,
"loss": 0.0106,
"num_tokens": 3442269.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0351850986480713,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12370484162737726,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1619343847332339,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457553,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 64
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1483.0,
"completions/mean_length": 1410.625,
"completions/mean_terminated_length": 1261.666748046875,
"completions/min_length": 995.0,
"completions/min_terminated_length": 995.0,
"epoch": 0.08140262993112085,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.218117740066407,
"kl": 0.002559661865234375,
"learning_rate": 8e-07,
"loss": -0.0443,
"num_tokens": 3489911.0,
"reward": 5.960464477539063e-08,
"reward_std": 0.5395079851150513,
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061171909778282046,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06618755934392026,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460886,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 65
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1491.0,
"completions/mean_length": 1316.6875,
"completions/mean_terminated_length": 1206.7000732421875,
"completions/min_length": 869.0,
"completions/min_terminated_length": 869.0,
"epoch": 0.08265497808390733,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.223646819331395,
"kl": 0.002506256103515625,
"learning_rate": 8.125e-07,
"loss": -0.0004,
"num_tokens": 3531330.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9571313858032227,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.027972706586888517,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1908156027057365,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08734775114237132,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 66
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1330.0,
"completions/mean_length": 1478.5625,
"completions/mean_terminated_length": 1328.5,
"completions/min_length": 1327.0,
"completions/min_terminated_length": 1327.0,
"epoch": 0.08390732623669381,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.411248138087788,
"kl": 0.00255584716796875,
"learning_rate": 8.249999999999999e-07,
"loss": 0.0085,
"num_tokens": 3591331.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.6705090403556824,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3499282464198203,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3060898603663511,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 67
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1422.0,
"completions/mean_length": 1495.125,
"completions/mean_terminated_length": 1422.0,
"completions/min_length": 1422.0,
"completions/min_terminated_length": 1422.0,
"epoch": 0.08515967438948027,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.025267713589772,
"kl": 0.002880096435546875,
"learning_rate": 8.375e-07,
"loss": -0.0014,
"num_tokens": 3658421.0,
"reward": 0.0,
"reward_std": 0.9633276462554932,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07580010422442789,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17700501480681413,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533113,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 68
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1309.0,
"completions/mean_length": 1469.875,
"completions/mean_terminated_length": 1259.0,
"completions/min_length": 1209.0,
"completions/min_terminated_length": 1209.0,
"epoch": 0.08641202254226675,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0466734222423546,
"kl": 0.002544403076171875,
"learning_rate": 8.499999999999999e-07,
"loss": 0.0044,
"num_tokens": 3724899.0,
"reward": 0.0,
"reward_std": 1.0227458477020264,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.002677645774302454,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11990711113827299,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457552,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 69
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1499.0,
"completions/mean_length": 1410.8125,
"completions/mean_terminated_length": 1321.625,
"completions/min_length": 1070.0,
"completions/min_terminated_length": 1070.0,
"epoch": 0.08766437069505323,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.318934314260283,
"kl": 0.002834320068359375,
"learning_rate": 8.625e-07,
"loss": 0.0072,
"num_tokens": 3777184.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0494259595870972,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024827264373621732,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.036366284403351476,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0859586463881842,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 70
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1399.25,
"completions/mean_terminated_length": 1231.3333740234375,
"completions/min_length": 1009.0,
"completions/min_terminated_length": 1009.0,
"epoch": 0.08891671884783969,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8509264779724033,
"kl": 0.002223968505859375,
"learning_rate": 8.75e-07,
"loss": 0.0037,
"num_tokens": 3836428.0,
"reward": 0.0,
"reward_std": 1.0668516159057617,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053166199498163626,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12647299276011556,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 71
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1447.0,
"completions/mean_length": 1199.625,
"completions/mean_terminated_length": 1130.3077392578125,
"completions/min_length": 968.0,
"completions/min_terminated_length": 968.0,
"epoch": 0.09016906700062617,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.886201838693034,
"kl": 0.001605987548828125,
"learning_rate": 8.874999999999999e-07,
"loss": -0.0027,
"num_tokens": 3881094.0,
"reward": 0.0,
"reward_std": 0.8761758804321289,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02026371657719268,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04408943383486411,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 72
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.09142141515341265,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.60284832797847,
"kl": 0.00217437744140625,
"learning_rate": 9e-07,
"loss": 0.0001,
"num_tokens": 3940518.0,
"reward": 0.0,
"reward_std": 0.5877071619033813,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027393406712592036,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0844493241747004,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.067631901304592,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 73
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1482.0,
"completions/mean_length": 1269.3125,
"completions/mean_terminated_length": 1192.416748046875,
"completions/min_length": 959.0,
"completions/min_terminated_length": 959.0,
"epoch": 0.09267376330619913,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2612995556206354,
"kl": 0.002330780029296875,
"learning_rate": 9.124999999999999e-07,
"loss": -0.0066,
"num_tokens": 3982827.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.924209713935852,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021465279786927867,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03289535545475229,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804345,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 74
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1419.0,
"completions/mean_length": 1346.4375,
"completions/mean_terminated_length": 1227.0,
"completions/min_length": 1081.0,
"completions/min_terminated_length": 1081.0,
"epoch": 0.09392611145898559,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6195723075826596,
"kl": 0.00183868408203125,
"learning_rate": 9.25e-07,
"loss": -0.0361,
"num_tokens": 4041194.0,
"reward": 1.1175870895385742e-08,
"reward_std": 1.0540246963500977,
"rewards/wordcountpos_reward_ecommerce/mean": 1.1175870895385742e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03654417489517675,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055054088822312976,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901858,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
"step": 75
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1492.0625,
"completions/mean_terminated_length": 1436.5,
"completions/min_length": 1381.0,
"completions/min_terminated_length": 1381.0,
"epoch": 0.09517845961177207,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1883337396909632,
"kl": 0.0028228759765625,
"learning_rate": 9.374999999999999e-07,
"loss": -0.0004,
"num_tokens": 4102531.0,
"reward": 0.0,
"reward_std": 0.7272332906723022,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657008296291109,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08174957503379145,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437976,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 76
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1428.0,
"completions/mean_length": 1347.875,
"completions/mean_terminated_length": 1152.2857666015625,
"completions/min_length": 807.0,
"completions/min_terminated_length": 807.0,
"epoch": 0.09643080776455855,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.323504050782515,
"kl": 0.002685546875,
"learning_rate": 9.499999999999999e-07,
"loss": -0.012,
"num_tokens": 4154537.0,
"reward": 0.0,
"reward_std": 0.9932632446289062,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12575056940966298,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15133213208857665,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 77
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 1434.4375,
"completions/mean_terminated_length": 1290.2000732421875,
"completions/min_length": 1178.0,
"completions/min_terminated_length": 1178.0,
"epoch": 0.09768315591734503,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4382048596496553,
"kl": 0.002773284912109375,
"learning_rate": 9.624999999999999e-07,
"loss": -0.0322,
"num_tokens": 4221464.0,
"reward": -2.60770320892334e-08,
"reward_std": 1.0265973806381226,
"rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08170559900334663,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10185399685140464,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.161245154965971,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 78
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1248.3125,
"completions/mean_terminated_length": 1097.300048828125,
"completions/min_length": 870.0,
"completions/min_terminated_length": 870.0,
"epoch": 0.09893550407013149,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.648370216175046,
"kl": 0.0019168853759765625,
"learning_rate": 9.75e-07,
"loss": -0.027,
"num_tokens": 4267669.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9588196873664856,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07500714246624458,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993198507995109,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05288001793018134,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 79
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1494.0,
"completions/mean_length": 1436.75,
"completions/mean_terminated_length": 1247.0,
"completions/min_length": 1132.0,
"completions/min_terminated_length": 1132.0,
"epoch": 0.10018785222291797,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.879418147641467,
"kl": 0.0019855499267578125,
"learning_rate": 9.875e-07,
"loss": -0.0127,
"num_tokens": 4328465.0,
"reward": 0.0,
"reward_std": 0.9200654029846191,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1453335125370645,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1827536027247548,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09496588081262934,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1422.1875,
"completions/mean_terminated_length": 1344.375,
"completions/min_length": 1237.0,
"completions/min_terminated_length": 1237.0,
"epoch": 0.10144020037570445,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.635617733673008,
"kl": 0.0017528533935546875,
"learning_rate": 1e-06,
"loss": -0.0046,
"num_tokens": 4373324.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0682477951049805,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053201182409366166,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.044798463974146746,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087683,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 81
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1459.0,
"completions/mean_length": 1170.5,
"completions/mean_terminated_length": 1020.727294921875,
"completions/min_length": 844.0,
"completions/min_terminated_length": 844.0,
"epoch": 0.10269254852849093,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8578219249339107,
"kl": 0.0019054412841796875,
"learning_rate": 9.999957044004145e-07,
"loss": -0.0353,
"num_tokens": 4419844.0,
"reward": 0.0,
"reward_std": 0.4868781566619873,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1691690312178033,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1856850439917278,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575908,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 82
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1413.0,
"completions/mean_length": 1202.4375,
"completions/mean_terminated_length": 1182.60009765625,
"completions/min_length": 943.0,
"completions/min_terminated_length": 943.0,
"epoch": 0.10394489668127739,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.206094248867889,
"kl": 0.0022640228271484375,
"learning_rate": 9.999828176836682e-07,
"loss": -0.0042,
"num_tokens": 4464763.0,
"reward": 7.450580596923828e-09,
"reward_std": 0.9854896068572998,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11969234946420118,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3068885267137289,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 83
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1490.0,
"completions/mean_length": 1347.25,
"completions/mean_terminated_length": 1228.4444580078125,
"completions/min_length": 872.0,
"completions/min_terminated_length": 872.0,
"epoch": 0.10519724483406387,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3502401935196273,
"kl": 0.0025177001953125,
"learning_rate": 9.99961340095788e-07,
"loss": -0.0232,
"num_tokens": 4520295.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.0421638488769531,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04940475583906399,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10190244243958202,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12102953419784838,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 84
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1485.0,
"completions/mean_length": 1309.6875,
"completions/mean_terminated_length": 1265.769287109375,
"completions/min_length": 859.0,
"completions/min_terminated_length": 859.0,
"epoch": 0.10644959298685035,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.728411933718049,
"kl": 0.001689910888671875,
"learning_rate": 9.99931272046815e-07,
"loss": -0.0142,
"num_tokens": 4576338.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8622345924377441,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.016984465370970727,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040579939841277814,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 85
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1341.5625,
"completions/mean_terminated_length": 1269.5455322265625,
"completions/min_length": 982.0,
"completions/min_terminated_length": 982.0,
"epoch": 0.10770194113963683,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.200552135647029,
"kl": 0.002315521240234375,
"learning_rate": 9.998926141107945e-07,
"loss": 0.0351,
"num_tokens": 4618667.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8471476435661316,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22087111411084098,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24091025740898386,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 86
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1394.6875,
"completions/mean_terminated_length": 1259.2857666015625,
"completions/min_length": 1069.0,
"completions/min_terminated_length": 1069.0,
"epoch": 0.10895428929242329,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.177667386837352,
"kl": 0.002468109130859375,
"learning_rate": 9.998453670257666e-07,
"loss": 0.0024,
"num_tokens": 4675550.0,
"reward": 0.0,
"reward_std": 0.3878336548805237,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06448512648276508,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0842294519714606,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12405196043952266,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 87
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1363.0,
"completions/mean_length": 1469.6875,
"completions/mean_terminated_length": 1338.3333740234375,
"completions/min_length": 1296.0,
"completions/min_terminated_length": 1296.0,
"epoch": 0.11020663744520977,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.074898456526424,
"kl": 0.00238037109375,
"learning_rate": 9.997895316937517e-07,
"loss": 0.0066,
"num_tokens": 4734649.0,
"reward": -4.470348358154297e-08,
"reward_std": 0.9637711048126221,
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09676546074924117,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06959776462437538,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 88
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1454.0,
"completions/mean_length": 1303.1875,
"completions/mean_terminated_length": 1237.5833740234375,
"completions/min_length": 1039.0,
"completions/min_terminated_length": 1039.0,
"epoch": 0.11145898559799625,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3833185192510284,
"kl": 0.001354217529296875,
"learning_rate": 9.997251091807332e-07,
"loss": 0.0171,
"num_tokens": 4789676.0,
"reward": 0.0,
"reward_std": 1.016492486000061,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12777237426683458,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21498123308224262,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11080513425729775,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 89
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1348.0,
"completions/mean_length": 1453.0625,
"completions/mean_terminated_length": 1249.666748046875,
"completions/min_length": 1149.0,
"completions/min_terminated_length": 1149.0,
"epoch": 0.11271133375078271,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1689450227648854,
"kl": 0.002933502197265625,
"learning_rate": 9.99652100716637e-07,
"loss": -0.0062,
"num_tokens": 4847781.0,
"reward": 0.0,
"reward_std": 0.64935302734375,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16229754855451553,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20151739444607794,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.18373692949230228,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1450.0,
"completions/mean_terminated_length": 1300.0,
"completions/min_length": 1049.0,
"completions/min_terminated_length": 1049.0,
"epoch": 0.11396368190356919,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0146226006623476,
"kl": 0.002593994140625,
"learning_rate": 9.995705076953075e-07,
"loss": -0.0291,
"num_tokens": 4905421.0,
"reward": 0.0,
"reward_std": 1.0383461713790894,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06052119205813296,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12160618129006116,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09108400680852977,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 91
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 910.0,
"completions/mean_length": 1144.0625,
"completions/mean_terminated_length": 788.125,
"completions/min_length": 610.0,
"completions/min_terminated_length": 610.0,
"epoch": 0.11521603005635567,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.228817256723133,
"kl": 0.0014410018920898438,
"learning_rate": 9.994803316744828e-07,
"loss": 0.0105,
"num_tokens": 4950462.0,
"reward": -4.470348358154297e-08,
"reward_std": 0.9390549659729004,
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07564319510568883,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1514996148617109,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15770342536029575,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 92
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1471.0,
"completions/max_terminated_length": 1471.0,
"completions/mean_length": 1093.8125,
"completions/mean_terminated_length": 1093.8125,
"completions/min_length": 638.0,
"completions/min_terminated_length": 638.0,
"epoch": 0.11646837820914215,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.6363277397384617,
"kl": 0.002498626708984375,
"learning_rate": 9.993815743757633e-07,
"loss": -0.0484,
"num_tokens": 4983835.0,
"reward": 0.0,
"reward_std": 0.8996579647064209,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0037569304970198007,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07736656048737343,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.2014760347847669,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 93
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1494.0,
"completions/mean_length": 1417.75,
"completions/mean_terminated_length": 1335.5,
"completions/min_length": 1111.0,
"completions/min_terminated_length": 1111.0,
"epoch": 0.11772072636192861,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.20990587817039,
"kl": 0.002735137939453125,
"learning_rate": 9.99274237684579e-07,
"loss": 0.004,
"num_tokens": 5030407.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6368776559829712,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029770016601004534,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0349532410691535,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10461569884316813,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 94
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1347.0,
"completions/max_terminated_length": 1347.0,
"completions/mean_length": 926.8125,
"completions/mean_terminated_length": 926.8125,
"completions/min_length": 631.0,
"completions/min_terminated_length": 631.0,
"epoch": 0.11897307451471509,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7699870834508333,
"kl": 0.0008082389831542969,
"learning_rate": 9.99158323650154e-07,
"loss": -0.0527,
"num_tokens": 5074556.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0668668746948242,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01722883909028131,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19517428674960768,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0843274042711568,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 95
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1393.0,
"completions/mean_length": 1283.5625,
"completions/mean_terminated_length": 1115.2222900390625,
"completions/min_length": 942.0,
"completions/min_terminated_length": 942.0,
"epoch": 0.12022542266750157,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2021886227663034,
"kl": 0.002685546875,
"learning_rate": 9.990338344854676e-07,
"loss": -0.0074,
"num_tokens": 5120597.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9720104336738586,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024841432663237503,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17561297504079998,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 96
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1486.0,
"completions/mean_length": 1415.3125,
"completions/mean_terminated_length": 1364.5,
"completions/min_length": 1206.0,
"completions/min_terminated_length": 1206.0,
"epoch": 0.12147777082028804,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2246665277704185,
"kl": 0.002559661865234375,
"learning_rate": 9.989007725672113e-07,
"loss": 0.0063,
"num_tokens": 5158170.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7684129476547241,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020625,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0825,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1586400537905439,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 97
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1334.0,
"completions/mean_length": 1175.3125,
"completions/mean_terminated_length": 922.7777709960938,
"completions/min_length": 596.0,
"completions/min_terminated_length": 596.0,
"epoch": 0.12273011897307451,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.169149502657088,
"kl": 0.00231170654296875,
"learning_rate": 9.987591404357437e-07,
"loss": -0.0811,
"num_tokens": 5215647.0,
"reward": 0.0,
"reward_std": 0.9120274782180786,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005036444545787546,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10234315753446507,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1387777332977422,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 98
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1366.0,
"completions/mean_length": 1264.5,
"completions/mean_terminated_length": 1123.2000732421875,
"completions/min_length": 983.0,
"completions/min_terminated_length": 983.0,
"epoch": 0.12398246712586099,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9409598040312765,
"kl": 0.002063751220703125,
"learning_rate": 9.986089407950426e-07,
"loss": -0.0453,
"num_tokens": 5250879.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0199556350708008,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11830339701018143,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.25916185560707883,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 99
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1437.0,
"completions/mean_length": 1406.9375,
"completions/mean_terminated_length": 1251.8333740234375,
"completions/min_length": 906.0,
"completions/min_terminated_length": 906.0,
"epoch": 0.12523481527864747,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.236489502184281,
"kl": 0.0029754638671875,
"learning_rate": 9.98450176512652e-07,
"loss": 0.0261,
"num_tokens": 5303030.0,
"reward": 0.0,
"reward_std": 0.8868198990821838,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.14501472660672157,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15004116932595393,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1172998689652263,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 100
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1468.0,
"completions/mean_length": 1374.0625,
"completions/mean_terminated_length": 1332.0833740234375,
"completions/min_length": 1208.0,
"completions/min_terminated_length": 1208.0,
"epoch": 0.12648716343143393,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7393115321098898,
"kl": 0.0021686553955078125,
"learning_rate": 9.982828506196295e-07,
"loss": 0.0475,
"num_tokens": 5348991.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.744665265083313,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16115596269847898,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19475646493041288,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 101
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1472.0,
"completions/mean_length": 1392.25,
"completions/mean_terminated_length": 1284.5,
"completions/min_length": 957.0,
"completions/min_terminated_length": 957.0,
"epoch": 0.12773951158422042,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.465229932517055,
"kl": 0.00170135498046875,
"learning_rate": 9.981069663104853e-07,
"loss": -0.0292,
"num_tokens": 5393291.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9994131326675415,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010671914654693294,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027094219261353553,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 102
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1388.5625,
"completions/mean_terminated_length": 1277.125,
"completions/min_length": 1062.0,
"completions/min_terminated_length": 1062.0,
"epoch": 0.1289918597370069,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.97886098445916,
"kl": 0.00238800048828125,
"learning_rate": 9.979225269431252e-07,
"loss": 0.0455,
"num_tokens": 5437588.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0143799781799316,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0018910121903646018,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21804038685357507,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057183,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 103
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1482.0,
"completions/mean_length": 1159.8125,
"completions/mean_terminated_length": 955.7000122070312,
"completions/min_length": 402.0,
"completions/min_terminated_length": 402.0,
"epoch": 0.13024420788979335,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2070297326725687,
"kl": 0.0024261474609375,
"learning_rate": 9.977295360387827e-07,
"loss": -0.0325,
"num_tokens": 5469273.0,
"reward": 0.0,
"reward_std": 0.848124623298645,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0002889221914715882,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03991849505429317,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.4875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1495053572680653,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 104
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1405.4375,
"completions/mean_terminated_length": 1247.8333740234375,
"completions/min_length": 959.0,
"completions/min_terminated_length": 959.0,
"epoch": 0.13149655604257984,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1693981909983457,
"kl": 0.00269317626953125,
"learning_rate": 9.97527997281954e-07,
"loss": -0.0085,
"num_tokens": 5527744.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0289491415023804,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07601873282977642,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2329329780235847,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 105
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1250.0,
"completions/mean_length": 1258.5625,
"completions/mean_terminated_length": 1070.77783203125,
"completions/min_length": 958.0,
"completions/min_terminated_length": 958.0,
"epoch": 0.1327489041953663,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4731519538264903,
"kl": 0.0015192031860351562,
"learning_rate": 9.973179145203272e-07,
"loss": -0.0122,
"num_tokens": 5571305.0,
"reward": 0.0,
"reward_std": 1.046633243560791,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027299266065874364,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09683294681842305,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
"step": 106
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1376.0,
"completions/mean_length": 1361.875,
"completions/mean_terminated_length": 1223.75,
"completions/min_length": 937.0,
"completions/min_terminated_length": 937.0,
"epoch": 0.1340012523481528,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.781242946832243,
"kl": 0.0024871826171875,
"learning_rate": 9.970992917647088e-07,
"loss": -0.0163,
"num_tokens": 5617855.0,
"reward": 0.0,
"reward_std": 0.9318596124649048,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19798356691808755,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.29651415192877617,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11021863793455328,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 107
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1487.0,
"completions/mean_length": 1146.0625,
"completions/mean_terminated_length": 1122.4666748046875,
"completions/min_length": 848.0,
"completions/min_terminated_length": 848.0,
"epoch": 0.13525360050093926,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9997009952780855,
"kl": 0.0022125244140625,
"learning_rate": 9.968721331889465e-07,
"loss": 0.0235,
"num_tokens": 5654992.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0186116695404053,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0558045951815816,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.029030233660680062,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242312,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 108
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1390.0,
"completions/mean_length": 1410.4375,
"completions/mean_terminated_length": 1261.166748046875,
"completions/min_length": 1123.0,
"completions/min_terminated_length": 1123.0,
"epoch": 0.13650594865372573,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9682648437410637,
"kl": 0.002681732177734375,
"learning_rate": 9.966364431298509e-07,
"loss": -0.022,
"num_tokens": 5711927.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0176870822906494,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.26425948065238597,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.28899722395436095,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820636,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 109
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1455.0,
"completions/mean_length": 1212.4375,
"completions/mean_terminated_length": 1081.727294921875,
"completions/min_length": 791.0,
"completions/min_terminated_length": 791.0,
"epoch": 0.13775829680651222,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.323352542220494,
"kl": 0.002532958984375,
"learning_rate": 9.963922260871115e-07,
"loss": -0.0134,
"num_tokens": 5754094.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.9666612148284912,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.051175618061779164,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.039320213077717464,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5833333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516195,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 110
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1486.0,
"completions/mean_length": 1485.25,
"completions/mean_terminated_length": 1421.3333740234375,
"completions/min_length": 1380.0,
"completions/min_terminated_length": 1380.0,
"epoch": 0.13901064495929868,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8360270019245446,
"kl": 0.0024871826171875,
"learning_rate": 9.9613948672321e-07,
"loss": -0.0014,
"num_tokens": 5814162.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0610442161560059,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009639880768854782,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045421738289270215,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057184,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 111
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1323.0,
"completions/mean_length": 1434.75,
"completions/mean_terminated_length": 1239.0,
"completions/min_length": 1100.0,
"completions/min_terminated_length": 1100.0,
"epoch": 0.14026299311208515,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.999336260576351,
"kl": 0.0024852752685546875,
"learning_rate": 9.958782298633351e-07,
"loss": -0.0196,
"num_tokens": 5879078.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7917496562004089,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03413289340922598,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05688585018947227,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 112
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1290.0,
"completions/mean_length": 1377.4375,
"completions/mean_terminated_length": 1173.166748046875,
"completions/min_length": 998.0,
"completions/min_terminated_length": 998.0,
"epoch": 0.14151534126487164,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.661753098948472,
"kl": 0.0021266937255859375,
"learning_rate": 9.95608460495285e-07,
"loss": -0.0087,
"num_tokens": 5933045.0,
"reward": 2.2351741790771484e-08,
"reward_std": 1.0039006471633911,
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05284198848548562,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05437266883758088,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 113
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1187.0,
"completions/mean_length": 1322.25,
"completions/mean_terminated_length": 1093.71435546875,
"completions/min_length": 991.0,
"completions/min_terminated_length": 991.0,
"epoch": 0.1427676894176581,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8780144010263284,
"kl": 0.0020542144775390625,
"learning_rate": 9.953301837693767e-07,
"loss": 0.003,
"num_tokens": 5979113.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8175742626190186,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005502994719066203,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06974582191643876,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0758897836290186,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 114
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1401.1875,
"completions/mean_terminated_length": 1324.3333740234375,
"completions/min_length": 1118.0,
"completions/min_terminated_length": 1118.0,
"epoch": 0.14402003757044457,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.139329899307579,
"kl": 0.002716064453125,
"learning_rate": 9.95043404998345e-07,
"loss": 0.0292,
"num_tokens": 6040452.0,
"reward": 0.0,
"reward_std": 1.0616416931152344,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03748903917915849,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14395002297286164,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1641476300299351,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 115
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1452.0,
"completions/mean_length": 1185.375,
"completions/mean_terminated_length": 1140.4285888671875,
"completions/min_length": 804.0,
"completions/min_terminated_length": 804.0,
"epoch": 0.14527238572323106,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3086990939983667,
"kl": 0.0029144287109375,
"learning_rate": 9.947481296572423e-07,
"loss": -0.014,
"num_tokens": 6090810.0,
"reward": -2.2351741790771484e-08,
"reward_std": 1.0066075325012207,
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05014218857813404,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09276403913432626,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 116
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1433.0,
"completions/mean_length": 1351.9375,
"completions/mean_terminated_length": 1263.0999755859375,
"completions/min_length": 1043.0,
"completions/min_terminated_length": 1043.0,
"epoch": 0.14652473387601753,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.035226656450535,
"kl": 0.002376556396484375,
"learning_rate": 9.944443633833335e-07,
"loss": 0.0179,
"num_tokens": 6148881.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7348309755325317,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0762897874284947,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12841725021840134,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036262,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 117
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1392.0,
"completions/mean_length": 1474.6875,
"completions/mean_terminated_length": 1297.5,
"completions/min_length": 1203.0,
"completions/min_terminated_length": 1203.0,
"epoch": 0.14777708202880402,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9234616863737957,
"kl": 0.0024566650390625,
"learning_rate": 9.94132111975989e-07,
"loss": 0.0031,
"num_tokens": 6213916.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.5194555521011353,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.018562499999999996,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.024749999999999994,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437974,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 118
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1228.0,
"completions/mean_length": 1220.0625,
"completions/mean_terminated_length": 940.125,
"completions/min_length": 820.0,
"completions/min_terminated_length": 820.0,
"epoch": 0.14902943018159048,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7331417657136603,
"kl": 0.002094268798828125,
"learning_rate": 9.93811381396573e-07,
"loss": -0.0031,
"num_tokens": 6257485.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7746272087097168,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02009986693954008,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07362867807980181,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 119
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1400.375,
"completions/mean_terminated_length": 1340.5999755859375,
"completions/min_length": 1181.0,
"completions/min_terminated_length": 1181.0,
"epoch": 0.15028177833437695,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.834637825788323,
"kl": 0.003093719482421875,
"learning_rate": 9.934821777683306e-07,
"loss": 0.0269,
"num_tokens": 6319963.0,
"reward": 0.0,
"reward_std": 1.0544224977493286,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.18103321643586406,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14394672405121658,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8083333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 120
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1428.0,
"completions/mean_length": 1325.5,
"completions/mean_terminated_length": 1189.77783203125,
"completions/min_length": 853.0,
"completions/min_terminated_length": 853.0,
"epoch": 0.15153412648716344,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2642836490036453,
"kl": 0.0030364990234375,
"learning_rate": 9.93144507376271e-07,
"loss": -0.005,
"num_tokens": 6385427.0,
"reward": 0.0,
"reward_std": 0.8268899917602539,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1112911236291226,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1569615458099141,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6124999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568493,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 121
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1465.0,
"completions/mean_length": 1305.875,
"completions/mean_terminated_length": 1217.6363525390625,
"completions/min_length": 922.0,
"completions/min_terminated_length": 922.0,
"epoch": 0.1527864746399499,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6641398914857923,
"kl": 0.002033233642578125,
"learning_rate": 9.927983766670462e-07,
"loss": -0.0098,
"num_tokens": 6440177.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0115642547607422,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06872988161057395,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1025211626906069,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 122
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1338.0,
"completions/mean_length": 1239.6875,
"completions/mean_terminated_length": 1083.5,
"completions/min_length": 886.0,
"completions/min_terminated_length": 886.0,
"epoch": 0.15403882279273637,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9543701078797575,
"kl": 0.0018558502197265625,
"learning_rate": 9.924437922488291e-07,
"loss": 0.0245,
"num_tokens": 6498212.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6738491654396057,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038590091343060344,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09510012784467493,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12524050936172842,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 123
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1499.0,
"completions/mean_length": 1403.625,
"completions/mean_terminated_length": 1345.800048828125,
"completions/min_length": 1121.0,
"completions/min_terminated_length": 1121.0,
"epoch": 0.15529117094552286,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5178341976839556,
"kl": 0.0033111572265625,
"learning_rate": 9.920807608911876e-07,
"loss": 0.0022,
"num_tokens": 6553902.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8796525597572327,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.013190710670885862,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1480868926971966,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 124
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 825.0,
"completions/mean_length": 1112.5625,
"completions/mean_terminated_length": 725.125,
"completions/min_length": 613.0,
"completions/min_terminated_length": 613.0,
"epoch": 0.15654351909830932,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.378105432163651,
"kl": 0.0008687973022460938,
"learning_rate": 9.917092895249543e-07,
"loss": -0.0272,
"num_tokens": 6589311.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9441956877708435,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014329624416098018,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.112902138916422,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12758439472669758,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 125
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1404.0,
"completions/mean_length": 1137.0625,
"completions/mean_terminated_length": 1053.3077392578125,
"completions/min_length": 749.0,
"completions/min_terminated_length": 749.0,
"epoch": 0.15779586725109582,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2527212324187826,
"kl": 0.002117156982421875,
"learning_rate": 9.913293852420946e-07,
"loss": -0.0249,
"num_tokens": 6618304.0,
"reward": 2.2351741790771484e-08,
"reward_std": 1.035041093826294,
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007633954846541112,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.032194935573291575,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.103905227473387,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 126
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1405.0,
"completions/mean_length": 1462.125,
"completions/mean_terminated_length": 1348.5,
"completions/min_length": 1255.0,
"completions/min_terminated_length": 1255.0,
"epoch": 0.15904821540388228,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.091983329824593,
"kl": 0.00301361083984375,
"learning_rate": 9.909410552955712e-07,
"loss": 0.0155,
"num_tokens": 6681314.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.784981369972229,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12737730164130195,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21747166290242714,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.093392838174146,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 127
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1474.0,
"completions/mean_length": 1378.875,
"completions/mean_terminated_length": 1223.1429443359375,
"completions/min_length": 904.0,
"completions/min_terminated_length": 904.0,
"epoch": 0.16030056355666875,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2503169455658982,
"kl": 0.002620697021484375,
"learning_rate": 9.905443070992068e-07,
"loss": -0.0039,
"num_tokens": 6723448.0,
"reward": -3.3527612686157227e-08,
"reward_std": 1.06490159034729,
"rewards/wordcountpos_reward_ecommerce/mean": -3.3527612686157227e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07877405649297206,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0705921273253386,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 128
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1476.0,
"completions/mean_length": 1418.0,
"completions/mean_terminated_length": 1281.3333740234375,
"completions/min_length": 1167.0,
"completions/min_terminated_length": 1167.0,
"epoch": 0.16155291170945524,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.419069025864104,
"kl": 0.003154754638671875,
"learning_rate": 9.901391482275403e-07,
"loss": -0.0084,
"num_tokens": 6774208.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9308052062988281,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06996807244867725,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1266299752409378,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 129
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1098.0,
"completions/mean_length": 1249.3125,
"completions/mean_terminated_length": 998.625,
"completions/min_length": 929.0,
"completions/min_terminated_length": 929.0,
"epoch": 0.1628052598622417,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.593196585544164,
"kl": 0.001987457275390625,
"learning_rate": 9.897255864156847e-07,
"loss": 0.0036,
"num_tokens": 6807421.0,
"reward": 0.0,
"reward_std": 0.4564354419708252,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0429616858320893,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07600285040401121,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 130
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1394.0,
"completions/mean_length": 1382.375,
"completions/mean_terminated_length": 1231.1429443359375,
"completions/min_length": 1075.0,
"completions/min_terminated_length": 1075.0,
"epoch": 0.16405760801502817,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0914996686420104,
"kl": 0.002330780029296875,
"learning_rate": 9.893036295591768e-07,
"loss": -0.0116,
"num_tokens": 6866379.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.9815191626548767,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04217953361323695,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06871670933278229,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 131
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1424.0,
"completions/mean_length": 1347.125,
"completions/mean_terminated_length": 1150.571533203125,
"completions/min_length": 371.0,
"completions/min_terminated_length": 371.0,
"epoch": 0.16530995616781466,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.001502145266533,
"kl": 0.0022430419921875,
"learning_rate": 9.888732857138291e-07,
"loss": -0.04,
"num_tokens": 6912533.0,
"reward": 0.0,
"reward_std": 0.8428164720535278,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020130872057838745,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04873657297962695,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369003,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 132
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 882.0,
"completions/mean_length": 1177.75,
"completions/mean_terminated_length": 855.5,
"completions/min_length": 795.0,
"completions/min_terminated_length": 795.0,
"epoch": 0.16656230432060112,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.849369584207102,
"kl": 0.00208282470703125,
"learning_rate": 9.884345630955742e-07,
"loss": -0.0097,
"num_tokens": 6966273.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0472090244293213,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08003635148497827,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09874522821696813,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823629,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 133
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1491.0,
"completions/mean_length": 1488.0,
"completions/mean_terminated_length": 1404.0,
"completions/min_length": 1317.0,
"completions/min_terminated_length": 1317.0,
"epoch": 0.16781465247338762,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.688722282572705,
"kl": 0.0026092529296875,
"learning_rate": 9.879874700803082e-07,
"loss": 0.0158,
"num_tokens": 7027657.0,
"reward": 3.166496753692627e-08,
"reward_std": 1.0543937683105469,
"rewards/wordcountpos_reward_ecommerce/mean": 3.166496753692627e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11540214745824308,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23102363071615145,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 134
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1312.0,
"completions/mean_length": 1146.375,
"completions/mean_terminated_length": 1064.769287109375,
"completions/min_length": 858.0,
"completions/min_terminated_length": 858.0,
"epoch": 0.16906700062617408,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.664759257998968,
"kl": 0.0027923583984375,
"learning_rate": 9.875320152037318e-07,
"loss": -0.0535,
"num_tokens": 7084095.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8985534906387329,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23834962043700852,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.27030996076033054,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369006,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 135
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1237.0,
"completions/mean_length": 1274.875,
"completions/mean_terminated_length": 1049.75,
"completions/min_length": 931.0,
"completions/min_terminated_length": 931.0,
"epoch": 0.17031934877896054,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4849191974864953,
"kl": 0.0015964508056640625,
"learning_rate": 9.870682071611862e-07,
"loss": 0.0064,
"num_tokens": 7133293.0,
"reward": 0.0,
"reward_std": 0.6105766892433167,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02324001170505371,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08829030406958045,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 136
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1462.0,
"completions/max_terminated_length": 1462.0,
"completions/mean_length": 955.6875,
"completions/mean_terminated_length": 955.6875,
"completions/min_length": 761.0,
"completions/min_terminated_length": 761.0,
"epoch": 0.17157169693174704,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.620633996964256,
"kl": 0.0014123916625976562,
"learning_rate": 9.865960548074874e-07,
"loss": 0.0103,
"num_tokens": 7187688.0,
"reward": 5.960464477539063e-08,
"reward_std": 0.6596803069114685,
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0038102094327885448,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12227248665731598,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 137
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1478.0,
"completions/mean_length": 1322.0,
"completions/mean_terminated_length": 1093.1429443359375,
"completions/min_length": 733.0,
"completions/min_terminated_length": 733.0,
"epoch": 0.1728240450845335,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8032617883799893,
"kl": 0.0021915435791015625,
"learning_rate": 9.861155671567572e-07,
"loss": 0.0513,
"num_tokens": 7236832.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.938301682472229,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06466602322499601,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05158824252677371,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5666666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820632,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 138
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1335.8125,
"completions/mean_terminated_length": 1237.300048828125,
"completions/min_length": 944.0,
"completions/min_terminated_length": 944.0,
"epoch": 0.17407639323731997,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.6291703245010103,
"kl": 0.00284576416015625,
"learning_rate": 9.856267533822519e-07,
"loss": -0.021,
"num_tokens": 7293301.0,
"reward": 0.0,
"reward_std": 0.7662729024887085,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08970693607829759,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16534434492549577,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1970147578604578,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 139
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1340.0,
"completions/mean_length": 1120.8125,
"completions/mean_terminated_length": 994.4166870117188,
"completions/min_length": 844.0,
"completions/min_terminated_length": 844.0,
"epoch": 0.17532874139010646,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6820256510644604,
"kl": 0.0022602081298828125,
"learning_rate": 9.851296228161857e-07,
"loss": 0.019,
"num_tokens": 7341130.0,
"reward": 0.0,
"reward_std": 0.8969849348068237,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026873742767844065,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0656536955300479,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 140
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1404.0,
"completions/mean_length": 1399.75,
"completions/mean_terminated_length": 1232.666748046875,
"completions/min_length": 936.0,
"completions/min_terminated_length": 936.0,
"epoch": 0.17658108954289292,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.372829326628183,
"kl": 0.003108978271484375,
"learning_rate": 9.846241849495535e-07,
"loss": 0.0153,
"num_tokens": 7410982.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9778778553009033,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05033218082218886,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.025136378125956142,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13333333333333333,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 141
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1449.5625,
"completions/mean_terminated_length": 1399.125,
"completions/min_length": 1240.0,
"completions/min_terminated_length": 1240.0,
"epoch": 0.17783343769567939,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.396651196247218,
"kl": 0.001613616943359375,
"learning_rate": 9.841104494319492e-07,
"loss": -0.0053,
"num_tokens": 7468879.0,
"reward": 0.0,
"reward_std": 0.6796972155570984,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021608644866332537,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11519923314511032,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0739118594202782,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 142
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1458.0,
"completions/mean_length": 1434.4375,
"completions/mean_terminated_length": 1350.1429443359375,
"completions/min_length": 1215.0,
"completions/min_terminated_length": 1215.0,
"epoch": 0.17908578584846588,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6795502864911453,
"kl": 0.00296783447265625,
"learning_rate": 9.835884260713826e-07,
"loss": 0.0053,
"num_tokens": 7526334.0,
"reward": 7.450580596923828e-09,
"reward_std": 0.9401005506515503,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05878136743445916,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15259208491300538,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043481,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 143
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1271.0,
"completions/mean_length": 1081.1875,
"completions/mean_terminated_length": 1021.357177734375,
"completions/min_length": 760.0,
"completions/min_terminated_length": 760.0,
"epoch": 0.18033813400125234,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4443025787306487,
"kl": 0.0014491081237792969,
"learning_rate": 9.830581248340904e-07,
"loss": 0.0523,
"num_tokens": 7560449.0,
"reward": 0.0,
"reward_std": 0.6386822462081909,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030072721096349574,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07933031547923879,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081408,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 144
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1438.0,
"completions/mean_length": 1464.875,
"completions/mean_terminated_length": 1312.666748046875,
"completions/min_length": 1143.0,
"completions/min_terminated_length": 1143.0,
"epoch": 0.18159048215403883,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.281062477288285,
"kl": 0.00322723388671875,
"learning_rate": 9.82519555844347e-07,
"loss": 0.0292,
"num_tokens": 7621295.0,
"reward": 0.0,
"reward_std": 0.8289343118667603,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12500933186269494,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10441096539901965,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 145
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1440.0,
"completions/mean_length": 1436.875,
"completions/mean_terminated_length": 1298.0,
"completions/min_length": 1137.0,
"completions/min_terminated_length": 1137.0,
"epoch": 0.1828428303068253,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.852746135375484,
"kl": 0.0022106170654296875,
"learning_rate": 9.819727293842715e-07,
"loss": -0.0099,
"num_tokens": 7663125.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9735676646232605,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008435227123041298,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08786012223776958,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1299572579307862,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 146
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1465.0,
"completions/mean_length": 1447.0625,
"completions/mean_terminated_length": 1288.25,
"completions/min_length": 1028.0,
"completions/min_terminated_length": 1028.0,
"epoch": 0.18409517845961176,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7900001366496268,
"kl": 0.002574920654296875,
"learning_rate": 9.814176558936306e-07,
"loss": 0.0107,
"num_tokens": 7727518.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.3397839367389679,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09743503994599206,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16748018946124937,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316068,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 147
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 863.0,
"completions/mean_length": 1135.0625,
"completions/mean_terminated_length": 770.125,
"completions/min_length": 571.0,
"completions/min_terminated_length": 571.0,
"epoch": 0.18534752661239826,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.560665055023378,
"kl": 0.00197601318359375,
"learning_rate": 9.808543459696394e-07,
"loss": -0.0149,
"num_tokens": 7771327.0,
"reward": 0.0,
"reward_std": 0.9778045415878296,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.20066201620356428,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3214780108822807,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0938872452190116,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 148
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1456.0,
"completions/mean_length": 1307.375,
"completions/mean_terminated_length": 1157.5555419921875,
"completions/min_length": 336.0,
"completions/min_terminated_length": 336.0,
"epoch": 0.18659987476518472,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.698308959281013,
"kl": 0.003589630126953125,
"learning_rate": 9.802828103667598e-07,
"loss": 0.0049,
"num_tokens": 7824917.0,
"reward": -9.313225746154785e-09,
"reward_std": 0.929603099822998,
"rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0017376960374372932,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03411053398366144,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1954576775256058,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 149
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1390.0,
"completions/mean_length": 1350.8125,
"completions/mean_terminated_length": 1234.77783203125,
"completions/min_length": 897.0,
"completions/min_terminated_length": 897.0,
"epoch": 0.18785222291797118,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.37227831535049,
"kl": 0.003437042236328125,
"learning_rate": 9.797030599964946e-07,
"loss": -0.0282,
"num_tokens": 7879658.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6427962779998779,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0803417321639054,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11525098223680169,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1253144193766372,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 150
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1446.0625,
"completions/mean_terminated_length": 1327.4000244140625,
"completions/min_length": 1075.0,
"completions/min_terminated_length": 1075.0,
"epoch": 0.18910457107075768,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6387419512111028,
"kl": 0.002285003662109375,
"learning_rate": 9.791151059271787e-07,
"loss": -0.0106,
"num_tokens": 7927819.0,
"reward": 0.0,
"reward_std": 0.9979233145713806,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657694240337725,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20181152584757237,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194862,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 151
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1493.0,
"completions/mean_length": 1422.1875,
"completions/mean_terminated_length": 1344.375,
"completions/min_length": 1078.0,
"completions/min_terminated_length": 1078.0,
"epoch": 0.19035691922354414,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8499814649815485,
"kl": 0.00269317626953125,
"learning_rate": 9.78518959383769e-07,
"loss": -0.0267,
"num_tokens": 7979030.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6457971334457397,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032289559957375875,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03678022720872768,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 152
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.19160926737633063,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7439239666738633,
"kl": 0.00275421142578125,
"learning_rate": 9.779146317476294e-07,
"loss": 0.0001,
"num_tokens": 8039006.0,
"reward": -1.1175870895385742e-08,
"reward_std": 1.0521876811981201,
"rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09760563861386369,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10390475856290554,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533111,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 153
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 1500.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 1500.0,
"epoch": 0.1928616155291171,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0303320592221237,
"kl": 0.003154754638671875,
"learning_rate": 9.773021345563133e-07,
"loss": 0.0001,
"num_tokens": 8103454.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0458917617797852,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.25916260149601344,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18093382728997642,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 154
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1470.0,
"completions/mean_length": 1404.1875,
"completions/mean_terminated_length": 1346.7000732421875,
"completions/min_length": 1252.0,
"completions/min_terminated_length": 1252.0,
"epoch": 0.19411396368190356,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7184034168824684,
"kl": 0.002166748046875,
"learning_rate": 9.766814795033438e-07,
"loss": 0.0074,
"num_tokens": 8157473.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9921345710754395,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05048016331986036,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1222984521625515,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07187952884282611,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 155
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1361.0,
"completions/mean_length": 1382.875,
"completions/mean_terminated_length": 1031.5,
"completions/min_length": 853.0,
"completions/min_terminated_length": 853.0,
"epoch": 0.19536631183469005,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.7493199389788998,
"kl": 0.002361297607421875,
"learning_rate": 9.7605267843799e-07,
"loss": -0.0294,
"num_tokens": 8204367.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7406100630760193,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4396175531814227,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.42217210131772864,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970789,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 156
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1403.0,
"completions/mean_length": 1252.6875,
"completions/mean_terminated_length": 1104.300048828125,
"completions/min_length": 780.0,
"completions/min_terminated_length": 780.0,
"epoch": 0.19661865998747652,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1812238222319373,
"kl": 0.002895355224609375,
"learning_rate": 9.754157433650416e-07,
"loss": 0.0099,
"num_tokens": 8250426.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7503967881202698,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09038614901064657,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10393207102574,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 157
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1470.0,
"completions/mean_length": 1485.9375,
"completions/mean_terminated_length": 1425.0,
"completions/min_length": 1367.0,
"completions/min_terminated_length": 1367.0,
"epoch": 0.19787100814026298,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.274381341557373,
"kl": 0.0019855499267578125,
"learning_rate": 9.74770686444578e-07,
"loss": -0.0039,
"num_tokens": 8312649.0,
"reward": 2.9802322387695312e-08,
"reward_std": 1.0463612079620361,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01854492153050523,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07355929227115507,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333334,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 158
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1296.0,
"completions/mean_length": 1468.6875,
"completions/mean_terminated_length": 1249.5,
"completions/min_length": 1203.0,
"completions/min_terminated_length": 1203.0,
"epoch": 0.19912335629304947,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.037789984063529,
"kl": 0.00295257568359375,
"learning_rate": 9.74117519991739e-07,
"loss": 0.0195,
"num_tokens": 8372460.0,
"reward": 5.960464477539063e-08,
"reward_std": 0.6518849730491638,
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010805361779511215,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10954072593469087,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12041594578792295,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 159
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1353.0,
"completions/mean_length": 1422.5625,
"completions/mean_terminated_length": 1293.5,
"completions/min_length": 1240.0,
"completions/min_terminated_length": 1240.0,
"epoch": 0.20037570444583594,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4914861954275853,
"kl": 0.0020751953125,
"learning_rate": 9.734562564764863e-07,
"loss": -0.0084,
"num_tokens": 8441477.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0050157308578491,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20082839440532127,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24021378555176306,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593311,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 160
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1444.0,
"completions/mean_length": 1496.5,
"completions/mean_terminated_length": 1444.0,
"completions/min_length": 1444.0,
"completions/min_terminated_length": 1444.0,
"epoch": 0.2016280525986224,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.96733321823591,
"kl": 0.003032684326171875,
"learning_rate": 9.727869085233683e-07,
"loss": 0.0008,
"num_tokens": 8500525.0,
"reward": 0.0,
"reward_std": 1.0511749982833862,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.30725599890646893,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11503663852918616,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 161
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1417.0,
"completions/mean_length": 1405.9375,
"completions/mean_terminated_length": 1199.0,
"completions/min_length": 992.0,
"completions/min_terminated_length": 992.0,
"epoch": 0.2028804007514089,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1286840232588795,
"kl": 0.003131866455078125,
"learning_rate": 9.721094889112769e-07,
"loss": -0.0017,
"num_tokens": 8561668.0,
"reward": 0.0,
"reward_std": 1.0658842325210571,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3530029462031852,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3684803684710799,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 162
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1408.4375,
"completions/mean_terminated_length": 1290.71435546875,
"completions/min_length": 1074.0,
"completions/min_terminated_length": 1074.0,
"epoch": 0.20413274890419536,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9421524856257575,
"kl": 0.003437042236328125,
"learning_rate": 9.714240105732056e-07,
"loss": -0.0217,
"num_tokens": 8611395.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8545268774032593,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004406093333840853,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07521193600811737,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 163
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1406.0,
"completions/mean_length": 1383.4375,
"completions/mean_terminated_length": 878.3333740234375,
"completions/min_length": 209.0,
"completions/min_terminated_length": 209.0,
"epoch": 0.20538509705698185,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8733824694411965,
"kl": 0.002685546875,
"learning_rate": 9.707304865960003e-07,
"loss": 0.0086,
"num_tokens": 8668282.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.019072413444519,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09355282337201007,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11627823063016991,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12619796324000607,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 164
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1349.0,
"completions/mean_length": 1461.0625,
"completions/mean_terminated_length": 1292.3333740234375,
"completions/min_length": 1182.0,
"completions/min_terminated_length": 1182.0,
"epoch": 0.20663744520976832,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0251334252111324,
"kl": 0.00315093994140625,
"learning_rate": 9.700289302201118e-07,
"loss": -0.0054,
"num_tokens": 8726843.0,
"reward": 2.2351741790771484e-08,
"reward_std": 0.9717680215835571,
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03285324398900216,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1305907322232915,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 165
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1479.9375,
"completions/mean_terminated_length": 1339.5,
"completions/min_length": 1187.0,
"completions/min_terminated_length": 1187.0,
"epoch": 0.20788979336255478,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1164633460783593,
"kl": 0.00360107421875,
"learning_rate": 9.69319354839341e-07,
"loss": -0.01,
"num_tokens": 8774074.0,
"reward": 0.0,
"reward_std": 0.6172374486923218,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010136480012205995,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05376440319397317,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797316,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 166
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1453.0,
"completions/mean_length": 1443.6875,
"completions/mean_terminated_length": 1371.2857666015625,
"completions/min_length": 1221.0,
"completions/min_terminated_length": 1221.0,
"epoch": 0.20914214151534127,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8681254932066893,
"kl": 0.003143310546875,
"learning_rate": 9.686017740005845e-07,
"loss": -0.0029,
"num_tokens": 8833421.0,
"reward": 0.0,
"reward_std": 1.049817442893982,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23814174262345672,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24595173419132288,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457554,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 167
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1483.0,
"completions/mean_length": 1498.9375,
"completions/mean_terminated_length": 1483.0,
"completions/min_length": 1483.0,
"completions/min_terminated_length": 1483.0,
"epoch": 0.21039448966812774,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.699748602413328,
"kl": 0.002422332763671875,
"learning_rate": 9.678762014035755e-07,
"loss": 0.001,
"num_tokens": 8896332.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8306390047073364,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01676756574749607,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03592053954406261,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 168
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1481.0,
"completions/mean_length": 1463.25,
"completions/mean_terminated_length": 1304.0,
"completions/min_length": 1117.0,
"completions/min_terminated_length": 1117.0,
"epoch": 0.2116468378209142,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9281066984860664,
"kl": 0.0030670166015625,
"learning_rate": 9.67142650900622e-07,
"loss": 0.0284,
"num_tokens": 8960800.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.051703691482544,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02864644527108891,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12265684181148895,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 169
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1385.0,
"completions/mean_length": 1492.8125,
"completions/mean_terminated_length": 1385.0,
"completions/min_length": 1385.0,
"completions/min_terminated_length": 1385.0,
"epoch": 0.2128991859737007,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.719524959962169,
"kl": 0.002513885498046875,
"learning_rate": 9.664011364963427e-07,
"loss": -0.0014,
"num_tokens": 9014901.0,
"reward": 0.0,
"reward_std": 0.6419066190719604,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026585625959977408,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774533640389131,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901158,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 170
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1467.0,
"completions/mean_length": 1398.875,
"completions/mean_terminated_length": 1268.857177734375,
"completions/min_length": 977.0,
"completions/min_terminated_length": 977.0,
"epoch": 0.21415153412648716,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2711757628292637,
"kl": 0.003734588623046875,
"learning_rate": 9.656516723474003e-07,
"loss": 0.0199,
"num_tokens": 9082635.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7075515985488892,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04754440907840732,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19258567827157586,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11155467020454342,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 171
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1500.0,
"completions/mean_length": 1372.0,
"completions/mean_terminated_length": 1244.0,
"completions/min_length": 414.0,
"completions/min_terminated_length": 414.0,
"epoch": 0.21540388227927365,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.130031481410935,
"kl": 0.003208160400390625,
"learning_rate": 9.648942727622293e-07,
"loss": -0.0004,
"num_tokens": 9139131.0,
"reward": 4.470348358154297e-08,
"reward_std": 0.8231313824653625,
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.15172830287547154,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10156016936265624,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.825,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509011,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 172
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1441.0,
"completions/mean_length": 1439.125,
"completions/mean_terminated_length": 1337.666748046875,
"completions/min_length": 1231.0,
"completions/min_terminated_length": 1231.0,
"epoch": 0.21665623043206012,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8174656495384003,
"kl": 0.003330230712890625,
"learning_rate": 9.641289522007648e-07,
"loss": 0.0184,
"num_tokens": 9189589.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9120515584945679,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08079485341203167,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.404800820644525,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059004080210452274,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 173
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1416.4375,
"completions/mean_terminated_length": 1232.5999755859375,
"completions/min_length": 961.0,
"completions/min_terminated_length": 961.0,
"epoch": 0.21790857858484658,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.883462614618569,
"kl": 0.003101348876953125,
"learning_rate": 9.633557252741655e-07,
"loss": -0.0209,
"num_tokens": 9242428.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7592308521270752,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09250187361454984,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24709362304891008,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1630723538573985,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 174
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1310.0,
"completions/mean_length": 1153.5625,
"completions/mean_terminated_length": 884.1111450195312,
"completions/min_length": 704.0,
"completions/min_terminated_length": 704.0,
"epoch": 0.21916092673763307,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.067205627199215,
"kl": 0.003124237060546875,
"learning_rate": 9.625746067445344e-07,
"loss": 0.0267,
"num_tokens": 9286885.0,
"reward": 0.0,
"reward_std": 0.8734534978866577,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0040830023789233914,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.007219286680192259,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242308,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 175
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1475.0,
"completions/mean_length": 1478.625,
"completions/mean_terminated_length": 1329.0,
"completions/min_length": 1183.0,
"completions/min_terminated_length": 1183.0,
"epoch": 0.22041327489041954,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1276319460691004,
"kl": 0.003200531005859375,
"learning_rate": 9.61785611524638e-07,
"loss": -0.0146,
"num_tokens": 9345695.0,
"reward": 0.0,
"reward_std": 0.7759820222854614,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008401002667427777,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08187937939788012,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792516,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 176
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1363.0,
"completions/mean_length": 1251.3125,
"completions/mean_terminated_length": 1215.7857666015625,
"completions/min_length": 1017.0,
"completions/min_terminated_length": 1017.0,
"epoch": 0.221665623043206,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6069318364125738,
"kl": 0.0021648406982421875,
"learning_rate": 9.609887546776213e-07,
"loss": -0.0061,
"num_tokens": 9382804.0,
"reward": 0.0,
"reward_std": 0.8300349712371826,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04134925667146179,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05624626120552443,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07698003589195014,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 177
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1424.0,
"completions/mean_length": 1388.0,
"completions/mean_terminated_length": 1244.0,
"completions/min_length": 998.0,
"completions/min_terminated_length": 998.0,
"epoch": 0.2229179711959925,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.156831352479139,
"kl": 0.00347137451171875,
"learning_rate": 9.601840514167194e-07,
"loss": -0.0001,
"num_tokens": 9443532.0,
"reward": 0.0,
"reward_std": 0.9561296701431274,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02300302439349743,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06503983162022253,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13305526559931294,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 178
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1495.0,
"completions/mean_length": 1296.75,
"completions/mean_terminated_length": 1204.3636474609375,
"completions/min_length": 963.0,
"completions/min_terminated_length": 963.0,
"epoch": 0.22417031934877896,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2566872824431337,
"kl": 0.003185272216796875,
"learning_rate": 9.593715171049677e-07,
"loss": -0.0019,
"num_tokens": 9493936.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9979840517044067,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04123772744400983,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055545285602727666,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 179
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1482.0,
"completions/mean_length": 1462.0625,
"completions/mean_terminated_length": 1348.25,
"completions/min_length": 1185.0,
"completions/min_terminated_length": 1185.0,
"epoch": 0.22542266750156542,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.118655152417983,
"kl": 0.003711700439453125,
"learning_rate": 9.585511672549087e-07,
"loss": -0.0119,
"num_tokens": 9547913.0,
"reward": 0.0,
"reward_std": 0.6055276393890381,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.2990419990496254,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.5212506601592531,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792518,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 180
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1483.0,
"completions/mean_length": 1253.875,
"completions/mean_terminated_length": 1062.4444580078125,
"completions/min_length": 742.0,
"completions/min_terminated_length": 742.0,
"epoch": 0.2266750156543519,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.453588710121719,
"kl": 0.003208160400390625,
"learning_rate": 9.577230175282956e-07,
"loss": -0.0189,
"num_tokens": 9590383.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.026740550994873,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22982623849797099,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35491751307206737,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 181
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1431.0,
"completions/mean_length": 1397.3125,
"completions/mean_terminated_length": 1294.625,
"completions/min_length": 1209.0,
"completions/min_terminated_length": 1209.0,
"epoch": 0.22792736380713838,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8117171390856717,
"kl": 0.00273895263671875,
"learning_rate": 9.568870837357933e-07,
"loss": 0.0049,
"num_tokens": 9635180.0,
"reward": 0.0,
"reward_std": 0.9024027585983276,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015236533423952495,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05515104905405319,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1270024788326182,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 182
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1389.4375,
"completions/mean_terminated_length": 1247.2857666015625,
"completions/min_length": 1029.0,
"completions/min_terminated_length": 1029.0,
"epoch": 0.22917971195992487,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.146143558726643,
"kl": 0.003173828125,
"learning_rate": 9.56043381836677e-07,
"loss": 0.0244,
"num_tokens": 9691707.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.6937527656555176,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06475936323780643,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07867382027532054,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07781745019952505,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 183
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1496.0,
"completions/mean_length": 1331.75,
"completions/mean_terminated_length": 1275.666748046875,
"completions/min_length": 857.0,
"completions/min_terminated_length": 857.0,
"epoch": 0.23043206011271133,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.445265358440665,
"kl": 0.003719329833984375,
"learning_rate": 9.551919279385267e-07,
"loss": 0.0321,
"num_tokens": 9741247.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9354739785194397,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011286604414356131,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06370003732540648,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09878896324620107,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 184
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1381.0,
"completions/mean_length": 1312.6875,
"completions/mean_terminated_length": 1269.4615478515625,
"completions/min_length": 994.0,
"completions/min_terminated_length": 994.0,
"epoch": 0.2316844082654978,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2176769971519983,
"kl": 0.0028533935546875,
"learning_rate": 9.543327382969203e-07,
"loss": 0.0001,
"num_tokens": 9800986.0,
"reward": 0.0,
"reward_std": 0.8514897227287292,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06189917187460071,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09461702207527528,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 185
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1437.0,
"completions/max_terminated_length": 1437.0,
"completions/mean_length": 1251.9375,
"completions/mean_terminated_length": 1251.9375,
"completions/min_length": 1139.0,
"completions/min_terminated_length": 1139.0,
"epoch": 0.2329367564182843,
"frac_reward_zero_std": 0.0,
"grad_norm": 1.8078703137230523,
"kl": 0.0009489059448242188,
"learning_rate": 9.534658293151226e-07,
"loss": 0.0206,
"num_tokens": 9844961.0,
"reward": -2.2351741790771484e-08,
"reward_std": 1.0031490325927734,
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1837486103073024,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2121586351571871,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 186
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.23418910457107076,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.341222957754127,
"kl": 0.0020427703857421875,
"learning_rate": 9.525912175437733e-07,
"loss": 0.0001,
"num_tokens": 9904889.0,
"reward": 0.0,
"reward_std": 0.8993015289306641,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0626094048175301,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14524912930313416,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1102186379345533,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 187
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1459.0,
"completions/mean_length": 1401.75,
"completions/mean_terminated_length": 1303.5,
"completions/min_length": 946.0,
"completions/min_terminated_length": 946.0,
"epoch": 0.23544145272385722,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7109926581212966,
"kl": 0.0028514862060546875,
"learning_rate": 9.5170891968057e-07,
"loss": 0.0103,
"num_tokens": 9960061.0,
"reward": 0.0,
"reward_std": 0.4977339506149292,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007690022648520695,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11369344635650466,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11279282877125754,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 188
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1497.0,
"completions/mean_length": 1399.3125,
"completions/mean_terminated_length": 1321.0,
"completions/min_length": 1270.0,
"completions/min_terminated_length": 1270.0,
"epoch": 0.2366938008766437,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.2019750861669105,
"kl": 0.0019474029541015625,
"learning_rate": 9.508189525699498e-07,
"loss": 0.0016,
"num_tokens": 10018474.0,
"reward": 0.0,
"reward_std": 0.40811485052108765,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09626746004308685,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11868608664564458,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066221,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 189
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1067.0,
"completions/mean_length": 1176.1875,
"completions/mean_terminated_length": 852.375,
"completions/min_length": 641.0,
"completions/min_terminated_length": 641.0,
"epoch": 0.23794614902943018,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0536777064675973,
"kl": 0.00238037109375,
"learning_rate": 9.499213332027676e-07,
"loss": -0.0079,
"num_tokens": 10055509.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.5494594573974609,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09604975311367514,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10254148239725733,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686702,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 190
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1421.0,
"completions/max_terminated_length": 1421.0,
"completions/mean_length": 980.5,
"completions/mean_terminated_length": 980.5,
"completions/min_length": 598.0,
"completions/min_terminated_length": 598.0,
"epoch": 0.23919849718221667,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5775077184845583,
"kl": 0.003173828125,
"learning_rate": 9.490160787159716e-07,
"loss": -0.0435,
"num_tokens": 10088493.0,
"reward": 0.0,
"reward_std": 0.7993010878562927,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017021331786918385,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08925782815695868,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5791666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625447,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 191
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1491.0,
"completions/mean_length": 1205.125,
"completions/mean_terminated_length": 1028.2000732421875,
"completions/min_length": 700.0,
"completions/min_terminated_length": 700.0,
"epoch": 0.24045084533500313,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.472937551782796,
"kl": 0.003765106201171875,
"learning_rate": 9.481032063922764e-07,
"loss": 0.0801,
"num_tokens": 10134447.0,
"reward": 0.0,
"reward_std": 0.9049590826034546,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028540941769550358,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.047797358383350766,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346314,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 192
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1462.0,
"completions/mean_length": 1342.1875,
"completions/mean_terminated_length": 1219.4444580078125,
"completions/min_length": 823.0,
"completions/min_terminated_length": 823.0,
"epoch": 0.2417031934877896,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0654183912224107,
"kl": 0.00345611572265625,
"learning_rate": 9.471827336598332e-07,
"loss": -0.0116,
"num_tokens": 10182434.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.894692599773407,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.10903944916375954,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15533453332102554,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06540472290116196,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 193
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.2429555416405761,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7779664849113415,
"kl": 0.003116607666015625,
"learning_rate": 9.462546780918966e-07,
"loss": 0.0001,
"num_tokens": 10244530.0,
"reward": 0.0,
"reward_std": 0.967013955116272,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.053086024723834134,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06887877561253418,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 194
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1470.0,
"completions/mean_length": 1454.9375,
"completions/mean_terminated_length": 1397.0001220703125,
"completions/min_length": 1265.0,
"completions/min_terminated_length": 1265.0,
"epoch": 0.24420788979336255,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1642061803468913,
"kl": 0.003826141357421875,
"learning_rate": 9.453190574064893e-07,
"loss": -0.0047,
"num_tokens": 10299345.0,
"reward": 1.862645149230957e-08,
"reward_std": 1.04762601852417,
"rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04354357070732585,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08903133853741613,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725114,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 195
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1337.0625,
"completions/mean_terminated_length": 1282.75,
"completions/min_length": 999.0,
"completions/min_terminated_length": 999.0,
"epoch": 0.24546023794614902,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0019183290119758,
"kl": 0.003467559814453125,
"learning_rate": 9.443758894660638e-07,
"loss": 0.0284,
"num_tokens": 10358514.0,
"reward": 0.0,
"reward_std": 0.6315692067146301,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07697389081957594,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12645427286420413,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10671873729054746,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 196
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1413.0,
"completions/mean_length": 1494.5625,
"completions/mean_terminated_length": 1413.0,
"completions/min_length": 1413.0,
"completions/min_terminated_length": 1413.0,
"epoch": 0.2467125860989355,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8267138784190755,
"kl": 0.002933502197265625,
"learning_rate": 9.434251922771616e-07,
"loss": 0.0078,
"num_tokens": 10411171.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.884939432144165,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01747490695405262,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06365932956310252,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12141145226353543,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 197
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1418.0,
"completions/mean_length": 1410.375,
"completions/mean_terminated_length": 1261.0,
"completions/min_length": 803.0,
"completions/min_terminated_length": 803.0,
"epoch": 0.24796493425172197,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1021608965378076,
"kl": 0.003887176513671875,
"learning_rate": 9.424669839900691e-07,
"loss": 0.0143,
"num_tokens": 10469257.0,
"reward": -5.21540641784668e-08,
"reward_std": 1.061091661453247,
"rewards/wordcountpos_reward_ecommerce/mean": -5.21540641784668e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0551289409217747,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2097823559795121,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505424,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 198
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1407.0,
"completions/mean_length": 1370.9375,
"completions/mean_terminated_length": 1205.0,
"completions/min_length": 1019.0,
"completions/min_terminated_length": 1019.0,
"epoch": 0.24921728240450847,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6919185602949653,
"kl": 0.002506256103515625,
"learning_rate": 9.415012828984714e-07,
"loss": 0.0067,
"num_tokens": 10523624.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7187443971633911,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03211836693332174,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13737955494238535,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 199
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1361.0,
"completions/mean_length": 1390.8125,
"completions/mean_terminated_length": 1281.625,
"completions/min_length": 1177.0,
"completions/min_terminated_length": 1177.0,
"epoch": 0.25046963055729493,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.450252869715986,
"kl": 0.00223541259765625,
"learning_rate": 9.405281074391022e-07,
"loss": -0.0098,
"num_tokens": 10579429.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.4103597402572632,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1406289464666968,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15985873234433481,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08933913745655643,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 200
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1486.0,
"completions/mean_length": 1295.8125,
"completions/mean_terminated_length": 1266.6429443359375,
"completions/min_length": 994.0,
"completions/min_terminated_length": 994.0,
"epoch": 0.2517219787100814,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.642709475322054,
"kl": 0.0023345947265625,
"learning_rate": 9.395474761913939e-07,
"loss": 0.014,
"num_tokens": 10628866.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.7710261940956116,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04348868814755175,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0830759853911682,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516198,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 201
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1494.0,
"completions/mean_length": 1357.125,
"completions/mean_terminated_length": 1324.1539306640625,
"completions/min_length": 1030.0,
"completions/min_terminated_length": 1030.0,
"epoch": 0.25297432686286786,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.216143536482822,
"kl": 0.004241943359375,
"learning_rate": 9.3855940787712e-07,
"loss": -0.0086,
"num_tokens": 10670092.0,
"reward": 0.0,
"reward_std": 0.6420686841011047,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03687807737633173,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16934247164490465,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14343665526661611,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
"step": 202
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1434.0,
"completions/mean_length": 1278.625,
"completions/mean_terminated_length": 1227.5384521484375,
"completions/min_length": 970.0,
"completions/min_terminated_length": 970.0,
"epoch": 0.2542266750156543,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4244156852349814,
"kl": 0.0052642822265625,
"learning_rate": 9.375639213600401e-07,
"loss": -0.0436,
"num_tokens": 10728350.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9011333584785461,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09818030402455966,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07523729893672071,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13080944580232393,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 203
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1490.0,
"completions/mean_length": 1443.9375,
"completions/mean_terminated_length": 1371.857177734375,
"completions/min_length": 1252.0,
"completions/min_terminated_length": 1252.0,
"epoch": 0.25547902316844084,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5077076210411278,
"kl": 0.00457000732421875,
"learning_rate": 9.365610356455384e-07,
"loss": 0.0019,
"num_tokens": 10791365.0,
"reward": 4.470348358154297e-08,
"reward_std": 0.8847507238388062,
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015580215905333485,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06751943458738671,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.135263802609184,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 204
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1351.0,
"completions/mean_length": 1391.8125,
"completions/mean_terminated_length": 1067.25,
"completions/min_length": 264.0,
"completions/min_terminated_length": 264.0,
"epoch": 0.2567313713212273,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.827913375094597,
"kl": 0.003643035888671875,
"learning_rate": 9.355507698802613e-07,
"loss": -0.0786,
"num_tokens": 10852330.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0562589168548584,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09460135777577211,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12653992925605045,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11122216672215289,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 205
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1397.0,
"completions/mean_length": 1426.6875,
"completions/mean_terminated_length": 1265.4000244140625,
"completions/min_length": 1092.0,
"completions/min_terminated_length": 1092.0,
"epoch": 0.2579837194740138,
"frac_reward_zero_std": 0.0,
"grad_norm": 4.109416757035146,
"kl": 0.00577545166015625,
"learning_rate": 9.345331433517522e-07,
"loss": 0.0289,
"num_tokens": 10918837.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9729784727096558,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04706903609226349,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08099201475868337,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045818,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 206
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1478.0,
"completions/mean_length": 1464.1875,
"completions/mean_terminated_length": 1356.75,
"completions/min_length": 1188.0,
"completions/min_terminated_length": 1188.0,
"epoch": 0.25923606762680024,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1385572152974905,
"kl": 0.003864288330078125,
"learning_rate": 9.335081754880825e-07,
"loss": 0.0082,
"num_tokens": 10974608.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.5515385270118713,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06366384054522155,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10778487016156474,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1002773930432755,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 207
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1487.0,
"completions/mean_length": 1448.625,
"completions/mean_terminated_length": 1363.0,
"completions/min_length": 1103.0,
"completions/min_terminated_length": 1103.0,
"epoch": 0.2604884157795867,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2758893957680097,
"kl": 0.004230499267578125,
"learning_rate": 9.32475885857481e-07,
"loss": -0.0053,
"num_tokens": 11033482.0,
"reward": 0.0,
"reward_std": 0.5894155502319336,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05038277241462744,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07116397984833597,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 208
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1463.0,
"completions/mean_length": 1455.75,
"completions/mean_terminated_length": 1264.0,
"completions/min_length": 1089.0,
"completions/min_terminated_length": 1089.0,
"epoch": 0.2617407639323732,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6461488967254634,
"kl": 0.0025310516357421875,
"learning_rate": 9.31436294167961e-07,
"loss": -0.0132,
"num_tokens": 11098902.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6484573483467102,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1468978313797672,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24420746920563674,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 209
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1413.0,
"completions/mean_length": 1237.1875,
"completions/mean_terminated_length": 1079.5,
"completions/min_length": 742.0,
"completions/min_terminated_length": 742.0,
"epoch": 0.2629931120851597,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.313685913897086,
"kl": 0.003513336181640625,
"learning_rate": 9.303894202669428e-07,
"loss": 0.0531,
"num_tokens": 11148649.0,
"reward": -1.1175870895385742e-08,
"reward_std": 0.990402102470398,
"rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0009552414586071921,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0038209658344287682,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333333,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 210
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1433.0,
"completions/mean_length": 1327.125,
"completions/mean_terminated_length": 1154.25,
"completions/min_length": 970.0,
"completions/min_terminated_length": 970.0,
"epoch": 0.26424546023794615,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0546042142648298,
"kl": 0.0033416748046875,
"learning_rate": 9.293352841408759e-07,
"loss": -0.0213,
"num_tokens": 11207483.0,
"reward": 0.0,
"reward_std": 0.5952367186546326,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.035691884267146166,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07657424493915134,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672248,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 211
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1375.0,
"completions/mean_length": 1306.25,
"completions/mean_terminated_length": 1190.0,
"completions/min_length": 1071.0,
"completions/min_terminated_length": 1071.0,
"epoch": 0.2654978083907326,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.123766104646228,
"kl": 0.0015621185302734375,
"learning_rate": 9.282739059148566e-07,
"loss": -0.0237,
"num_tokens": 11255703.0,
"reward": 0.0,
"reward_std": 0.9929344654083252,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.025706850415670862,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11233922174981649,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 212
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1287.0,
"completions/mean_length": 1126.125,
"completions/mean_terminated_length": 956.1818237304688,
"completions/min_length": 674.0,
"completions/min_terminated_length": 674.0,
"epoch": 0.2667501565435191,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.488984850056713,
"kl": 0.0038604736328125,
"learning_rate": 9.272053058522444e-07,
"loss": -0.0253,
"num_tokens": 11294505.0,
"reward": 0.0,
"reward_std": 0.5554646253585815,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23003407087469527,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20616326736471785,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15000000000000002,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 213
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1279.0,
"completions/max_terminated_length": 1279.0,
"completions/mean_length": 883.5,
"completions/mean_terminated_length": 883.5,
"completions/min_length": 673.0,
"completions/min_terminated_length": 673.0,
"epoch": 0.2680025046963056,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2969967424976456,
"kl": 0.002185821533203125,
"learning_rate": 9.261295043542747e-07,
"loss": 0.0085,
"num_tokens": 11325305.0,
"reward": 2.421438694000244e-08,
"reward_std": 1.039635419845581,
"rewards/wordcountpos_reward_ecommerce/mean": 2.421438694000244e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06031083797758491,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16643314604295306,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11603000888978231,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 214
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1499.0,
"completions/mean_length": 1423.8125,
"completions/mean_terminated_length": 1296.8333740234375,
"completions/min_length": 987.0,
"completions/min_terminated_length": 987.0,
"epoch": 0.26925485284909206,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.683342974010468,
"kl": 0.0028839111328125,
"learning_rate": 9.250465219596699e-07,
"loss": 0.0,
"num_tokens": 11384166.0,
"reward": 0.0,
"reward_std": 0.6987115144729614,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015736024702926166,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06158481768754947,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1057600358603626,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 215
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1390.0,
"completions/max_terminated_length": 1390.0,
"completions/mean_length": 1160.6875,
"completions/mean_terminated_length": 1160.6875,
"completions/min_length": 865.0,
"completions/min_terminated_length": 865.0,
"epoch": 0.27050720100187853,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.800146584559223,
"kl": 0.004093170166015625,
"learning_rate": 9.239563793442462e-07,
"loss": 0.0174,
"num_tokens": 11441313.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8806728720664978,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0032805949907051112,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0510781770746922,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509009,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 216
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1450.0,
"completions/mean_length": 1307.0625,
"completions/mean_terminated_length": 1279.5,
"completions/min_length": 985.0,
"completions/min_terminated_length": 985.0,
"epoch": 0.271759549154665,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.806350656696486,
"kl": 0.002574920654296875,
"learning_rate": 9.228590973205201e-07,
"loss": -0.0377,
"num_tokens": 11499258.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.0440177917480469,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0614237528104428,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06944213481803516,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316066,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 217
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.27301189730745146,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4434394608429915,
"kl": 0.002834320068359375,
"learning_rate": 9.2175469683731e-07,
"loss": 0.0001,
"num_tokens": 11554162.0,
"reward": 0.0,
"reward_std": 0.9512232542037964,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006459758393578777,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09841534495892398,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13601470508735444,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 218
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1378.0,
"completions/mean_length": 1139.4375,
"completions/mean_terminated_length": 1115.4000244140625,
"completions/min_length": 793.0,
"completions/min_terminated_length": 793.0,
"epoch": 0.2742642454602379,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.183780267914974,
"kl": 0.0016641616821289062,
"learning_rate": 9.206431989793374e-07,
"loss": 0.0171,
"num_tokens": 11599913.0,
"reward": 0.0,
"reward_std": 0.7830429077148438,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006944415247763777,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03283008559006156,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 219
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1295.0,
"completions/mean_length": 1346.5,
"completions/mean_terminated_length": 1149.1429443359375,
"completions/min_length": 1004.0,
"completions/min_terminated_length": 1004.0,
"epoch": 0.27551659361302444,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3703074221103817,
"kl": 0.00464630126953125,
"learning_rate": 9.195246249668232e-07,
"loss": -0.0007,
"num_tokens": 11664265.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.7190686464309692,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.3476598454237376,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.4301665677025463,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891873,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 220
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1436.0,
"completions/mean_length": 1309.5625,
"completions/mean_terminated_length": 1119.125,
"completions/min_length": 955.0,
"completions/min_terminated_length": 955.0,
"epoch": 0.2767689417658109,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.622730015507812,
"kl": 0.0024242401123046875,
"learning_rate": 9.183989961550832e-07,
"loss": -0.0219,
"num_tokens": 11719922.0,
"reward": 0.0,
"reward_std": 0.72877037525177,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0952471076969717,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12586824643040787,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 221
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1392.0,
"completions/mean_length": 1485.875,
"completions/mean_terminated_length": 1387.0,
"completions/min_length": 1382.0,
"completions/min_terminated_length": 1382.0,
"epoch": 0.27802128991859737,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.782237650167012,
"kl": 0.004253387451171875,
"learning_rate": 9.172663340341204e-07,
"loss": -0.0028,
"num_tokens": 11778680.0,
"reward": 0.0,
"reward_std": 1.0009106397628784,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.012701224890322388,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02815604341593864,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408155,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 222
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1462.0,
"completions/mean_length": 1443.0625,
"completions/mean_terminated_length": 1317.800048828125,
"completions/min_length": 1121.0,
"completions/min_terminated_length": 1121.0,
"epoch": 0.27927363807138383,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.998694336626448,
"kl": 0.003566741943359375,
"learning_rate": 9.161266602282147e-07,
"loss": -0.0055,
"num_tokens": 11838169.0,
"reward": 0.0,
"reward_std": 0.9211122989654541,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04490957636365446,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09152261044011904,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 223
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1450.0,
"completions/mean_length": 1488.875,
"completions/mean_terminated_length": 1411.0,
"completions/min_length": 1372.0,
"completions/min_terminated_length": 1372.0,
"epoch": 0.2805259862241703,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9658149533875187,
"kl": 0.004146575927734375,
"learning_rate": 9.149799964955093e-07,
"loss": 0.008,
"num_tokens": 11899975.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0432794094085693,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012174573886332358,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04195711207506097,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 224
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1359.0,
"completions/mean_length": 1409.8125,
"completions/mean_terminated_length": 1139.25,
"completions/min_length": 925.0,
"completions/min_terminated_length": 925.0,
"epoch": 0.2817783343769568,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.936718458392348,
"kl": 0.00342559814453125,
"learning_rate": 9.138263647275969e-07,
"loss": -0.0033,
"num_tokens": 11941164.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0000627040863037,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15080494449355206,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08565387051258783,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09339283817414601,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 225
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1475.0,
"completions/mean_length": 1183.3125,
"completions/mean_terminated_length": 1110.2308349609375,
"completions/min_length": 786.0,
"completions/min_terminated_length": 786.0,
"epoch": 0.2830306825297433,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.658827402816175,
"kl": 0.00414276123046875,
"learning_rate": 9.126657869491e-07,
"loss": 0.0126,
"num_tokens": 11992657.0,
"reward": 0.0,
"reward_std": 0.9479507207870483,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017415102975537073,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031912571116253466,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026002,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 226
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1484.0,
"completions/mean_length": 1499.0,
"completions/mean_terminated_length": 1484.0,
"completions/min_length": 1484.0,
"completions/min_terminated_length": 1484.0,
"epoch": 0.28428303068252975,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1261193198966652,
"kl": 0.0041351318359375,
"learning_rate": 9.114982853172521e-07,
"loss": 0.0009,
"num_tokens": 12054529.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0018526315689087,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012434236974245455,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045472914513713596,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 227
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.2855353788353162,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4904650551200485,
"kl": 0.00292205810546875,
"learning_rate": 9.103238821214727e-07,
"loss": 0.0001,
"num_tokens": 12114017.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.4001474976539612,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19495499044861478,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26273237351903383,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059628479399994425,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 228
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1329.0,
"completions/mean_length": 1472.3125,
"completions/mean_terminated_length": 1278.5,
"completions/min_length": 1228.0,
"completions/min_terminated_length": 1228.0,
"epoch": 0.2867877269881027,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9738846657010085,
"kl": 0.00360107421875,
"learning_rate": 9.09142599782944e-07,
"loss": -0.0048,
"num_tokens": 12167838.0,
"reward": 0.0,
"reward_std": 0.981914758682251,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.2657549523204851,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31826899071497716,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6333333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725108,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 229
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 1397.75,
"completions/mean_terminated_length": 1266.2857666015625,
"completions/min_length": 987.0,
"completions/min_terminated_length": 987.0,
"epoch": 0.28804007514088914,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9008571656686524,
"kl": 0.003810882568359375,
"learning_rate": 9.07954460854181e-07,
"loss": -0.0435,
"num_tokens": 12219114.0,
"reward": -1.4901161193847656e-08,
"reward_std": 0.9752408266067505,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 1.7226310978600795e-05,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017137695280743562,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 230
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1462.0,
"completions/mean_length": 1381.0625,
"completions/mean_terminated_length": 1262.125,
"completions/min_length": 1118.0,
"completions/min_terminated_length": 1118.0,
"epoch": 0.28929242329367566,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.579522085731086,
"kl": 0.00286102294921875,
"learning_rate": 9.067594880186016e-07,
"loss": 0.0118,
"num_tokens": 12283627.0,
"reward": 0.0,
"reward_std": 0.8155025839805603,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4072348540230938,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33458166056964905,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 231
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1402.0,
"completions/mean_length": 1432.375,
"completions/mean_terminated_length": 1229.5,
"completions/min_length": 1085.0,
"completions/min_terminated_length": 1085.0,
"epoch": 0.2905447714464621,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3504781665775463,
"kl": 0.00449371337890625,
"learning_rate": 9.055577040900944e-07,
"loss": 0.0198,
"num_tokens": 12334705.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0079009532928467,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007641388631451263,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1447140199531734,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1067187372905475,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 232
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1439.0,
"completions/mean_length": 1411.4375,
"completions/mean_terminated_length": 1322.875,
"completions/min_length": 1169.0,
"completions/min_terminated_length": 1169.0,
"epoch": 0.2917971195992486,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.3485586422127005,
"kl": 0.004638671875,
"learning_rate": 9.043491320125814e-07,
"loss": 0.0213,
"num_tokens": 12389648.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8743376731872559,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002227354544120855,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08926616854117143,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13109227736669002,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 233
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1492.0,
"completions/mean_length": 1474.0,
"completions/mean_terminated_length": 1430.666748046875,
"completions/min_length": 1263.0,
"completions/min_terminated_length": 1263.0,
"epoch": 0.29304946775203505,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8494643618437947,
"kl": 0.00304412841796875,
"learning_rate": 9.031337948595817e-07,
"loss": 0.0093,
"num_tokens": 12456272.0,
"reward": 0.0,
"reward_std": 0.7424121499061584,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04108305878098174,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1161369232371233,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408158,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 234
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 1141.4375,
"completions/mean_terminated_length": 1117.533447265625,
"completions/min_length": 557.0,
"completions/min_terminated_length": 557.0,
"epoch": 0.2943018159048215,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.2293142678990754,
"kl": 0.0016422271728515625,
"learning_rate": 9.019117158337695e-07,
"loss": 0.0038,
"num_tokens": 12498031.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.6336873769760132,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009688556708469433,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05746171503021093,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1529342632927262,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 235
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1360.0,
"completions/mean_length": 1298.0,
"completions/mean_terminated_length": 1140.888916015625,
"completions/min_length": 942.0,
"completions/min_terminated_length": 942.0,
"epoch": 0.29555416405760804,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.714021525456579,
"kl": 0.00262451171875,
"learning_rate": 9.006829182665325e-07,
"loss": -0.0167,
"num_tokens": 12548119.0,
"reward": 0.0,
"reward_std": 0.6711900234222412,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07748680022506171,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09355524405080126,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316063,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 236
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1300.5625,
"completions/mean_terminated_length": 1254.5384521484375,
"completions/min_length": 1067.0,
"completions/min_terminated_length": 1067.0,
"epoch": 0.2968065122103945,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.982742616917611,
"kl": 0.003223419189453125,
"learning_rate": 8.99447425617525e-07,
"loss": 0.0208,
"num_tokens": 12596288.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.049065351486206,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.24719836974150322,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26811306631065646,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 237
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1490.0,
"completions/mean_length": 1029.3125,
"completions/mean_terminated_length": 997.9334106445312,
"completions/min_length": 658.0,
"completions/min_terminated_length": 658.0,
"epoch": 0.29805886036318097,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8416489290807947,
"kl": 0.002620697021484375,
"learning_rate": 8.982052614742218e-07,
"loss": 0.011,
"num_tokens": 12642901.0,
"reward": 0.0,
"reward_std": 1.0193631649017334,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07904007503321656,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05617218071571685,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 238
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1465.0,
"completions/mean_length": 1096.1875,
"completions/mean_terminated_length": 782.1111450195312,
"completions/min_length": 444.0,
"completions/min_terminated_length": 444.0,
"epoch": 0.29931120851596743,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.052640590300802,
"kl": 0.003078460693359375,
"learning_rate": 8.96956449551466e-07,
"loss": 0.0293,
"num_tokens": 12685520.0,
"reward": 3.725290298461914e-08,
"reward_std": 1.0355110168457031,
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0007223476637822487,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045707258037314374,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 239
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1352.0,
"completions/mean_length": 1176.375,
"completions/mean_terminated_length": 1101.6923828125,
"completions/min_length": 795.0,
"completions/min_terminated_length": 795.0,
"epoch": 0.3005635566687539,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.409137882904874,
"kl": 0.00447845458984375,
"learning_rate": 8.957010136910177e-07,
"loss": 0.0027,
"num_tokens": 12732478.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0500978231430054,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03744221002235665,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07008909373099989,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09574271077563382,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 240
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1235.0,
"completions/mean_length": 1412.4375,
"completions/mean_terminated_length": 799.5,
"completions/min_length": 364.0,
"completions/min_terminated_length": 364.0,
"epoch": 0.3018159048215404,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.773019820684676,
"kl": 0.003604888916015625,
"learning_rate": 8.944389778610978e-07,
"loss": -0.0118,
"num_tokens": 12801637.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.0457574129104614,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0662282436201746,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07940471297587236,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.22273551829717486,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 241
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1189.0,
"completions/mean_length": 1244.8125,
"completions/mean_terminated_length": 989.625,
"completions/min_length": 844.0,
"completions/min_terminated_length": 844.0,
"epoch": 0.3030682529743269,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2644500704934876,
"kl": 0.00386810302734375,
"learning_rate": 8.931703661559313e-07,
"loss": -0.0143,
"num_tokens": 12856914.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8334095478057861,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.056274055481427915,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06634909249021953,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 242
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1470.0,
"completions/mean_length": 1456.9375,
"completions/mean_terminated_length": 1327.75,
"completions/min_length": 1033.0,
"completions/min_terminated_length": 1033.0,
"epoch": 0.30432060112711334,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6637299937398455,
"kl": 0.003570556640625,
"learning_rate": 8.918952027952867e-07,
"loss": 0.0284,
"num_tokens": 12917977.0,
"reward": 0.0,
"reward_std": 0.6500852704048157,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.022227592869964712,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.035807130460280175,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672246,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 243
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1087.0,
"completions/mean_length": 1474.1875,
"completions/mean_terminated_length": 1087.0,
"completions/min_length": 1087.0,
"completions/min_terminated_length": 1087.0,
"epoch": 0.3055729492798998,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9975900171544794,
"kl": 0.003841400146484375,
"learning_rate": 8.906135121240139e-07,
"loss": -0.0025,
"num_tokens": 12975724.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0467472076416016,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.042561575382490995,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12106724719901756,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 244
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1411.0,
"completions/mean_length": 1228.9375,
"completions/mean_terminated_length": 1066.300048828125,
"completions/min_length": 227.0,
"completions/min_terminated_length": 227.0,
"epoch": 0.3068252974326863,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9921353466243374,
"kl": 0.00344085693359375,
"learning_rate": 8.89325318611579e-07,
"loss": -0.1088,
"num_tokens": 13028715.0,
"reward": 0.0,
"reward_std": 0.8063486218452454,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04616985070885913,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17900764914436168,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0718795288428261,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 245
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1489.0,
"completions/mean_length": 1367.3125,
"completions/mean_terminated_length": 1307.0,
"completions/min_length": 1073.0,
"completions/min_terminated_length": 1073.0,
"epoch": 0.30807764558547274,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.02852662262937,
"kl": 0.00337982177734375,
"learning_rate": 8.880306468515979e-07,
"loss": 0.0285,
"num_tokens": 13077528.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.4837535619735718,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010209232644034694,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1481102929172379,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16947631758514883,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 246
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1491.0,
"completions/mean_length": 1327.5625,
"completions/mean_terminated_length": 1270.0833740234375,
"completions/min_length": 1026.0,
"completions/min_terminated_length": 1026.0,
"epoch": 0.30932999373825926,
"frac_reward_zero_std": 0.0,
"grad_norm": 5.433838129064804,
"kl": 0.009929656982421875,
"learning_rate": 8.867295215613659e-07,
"loss": 0.0288,
"num_tokens": 13145409.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.691638708114624,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06467589999789795,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0938792951394418,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0824396524513313,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 247
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1490.0,
"completions/mean_length": 1467.9375,
"completions/mean_terminated_length": 1397.4000244140625,
"completions/min_length": 1284.0,
"completions/min_terminated_length": 1284.0,
"epoch": 0.3105823418910457,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9888443937256404,
"kl": 0.0040283203125,
"learning_rate": 8.85421967581386e-07,
"loss": 0.0184,
"num_tokens": 13198848.0,
"reward": -2.2351741790771484e-08,
"reward_std": 0.9693495035171509,
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.032938770819161474,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.162768145864506,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1803289175881631,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 248
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1483.0,
"completions/mean_length": 1498.9375,
"completions/mean_terminated_length": 1483.0,
"completions/min_length": 1483.0,
"completions/min_terminated_length": 1483.0,
"epoch": 0.3118346900438322,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.721188390535492,
"kl": 0.003574371337890625,
"learning_rate": 8.841080098748959e-07,
"loss": 0.0006,
"num_tokens": 13257207.0,
"reward": 0.0,
"reward_std": 0.9934348464012146,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.054418946541605284,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14760181642272932,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12345339501504504,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 249
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1351.0,
"completions/mean_length": 1359.3125,
"completions/mean_terminated_length": 1178.4285888671875,
"completions/min_length": 890.0,
"completions/min_terminated_length": 890.0,
"epoch": 0.31308703819661865,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2668088351359708,
"kl": 0.005401611328125,
"learning_rate": 8.827876735273893e-07,
"loss": -0.03,
"num_tokens": 13314820.0,
"reward": -3.725290298461914e-09,
"reward_std": 1.0606722831726074,
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1087294165966756,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07396732734066605,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10000000000000002,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 250
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1458.0,
"completions/mean_length": 1409.3125,
"completions/mean_terminated_length": 1258.166748046875,
"completions/min_length": 1036.0,
"completions/min_terminated_length": 1036.0,
"epoch": 0.3143393863494051,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0088889020769733,
"kl": 0.003604888916015625,
"learning_rate": 8.814609837461385e-07,
"loss": 0.0432,
"num_tokens": 13381449.0,
"reward": 0.0,
"reward_std": 0.6518675088882446,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005307542092858496,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017828779266863094,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1112221667221529,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 251
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1472.0,
"completions/mean_length": 1289.9375,
"completions/mean_terminated_length": 1126.5555419921875,
"completions/min_length": 1031.0,
"completions/min_terminated_length": 1031.0,
"epoch": 0.31559173450219163,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.2400933230709956,
"kl": 0.0020122528076171875,
"learning_rate": 8.801279658597131e-07,
"loss": 0.0011,
"num_tokens": 13430872.0,
"reward": 0.0,
"reward_std": 0.8595645427703857,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12188488436675578,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.32049499716061297,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 252
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1234.0,
"completions/mean_length": 1483.375,
"completions/mean_terminated_length": 1234.0,
"completions/min_length": 1234.0,
"completions/min_terminated_length": 1234.0,
"epoch": 0.3168440826549781,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0575565927809905,
"kl": 0.004093170166015625,
"learning_rate": 8.787886453174951e-07,
"loss": -0.0053,
"num_tokens": 13479446.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9997775554656982,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.051598953607968394,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06761287588738078,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12583057392117916,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 253
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1317.0,
"completions/max_terminated_length": 1317.0,
"completions/mean_length": 1068.0625,
"completions/mean_terminated_length": 1068.0625,
"completions/min_length": 758.0,
"completions/min_terminated_length": 758.0,
"epoch": 0.31809643080776456,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.56780625703575,
"kl": 0.00376129150390625,
"learning_rate": 8.77443047689195e-07,
"loss": -0.0249,
"num_tokens": 13534791.0,
"reward": 3.725290298461914e-09,
"reward_std": 1.0647456645965576,
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12037176129735677,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15720532676467985,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08681611046941137,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 254
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1149.0,
"completions/mean_length": 1264.8125,
"completions/mean_terminated_length": 1029.625,
"completions/min_length": 999.0,
"completions/min_terminated_length": 999.0,
"epoch": 0.319348778960551,
"frac_reward_zero_std": 0.0,
"grad_norm": 1.9002914944914675,
"kl": 0.001689910888671875,
"learning_rate": 8.760911986643621e-07,
"loss": 0.0079,
"num_tokens": 13585044.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0633113384246826,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11099520216632296,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11289406797895053,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 255
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1356.0,
"completions/max_terminated_length": 1356.0,
"completions/mean_length": 1039.625,
"completions/mean_terminated_length": 1039.625,
"completions/min_length": 816.0,
"completions/min_terminated_length": 816.0,
"epoch": 0.3206011271133375,
"frac_reward_zero_std": 0.0,
"grad_norm": 1.7437212883045385,
"kl": 0.0007447004318237305,
"learning_rate": 8.747331240518946e-07,
"loss": -0.0359,
"num_tokens": 13622654.0,
"reward": 0.0,
"reward_std": 0.80560302734375,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05283560581406991,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1274858045865064,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437975,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 256
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1468.0,
"completions/mean_length": 1147.25,
"completions/mean_terminated_length": 1123.7333984375,
"completions/min_length": 871.0,
"completions/min_terminated_length": 871.0,
"epoch": 0.32185347526612396,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.829288365048447,
"kl": 0.00440216064453125,
"learning_rate": 8.73368849779547e-07,
"loss": -0.0586,
"num_tokens": 13666658.0,
"reward": 2.2351741790771484e-08,
"reward_std": 1.0113918781280518,
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004399012913845209,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.022002579783276802,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15104573749303493,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 257
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1453.0,
"completions/mean_length": 1205.0,
"completions/mean_terminated_length": 1136.923095703125,
"completions/min_length": 934.0,
"completions/min_terminated_length": 934.0,
"epoch": 0.3231058234189105,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5508475650699274,
"kl": 0.00417327880859375,
"learning_rate": 8.719984018934348e-07,
"loss": -0.0198,
"num_tokens": 13713002.0,
"reward": -7.450580596923828e-09,
"reward_std": 0.9411071538925171,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.049397690395078006,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14628546425305664,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13381856152046848,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 258
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1379.0,
"completions/mean_length": 1393.9375,
"completions/mean_terminated_length": 1287.875,
"completions/min_length": 1155.0,
"completions/min_terminated_length": 1155.0,
"epoch": 0.32435817157169694,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1259216947519164,
"kl": 0.004150390625,
"learning_rate": 8.706218065575374e-07,
"loss": 0.0051,
"num_tokens": 13765289.0,
"reward": -5.960464477539063e-08,
"reward_std": 0.7700310945510864,
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011427243535616135,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12374645217812205,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13045504405165223,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 259
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1404.0,
"completions/mean_length": 1161.25,
"completions/mean_terminated_length": 1007.2727661132812,
"completions/min_length": 872.0,
"completions/min_terminated_length": 872.0,
"epoch": 0.3256105197244834,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3286130814036103,
"kl": 0.00212058424949646,
"learning_rate": 8.692390900531985e-07,
"loss": 0.0569,
"num_tokens": 13819269.0,
"reward": 0.0,
"reward_std": 0.45210930705070496,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23343450769100488,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33023521153193414,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14981470036162822,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 260
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1162.0,
"completions/max_terminated_length": 1162.0,
"completions/mean_length": 974.1875,
"completions/mean_terminated_length": 974.1875,
"completions/min_length": 675.0,
"completions/min_terminated_length": 675.0,
"epoch": 0.32686286787726987,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.6973290112944848,
"kl": 0.0045013427734375,
"learning_rate": 8.678502787786249e-07,
"loss": -0.0481,
"num_tokens": 13849256.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8221656084060669,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01926574676180823,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.028234090328970243,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 261
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1350.0,
"completions/mean_length": 1254.3125,
"completions/mean_terminated_length": 1106.9000244140625,
"completions/min_length": 850.0,
"completions/min_terminated_length": 850.0,
"epoch": 0.32811521603005633,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8513068805319826,
"kl": 0.003444671630859375,
"learning_rate": 8.664553992483812e-07,
"loss": -0.0294,
"num_tokens": 13886621.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8134012818336487,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04343925396813008,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08880475360320686,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804349,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 262
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1419.0,
"completions/mean_length": 1267.5,
"completions/mean_terminated_length": 1161.8182373046875,
"completions/min_length": 936.0,
"completions/min_terminated_length": 936.0,
"epoch": 0.32936756418284285,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8893558213524178,
"kl": 0.003467559814453125,
"learning_rate": 8.650544780928851e-07,
"loss": -0.0196,
"num_tokens": 13935477.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.748847246170044,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03399978669526769,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0913917502530681,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 263
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1467.0,
"completions/mean_length": 1393.75,
"completions/mean_terminated_length": 1287.5,
"completions/min_length": 1085.0,
"completions/min_terminated_length": 1085.0,
"epoch": 0.3306199123356293,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2549999172638953,
"kl": 0.00475311279296875,
"learning_rate": 8.63647542057898e-07,
"loss": -0.0273,
"num_tokens": 13998809.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9957271814346313,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032845331546287986,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1426354161680431,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 264
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1476.0,
"completions/mean_length": 1465.625,
"completions/mean_terminated_length": 1316.666748046875,
"completions/min_length": 1156.0,
"completions/min_terminated_length": 1156.0,
"epoch": 0.3318722604884158,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.761811279986975,
"kl": 0.004451751708984375,
"learning_rate": 8.622346180040149e-07,
"loss": 0.0022,
"num_tokens": 14063899.0,
"reward": 0.0,
"reward_std": 0.9594628810882568,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003469042362222641,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06436545386363138,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 265
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1351.0,
"completions/mean_length": 1490.6875,
"completions/mean_terminated_length": 1351.0,
"completions/min_length": 1351.0,
"completions/min_terminated_length": 1351.0,
"epoch": 0.33312460864120225,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.812294289039839,
"kl": 0.00394439697265625,
"learning_rate": 8.608157329061513e-07,
"loss": -0.0088,
"num_tokens": 14117462.0,
"reward": 0.0,
"reward_std": 0.8838216066360474,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.040579408270268943,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06117251081342495,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625451,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 266
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1498.0,
"completions/mean_length": 1466.375,
"completions/mean_terminated_length": 1410.3333740234375,
"completions/min_length": 1160.0,
"completions/min_terminated_length": 1160.0,
"epoch": 0.3343769567939887,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1377512293638716,
"kl": 0.00469207763671875,
"learning_rate": 8.59390913853028e-07,
"loss": 0.0227,
"num_tokens": 14167892.0,
"reward": 0.0,
"reward_std": 0.5932345390319824,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.095152127303474,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13689770081097544,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 267
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.33562930494677523,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.006613104134553,
"kl": 0.004669189453125,
"learning_rate": 8.579601880466547e-07,
"loss": 0.0002,
"num_tokens": 14229372.0,
"reward": 0.0,
"reward_std": 1.0337638854980469,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04680772992368523,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14029739799038618,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 268
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1468.0,
"completions/mean_length": 1392.9375,
"completions/mean_terminated_length": 1214.5,
"completions/min_length": 1000.0,
"completions/min_terminated_length": 1000.0,
"epoch": 0.3368816530995617,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7703650077113675,
"kl": 0.0036773681640625,
"learning_rate": 8.565235828018099e-07,
"loss": 0.0013,
"num_tokens": 14289123.0,
"reward": 0.0,
"reward_std": 1.0367697477340698,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07296543210522512,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07812168004547569,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078611,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 269
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1487.0,
"completions/mean_length": 1464.5625,
"completions/mean_terminated_length": 1358.25,
"completions/min_length": 1214.0,
"completions/min_terminated_length": 1214.0,
"epoch": 0.33813400125234816,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.773518375849927,
"kl": 0.0041961669921875,
"learning_rate": 8.550811255455198e-07,
"loss": -0.0021,
"num_tokens": 14352892.0,
"reward": -2.2351741790771484e-08,
"reward_std": 0.9712283611297607,
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06869379781464208,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0930651391561654,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 270
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.3393863494051346,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.79098774129514,
"kl": 0.003692626953125,
"learning_rate": 8.536328438165346e-07,
"loss": 0.0001,
"num_tokens": 14414740.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0314404964447021,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0126400376983615,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11352147882865961,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258102,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 271
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1366.0,
"completions/mean_length": 1491.625,
"completions/mean_terminated_length": 1366.0,
"completions/min_length": 1366.0,
"completions/min_terminated_length": 1366.0,
"epoch": 0.3406386975579211,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3328071582670127,
"kl": 0.002582550048828125,
"learning_rate": 8.521787652648026e-07,
"loss": -0.0005,
"num_tokens": 14475390.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.066014051437378,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04816321266725149,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23143656867957818,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252809,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 272
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1274.0,
"completions/max_terminated_length": 1274.0,
"completions/mean_length": 966.0,
"completions/mean_terminated_length": 966.0,
"completions/min_length": 818.0,
"completions/min_terminated_length": 818.0,
"epoch": 0.34189104571070755,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.623716761400387,
"kl": 0.00237274169921875,
"learning_rate": 8.507189176509429e-07,
"loss": 0.0118,
"num_tokens": 14519830.0,
"reward": 0.0,
"reward_std": 0.8626605868339539,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09591776756938776,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0561472451616448,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 273
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1356.0,
"completions/mean_length": 1441.8125,
"completions/mean_terminated_length": 1267.25,
"completions/min_length": 1188.0,
"completions/min_terminated_length": 1188.0,
"epoch": 0.3431433938634941,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6860149670271087,
"kl": 0.003993988037109375,
"learning_rate": 8.492533288457142e-07,
"loss": 0.0176,
"num_tokens": 14562059.0,
"reward": 0.0,
"reward_std": 0.7108601331710815,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08617312005850387,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09760563193409819,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10878112581387149,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 274
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1485.0,
"completions/mean_length": 1495.3125,
"completions/mean_terminated_length": 1462.5,
"completions/min_length": 1440.0,
"completions/min_terminated_length": 1440.0,
"epoch": 0.34439574201628054,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.40909669375685,
"kl": 0.00632476806640625,
"learning_rate": 8.477820268294844e-07,
"loss": 0.0006,
"num_tokens": 14626280.0,
"reward": 0.0,
"reward_std": 0.9581431150436401,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0518540297916247,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05711745364940273,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087681,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 275
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1494.0,
"completions/mean_length": 1498.8125,
"completions/mean_terminated_length": 1490.5,
"completions/min_length": 1487.0,
"completions/min_terminated_length": 1487.0,
"epoch": 0.345648090169067,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.606304176627287,
"kl": 0.003498077392578125,
"learning_rate": 8.463050396916945e-07,
"loss": 0.0,
"num_tokens": 14686461.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.8919962048530579,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.16232941024284467,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.38333692394397534,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 276
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1387.0,
"completions/mean_length": 1296.25,
"completions/mean_terminated_length": 1034.2857666015625,
"completions/min_length": 430.0,
"completions/min_terminated_length": 430.0,
"epoch": 0.34690043832185347,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4727706531128004,
"kl": 0.0047149658203125,
"learning_rate": 8.44822395630324e-07,
"loss": -0.0713,
"num_tokens": 14729641.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0214866399765015,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006892221922202982,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027501536576714,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 277
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1486.0,
"completions/mean_length": 1469.6875,
"completions/mean_terminated_length": 1338.3333740234375,
"completions/min_length": 1234.0,
"completions/min_terminated_length": 1234.0,
"epoch": 0.34815278647463993,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.079175222753427,
"kl": 0.004638671875,
"learning_rate": 8.433341229513516e-07,
"loss": 0.0011,
"num_tokens": 14784988.0,
"reward": 2.9802322387695312e-08,
"reward_std": 1.011260986328125,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016744175612928278,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10326622112744127,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970787,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 278
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1403.0,
"completions/max_terminated_length": 1403.0,
"completions/mean_length": 926.75,
"completions/mean_terminated_length": 926.75,
"completions/min_length": 629.0,
"completions/min_terminated_length": 629.0,
"epoch": 0.34940513462742645,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3928033221665683,
"kl": 0.0011532902717590332,
"learning_rate": 8.41840250068215e-07,
"loss": 0.0325,
"num_tokens": 14819992.0,
"reward": 0.0,
"reward_std": 1.027898907661438,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0026503222290372498,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993382935974904,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059472994182545084,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 279
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1429.0,
"completions/mean_length": 1439.4375,
"completions/mean_terminated_length": 1338.5,
"completions/min_length": 1219.0,
"completions/min_terminated_length": 1219.0,
"epoch": 0.3506574827802129,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.958647342200965,
"kl": 0.00386810302734375,
"learning_rate": 8.403408055012688e-07,
"loss": 0.0226,
"num_tokens": 14868223.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.9957724213600159,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010003602936438873,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10122225063918935,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.575,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 280
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1497.0,
"completions/mean_length": 1458.3125,
"completions/mean_terminated_length": 1366.5999755859375,
"completions/min_length": 1197.0,
"completions/min_terminated_length": 1197.0,
"epoch": 0.3519098309329994,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2796328783487394,
"kl": 0.0066375732421875,
"learning_rate": 8.388358178772394e-07,
"loss": -0.0218,
"num_tokens": 14927820.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8733463287353516,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014896438499357663,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0518786397936184,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 281
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1483.0,
"completions/mean_length": 1253.0625,
"completions/mean_terminated_length": 1140.8182373046875,
"completions/min_length": 807.0,
"completions/min_terminated_length": 807.0,
"epoch": 0.35316217908578584,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.1237440148175604,
"kl": 0.004863739013671875,
"learning_rate": 8.373253159286788e-07,
"loss": -0.0073,
"num_tokens": 14982213.0,
"reward": 0.0,
"reward_std": 0.7831696271896362,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06306545956559828,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774599513752542,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 282
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1496.0,
"completions/mean_length": 1441.875,
"completions/mean_terminated_length": 1345.0,
"completions/min_length": 1205.0,
"completions/min_terminated_length": 1205.0,
"epoch": 0.3544145272385723,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.904795417720074,
"kl": 0.00449371337890625,
"learning_rate": 8.35809328493416e-07,
"loss": -0.0205,
"num_tokens": 15040715.0,
"reward": 3.725290298461914e-09,
"reward_std": 1.0330736637115479,
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012980308714010343,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0673415334549809,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 283
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1163.0,
"completions/mean_length": 1267.125,
"completions/mean_terminated_length": 1034.25,
"completions/min_length": 623.0,
"completions/min_terminated_length": 623.0,
"epoch": 0.35566687539135877,
"frac_reward_zero_std": 0.0,
"grad_norm": 1.9530736863410463,
"kl": 0.0021953582763671875,
"learning_rate": 8.342878845140067e-07,
"loss": 0.0243,
"num_tokens": 15099253.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.060163974761963,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030749215825924263,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045245562410845486,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476839,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 284
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1380.0,
"completions/mean_length": 1273.375,
"completions/mean_terminated_length": 1046.75,
"completions/min_length": 856.0,
"completions/min_terminated_length": 856.0,
"epoch": 0.3569192235441453,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9643132058260195,
"kl": 0.00377655029296875,
"learning_rate": 8.327610130371804e-07,
"loss": -0.0085,
"num_tokens": 15156899.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.9131340980529785,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05994073836967858,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16156243225331035,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590962,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 285
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1133.0,
"completions/mean_length": 1196.4375,
"completions/mean_terminated_length": 892.875,
"completions/min_length": 726.0,
"completions/min_terminated_length": 726.0,
"epoch": 0.35817157169693176,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4448564572392772,
"kl": 0.005279541015625,
"learning_rate": 8.312287432132857e-07,
"loss": -0.0008,
"num_tokens": 15210234.0,
"reward": 0.0,
"reward_std": 0.6276436448097229,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08401960696737835,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31657785119011167,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 286
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1480.0,
"completions/mean_length": 1118.0,
"completions/mean_terminated_length": 1029.84619140625,
"completions/min_length": 768.0,
"completions/min_terminated_length": 768.0,
"epoch": 0.3594239198497182,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.649351006275311,
"kl": 0.004192352294921875,
"learning_rate": 8.296911042957347e-07,
"loss": 0.0474,
"num_tokens": 15254266.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0014917850494385,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05039245601276097,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0698277819618762,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567838,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 287
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1495.0,
"completions/mean_length": 1408.8125,
"completions/mean_terminated_length": 1317.625,
"completions/min_length": 1145.0,
"completions/min_terminated_length": 1145.0,
"epoch": 0.3606762680025047,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.3753456567317306,
"kl": 0.003170013427734375,
"learning_rate": 8.281481256404427e-07,
"loss": -0.0065,
"num_tokens": 15310551.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.0467666387557983,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.011685861651235842,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.020940553119970465,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901161,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 288
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1287.0,
"completions/mean_length": 1308.5,
"completions/mean_terminated_length": 1117.0,
"completions/min_length": 1059.0,
"completions/min_terminated_length": 1059.0,
"epoch": 0.36192861615529115,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.7116355558289365,
"kl": 0.0031452178955078125,
"learning_rate": 8.265998367052699e-07,
"loss": -0.0148,
"num_tokens": 15357047.0,
"reward": 0.0,
"reward_std": 0.7932579517364502,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05102504905151101,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046974298933007336,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 289
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1322.0,
"completions/mean_length": 1303.375,
"completions/mean_terminated_length": 1150.4444580078125,
"completions/min_length": 910.0,
"completions/min_terminated_length": 910.0,
"epoch": 0.36318096430807767,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6471576640483025,
"kl": 0.002368927001953125,
"learning_rate": 8.25046267049458e-07,
"loss": -0.0155,
"num_tokens": 15419477.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9311500191688538,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.17596829941789516,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18219217687822756,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12412657816683506,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 290
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.36443331246086413,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.972210343638841,
"kl": 0.004058837890625,
"learning_rate": 8.234874463330651e-07,
"loss": 0.0002,
"num_tokens": 15481293.0,
"reward": 0.0,
"reward_std": 0.6159095764160156,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06449275539626861,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07616565949841655,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10852547064066473,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 291
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1348.0,
"completions/mean_length": 1140.125,
"completions/mean_terminated_length": 1020.1666870117188,
"completions/min_length": 215.0,
"completions/min_terminated_length": 215.0,
"epoch": 0.3656856606136506,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.7273022702751355,
"kl": 0.00434112548828125,
"learning_rate": 8.219234043164007e-07,
"loss": -0.0148,
"num_tokens": 15538271.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8317296504974365,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002159562349982134,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040954201238117,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 292
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1495.0,
"completions/mean_length": 1346.875,
"completions/mean_terminated_length": 1227.77783203125,
"completions/min_length": 1055.0,
"completions/min_terminated_length": 1055.0,
"epoch": 0.36693800876643706,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.479167145868108,
"kl": 0.005218505859375,
"learning_rate": 8.203541708594571e-07,
"loss": -0.0154,
"num_tokens": 15584509.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0486056804656982,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005770089344222506,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07441253794038902,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 293
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1474.0,
"completions/mean_length": 1284.5625,
"completions/mean_terminated_length": 1186.6363525390625,
"completions/min_length": 979.0,
"completions/min_terminated_length": 979.0,
"epoch": 0.3681903569192235,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.139340219060384,
"kl": 0.00438690185546875,
"learning_rate": 8.18779775921339e-07,
"loss": 0.0201,
"num_tokens": 15631742.0,
"reward": -2.9802322387695312e-08,
"reward_std": 1.0337092876434326,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061225139692727595,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0882517727987926,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 294
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1388.0,
"completions/mean_length": 1493.0,
"completions/mean_terminated_length": 1388.0,
"completions/min_length": 1388.0,
"completions/min_terminated_length": 1388.0,
"epoch": 0.36944270507201,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.4977483724240614,
"kl": 0.0029296875,
"learning_rate": 8.17200249559692e-07,
"loss": -0.0007,
"num_tokens": 15698798.0,
"reward": 0.0,
"reward_std": 0.4475941061973572,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.24246809612484624,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35195872278638696,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045224,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 295
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1412.0,
"completions/mean_length": 1458.625,
"completions/mean_terminated_length": 1367.5999755859375,
"completions/min_length": 1330.0,
"completions/min_terminated_length": 1330.0,
"epoch": 0.3706950532247965,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.867561636937004,
"kl": 0.004486083984375,
"learning_rate": 8.156156219301287e-07,
"loss": -0.0096,
"num_tokens": 15766096.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.9567909240722656,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09166855392489899,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11839819598536988,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13709958532503408,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 296
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1384.0,
"completions/mean_length": 1323.6875,
"completions/mean_terminated_length": 1217.9000244140625,
"completions/min_length": 1037.0,
"completions/min_terminated_length": 1037.0,
"epoch": 0.371947401377583,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.38013590229778,
"kl": 0.00470733642578125,
"learning_rate": 8.140259232856521e-07,
"loss": -0.0394,
"num_tokens": 15817547.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9704372882843018,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05488740961091947,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10481500155411475,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13158576980363348,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 297
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1423.0,
"completions/mean_length": 1495.1875,
"completions/mean_terminated_length": 1423.0,
"completions/min_length": 1423.0,
"completions/min_terminated_length": 1423.0,
"epoch": 0.37319974953036944,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0382889306606327,
"kl": 0.004367828369140625,
"learning_rate": 8.124311839760797e-07,
"loss": -0.0027,
"num_tokens": 15868646.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8351828455924988,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03419630895774928,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1367852358309971,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09418264367902598,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 298
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1344.0,
"completions/mean_length": 1399.875,
"completions/mean_terminated_length": 1179.5999755859375,
"completions/min_length": 1011.0,
"completions/min_terminated_length": 1011.0,
"epoch": 0.3744520976831559,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.362094019703377,
"kl": 0.003170013427734375,
"learning_rate": 8.108314344474623e-07,
"loss": 0.0162,
"num_tokens": 15934516.0,
"reward": 7.450580596923828e-09,
"reward_std": 0.9300060868263245,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026608295676684646,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05700271984957867,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568497,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 299
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1385.0,
"completions/mean_length": 1310.1875,
"completions/mean_terminated_length": 1120.375,
"completions/min_length": 849.0,
"completions/min_terminated_length": 849.0,
"epoch": 0.37570444583594237,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2267030639366325,
"kl": 0.004962921142578125,
"learning_rate": 8.092267052415044e-07,
"loss": 0.0104,
"num_tokens": 15981759.0,
"reward": 0.0,
"reward_std": 0.9144766330718994,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.13077711907103481,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1576724545552638,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999157,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 300
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1495.0,
"completions/mean_length": 1499.6875,
"completions/mean_terminated_length": 1495.0,
"completions/min_length": 1495.0,
"completions/min_terminated_length": 1495.0,
"epoch": 0.3769567939887289,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.857067953077971,
"kl": 0.004589080810546875,
"learning_rate": 8.076170269949795e-07,
"loss": 0.0005,
"num_tokens": 16032986.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.8725603818893433,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01691320115670981,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.057867471716033625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194864,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 301
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1476.0,
"completions/mean_length": 1402.5,
"completions/mean_terminated_length": 1305.0,
"completions/min_length": 1100.0,
"completions/min_terminated_length": 1100.0,
"epoch": 0.37820914214151535,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0631024256482773,
"kl": 0.00476837158203125,
"learning_rate": 8.060024304391464e-07,
"loss": -0.0059,
"num_tokens": 16075122.0,
"reward": 1.4901161193847656e-08,
"reward_std": 1.0385990142822266,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014002892068640102,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04032032793331211,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 302
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1480.0,
"completions/mean_length": 1234.625,
"completions/mean_terminated_length": 1196.71435546875,
"completions/min_length": 1007.0,
"completions/min_terminated_length": 1007.0,
"epoch": 0.3794614902943018,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.625794290754689,
"kl": 0.0052490234375,
"learning_rate": 8.043829463991619e-07,
"loss": -0.0729,
"num_tokens": 16137860.0,
"reward": 0.0,
"reward_std": 0.7281184196472168,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15255108490634472,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0853071621433351,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999162,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 303
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1479.0,
"completions/mean_length": 1229.0,
"completions/mean_terminated_length": 1105.8182373046875,
"completions/min_length": 759.0,
"completions/min_terminated_length": 759.0,
"epoch": 0.3807138384470883,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.260598284534238,
"kl": 0.00495147705078125,
"learning_rate": 8.027586057934928e-07,
"loss": -0.0588,
"num_tokens": 16193676.0,
"reward": 7.450580596923828e-09,
"reward_std": 1.0218051671981812,
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.00276089248932003,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03710765345598682,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14168300559373406,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 304
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1381.0,
"completions/mean_length": 1335.75,
"completions/mean_terminated_length": 1208.0,
"completions/min_length": 1052.0,
"completions/min_terminated_length": 1052.0,
"epoch": 0.38196618659987475,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.305649016415018,
"kl": 0.00536346435546875,
"learning_rate": 8.011294396333247e-07,
"loss": 0.035,
"num_tokens": 16241520.0,
"reward": 2.2351741790771484e-08,
"reward_std": 1.0677435398101807,
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0046395341039948005,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04123648809292501,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12995725793078622,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 305
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1387.0,
"completions/mean_length": 1287.0,
"completions/mean_terminated_length": 1159.2000732421875,
"completions/min_length": 871.0,
"completions/min_terminated_length": 871.0,
"epoch": 0.38321853475266127,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.727887770111367,
"kl": 0.00641632080078125,
"learning_rate": 7.99495479021971e-07,
"loss": -0.022,
"num_tokens": 16295288.0,
"reward": -4.470348358154297e-08,
"reward_std": 1.053145408630371,
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038974548522257506,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10020848772744548,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12224747213928168,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 306
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1449.0,
"completions/mean_length": 1242.0625,
"completions/mean_terminated_length": 1124.8182373046875,
"completions/min_length": 732.0,
"completions/min_terminated_length": 732.0,
"epoch": 0.38447088290544773,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.5550785119927446,
"kl": 0.0034637451171875,
"learning_rate": 7.978567551542785e-07,
"loss": -0.0756,
"num_tokens": 16333129.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.6722694635391235,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06133805044031608,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07932499651372282,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 307
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1431.0,
"completions/mean_length": 1466.0625,
"completions/mean_terminated_length": 1319.0,
"completions/min_length": 1256.0,
"completions/min_terminated_length": 1256.0,
"epoch": 0.3857232310582342,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.914391532004015,
"kl": 0.00472259521484375,
"learning_rate": 7.962132993160318e-07,
"loss": -0.0031,
"num_tokens": 16393066.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.5695419311523438,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03688578385137459,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05735193102645086,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863462,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 308
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1300.0,
"completions/mean_length": 1468.125,
"completions/mean_terminated_length": 1245.0,
"completions/min_length": 1190.0,
"completions/min_terminated_length": 1190.0,
"epoch": 0.38697557921102066,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.462122287718944,
"kl": 0.003597259521484375,
"learning_rate": 7.945651428833566e-07,
"loss": -0.0086,
"num_tokens": 16455300.0,
"reward": 0.0,
"reward_std": 0.9045326113700867,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07094748829476913,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07518616664712767,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 309
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1460.0,
"completions/mean_length": 1250.0625,
"completions/mean_terminated_length": 1055.6666259765625,
"completions/min_length": 953.0,
"completions/min_terminated_length": 953.0,
"epoch": 0.3882279273638071,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.9451996655491754,
"kl": 0.003414154052734375,
"learning_rate": 7.929123173221197e-07,
"loss": 0.016,
"num_tokens": 16510829.0,
"reward": -2.60770320892334e-08,
"reward_std": 0.9780210256576538,
"rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11998443212330577,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.273643343882272,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725535,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
"step": 310
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1482.0,
"completions/mean_length": 1477.3125,
"completions/mean_terminated_length": 1409.25,
"completions/min_length": 1344.0,
"completions/min_terminated_length": 1344.0,
"epoch": 0.3894802755165936,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.5088718729093884,
"kl": 0.0039215087890625,
"learning_rate": 7.91254854187329e-07,
"loss": 0.0109,
"num_tokens": 16557338.0,
"reward": -2.9802322387695312e-08,
"reward_std": 0.8606460094451904,
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11084663306324073,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10033388109681571,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081414,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 311
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1250.0,
"completions/mean_length": 1070.625,
"completions/mean_terminated_length": 1042.0,
"completions/min_length": 692.0,
"completions/min_terminated_length": 692.0,
"epoch": 0.3907326236693801,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.633658673586784,
"kl": 0.004913330078125,
"learning_rate": 7.895927851225315e-07,
"loss": -0.0045,
"num_tokens": 16585492.0,
"reward": 0.0,
"reward_std": 0.8763086795806885,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010857263566407607,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06826631403415188,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8333333333333334,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 312
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1453.0,
"completions/mean_length": 1316.9375,
"completions/mean_terminated_length": 1207.0999755859375,
"completions/min_length": 926.0,
"completions/min_terminated_length": 926.0,
"epoch": 0.3919849718221666,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.5743987456607456,
"kl": 0.00490570068359375,
"learning_rate": 7.879261418592072e-07,
"loss": -0.0521,
"num_tokens": 16629555.0,
"reward": 1.4901161193847656e-08,
"reward_std": 0.9046754240989685,
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07072576648968745,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14285699045244268,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590966,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 313
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1433.0,
"completions/mean_length": 1382.4375,
"completions/mean_terminated_length": 1329.0,
"completions/min_length": 1093.0,
"completions/min_terminated_length": 1093.0,
"epoch": 0.39323731997495304,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.1526667079717625,
"kl": 0.002288818359375,
"learning_rate": 7.862549562161661e-07,
"loss": -0.0277,
"num_tokens": 16682250.0,
"reward": -7.450580596923828e-09,
"reward_std": 1.0446007251739502,
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1297401874034389,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1781696946469639,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06871842709362772,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 314
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1463.0,
"completions/mean_length": 1497.6875,
"completions/mean_terminated_length": 1463.0,
"completions/min_length": 1463.0,
"completions/min_terminated_length": 1463.0,
"epoch": 0.3944896681277395,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0308641968752266,
"kl": 0.005645751953125,
"learning_rate": 7.845792600989385e-07,
"loss": -0.0009,
"num_tokens": 16736925.0,
"reward": 0.0,
"reward_std": 1.0489060878753662,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0036366895025502417,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.014546758010200967,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 315
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1361.0,
"completions/mean_length": 1448.0,
"completions/mean_terminated_length": 1292.0,
"completions/min_length": 1213.0,
"completions/min_terminated_length": 1213.0,
"epoch": 0.39574201628052597,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.5606215816363824,
"kl": 0.003444671630859375,
"learning_rate": 7.828990854991669e-07,
"loss": -0.0016,
"num_tokens": 16805501.0,
"reward": -1.4901161193847656e-08,
"reward_std": 1.0115642547607422,
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021560930387654664,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031239915717000032,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 316
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 1.0,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 0.0,
"completions/mean_length": 1500.0,
"completions/mean_terminated_length": 0.0,
"completions/min_length": 1500.0,
"completions/min_terminated_length": 0.0,
"epoch": 0.3969943644333125,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.8884496894175316,
"kl": 0.00472259521484375,
"learning_rate": 7.812144644939948e-07,
"loss": 0.0002,
"num_tokens": 16868629.0,
"reward": 2.9802322387695312e-08,
"reward_std": 0.9699341058731079,
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.024920589109913467,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09830967886668995,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 317
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1493.0,
"completions/mean_length": 1401.8125,
"completions/mean_terminated_length": 1342.9000244140625,
"completions/min_length": 1196.0,
"completions/min_terminated_length": 1196.0,
"epoch": 0.39824671258609895,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.4263971987387944,
"kl": 0.00539398193359375,
"learning_rate": 7.795254292454546e-07,
"loss": -0.0029,
"num_tokens": 16930194.0,
"reward": 3.725290298461914e-09,
"reward_std": 1.058499813079834,
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003584071693735027,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06412609719118169,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 318
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1327.0,
"completions/mean_length": 1266.875,
"completions/mean_terminated_length": 1085.5555419921875,
"completions/min_length": 632.0,
"completions/min_terminated_length": 632.0,
"epoch": 0.3994990607388854,
"frac_reward_zero_std": 0.0,
"grad_norm": 2.6400441912231263,
"kl": 0.00444793701171875,
"learning_rate": 7.778320119998535e-07,
"loss": -0.121,
"num_tokens": 16979440.0,
"reward": -9.313225746154785e-09,
"reward_std": 1.0413284301757812,
"rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005981072426200435,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04425931042175955,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0926962382871743,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 319
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1500.0,
"completions/max_terminated_length": 1494.0,
"completions/mean_length": 1350.1875,
"completions/mean_terminated_length": 1260.300048828125,
"completions/min_length": 1061.0,
"completions/min_terminated_length": 1061.0,
"epoch": 0.4007514088916719,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.2125264454232823,
"kl": 0.00494384765625,
"learning_rate": 7.761342450871578e-07,
"loss": -0.0401,
"num_tokens": 17023723.0,
"reward": 0.0,
"reward_std": 0.9411365389823914,
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08138630489162721,
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09236477000312811,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465,
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
"step": 320
}
],
"logging_steps": 1,
"max_steps": 799,
"num_input_tokens_seen": 17023723,
"num_train_epochs": 1,
"save_steps": 80,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}