Text Generation
Transformers
Safetensors
English
qwen3
text-rewriting
web
generative-engine-optimization
geo
reinforcement-learning
grpo
conversational
text-generation-inference
Instructions to use cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce") model = AutoModelForCausalLM.from_pretrained("cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce
- SGLang
How to use cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce with Docker Model Runner:
docker model run hf.co/cx-cmu/AutoGEO_mini_Qwen1.7B_Ecommerce
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4007514088916719, | |
| "eval_steps": 500, | |
| "global_step": 320, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1427.0, | |
| "completions/mean_length": 1310.375, | |
| "completions/mean_terminated_length": 1120.75, | |
| "completions/min_length": 941.0, | |
| "completions/min_terminated_length": 941.0, | |
| "epoch": 0.0012523481527864746, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4675665797659936, | |
| "kl": 0.0014476776123046875, | |
| "learning_rate": 0.0, | |
| "loss": -0.0042, | |
| "num_tokens": 47606.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 1.0425715446472168, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020242706942291286, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08320206610241015, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1128748897706693, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1493.0, | |
| "completions/mean_length": 1215.625, | |
| "completions/mean_terminated_length": 1120.8333740234375, | |
| "completions/min_length": 920.0, | |
| "completions/min_terminated_length": 920.0, | |
| "epoch": 0.002504696305572949, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5220841352987073, | |
| "kl": 0.002323150634765625, | |
| "learning_rate": 1.25e-08, | |
| "loss": -0.0365, | |
| "num_tokens": 78984.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9615500569343567, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019240361081273367, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0375240418925418, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1430.8125, | |
| "completions/mean_terminated_length": 1341.857177734375, | |
| "completions/min_length": 1171.0, | |
| "completions/min_terminated_length": 1171.0, | |
| "epoch": 0.003757044458359424, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7257956401904653, | |
| "kl": 0.0018787384033203125, | |
| "learning_rate": 2.5e-08, | |
| "loss": -0.014, | |
| "num_tokens": 126437.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0492231845855713, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09708628067006185, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16724793667635054, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476838, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 1463.4375, | |
| "completions/mean_terminated_length": 1353.75, | |
| "completions/min_length": 1084.0, | |
| "completions/min_terminated_length": 1084.0, | |
| "epoch": 0.005009392611145898, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0068456094040337, | |
| "kl": 0.00238037109375, | |
| "learning_rate": 3.75e-08, | |
| "loss": -0.0103, | |
| "num_tokens": 192900.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.4076952338218689, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.42554686388976987, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3748667411110748, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.006261740763932373, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0250923226839315, | |
| "kl": 0.002262115478515625, | |
| "learning_rate": 5e-08, | |
| "loss": 0.0001, | |
| "num_tokens": 257452.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9494391083717346, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0021633155301854353, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04003332867073718, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043475, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1443.0, | |
| "completions/mean_length": 1213.4375, | |
| "completions/mean_terminated_length": 1041.5, | |
| "completions/min_length": 749.0, | |
| "completions/min_terminated_length": 749.0, | |
| "epoch": 0.007514088916718848, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.520673181444066, | |
| "kl": 0.002166748046875, | |
| "learning_rate": 6.25e-08, | |
| "loss": -0.0047, | |
| "num_tokens": 300227.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.000030517578125, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1494053837623106, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21650138601325905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1475.0, | |
| "completions/mean_length": 1441.5, | |
| "completions/mean_terminated_length": 1266.0, | |
| "completions/min_length": 868.0, | |
| "completions/min_terminated_length": 868.0, | |
| "epoch": 0.008766437069505322, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.131914910533151, | |
| "kl": 0.002285003662109375, | |
| "learning_rate": 7.5e-08, | |
| "loss": -0.0115, | |
| "num_tokens": 365811.0, | |
| "reward": 0.0, | |
| "reward_std": 1.021754264831543, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20434821411964987, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13055976557133547, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1037.0, | |
| "completions/mean_length": 1197.6875, | |
| "completions/mean_terminated_length": 895.375, | |
| "completions/min_length": 718.0, | |
| "completions/min_terminated_length": 718.0, | |
| "epoch": 0.010018785222291797, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1709545933290264, | |
| "kl": 0.001911163330078125, | |
| "learning_rate": 8.75e-08, | |
| "loss": 0.0124, | |
| "num_tokens": 406590.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7096362113952637, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011512278889933215, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017023573988747046, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901863, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1368.0, | |
| "completions/mean_length": 1424.8125, | |
| "completions/mean_terminated_length": 1299.5, | |
| "completions/min_length": 1222.0, | |
| "completions/min_terminated_length": 1222.0, | |
| "epoch": 0.011271133375078271, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4985696146916663, | |
| "kl": 0.0014972686767578125, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0017, | |
| "num_tokens": 449955.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.592147946357727, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09093222668860702, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16366647482965233, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1452.0, | |
| "completions/mean_length": 1497.0, | |
| "completions/mean_terminated_length": 1452.0, | |
| "completions/min_length": 1452.0, | |
| "completions/min_terminated_length": 1452.0, | |
| "epoch": 0.012523481527864746, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.994231395858393, | |
| "kl": 0.002552032470703125, | |
| "learning_rate": 1.125e-07, | |
| "loss": 0.0008, | |
| "num_tokens": 512611.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7100945115089417, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.39335439512941156, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.44383620756924225, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1319.0, | |
| "completions/mean_terminated_length": 1258.666748046875, | |
| "completions/min_length": 1147.0, | |
| "completions/min_terminated_length": 1147.0, | |
| "epoch": 0.013775829680651221, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.29188099989823, | |
| "kl": 0.002773284912109375, | |
| "learning_rate": 1.25e-07, | |
| "loss": -0.0193, | |
| "num_tokens": 578363.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7537417411804199, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.006668674614171876, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06272286484055771, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.49583333333333335, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693655, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1362.0, | |
| "completions/max_terminated_length": 1362.0, | |
| "completions/mean_length": 856.4375, | |
| "completions/mean_terminated_length": 856.4375, | |
| "completions/min_length": 689.0, | |
| "completions/min_terminated_length": 689.0, | |
| "epoch": 0.015028177833437696, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.02451725178761, | |
| "kl": 0.0014524459838867188, | |
| "learning_rate": 1.375e-07, | |
| "loss": -0.0034, | |
| "num_tokens": 624626.0, | |
| "reward": 0.0, | |
| "reward_std": 0.25382307171821594, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16171540881469407, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04512392405527899, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.17293758240303758, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1386.0, | |
| "completions/mean_length": 1088.5625, | |
| "completions/mean_terminated_length": 901.5454711914062, | |
| "completions/min_length": 674.0, | |
| "completions/min_terminated_length": 674.0, | |
| "epoch": 0.01628052598622417, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.015402694119637, | |
| "kl": 0.0021724700927734375, | |
| "learning_rate": 1.5e-07, | |
| "loss": -0.0824, | |
| "num_tokens": 681059.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9276120662689209, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.041562779715464626, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1909826248378845, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11409872268574492, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1445.0, | |
| "completions/mean_length": 1496.5625, | |
| "completions/mean_terminated_length": 1445.0, | |
| "completions/min_length": 1445.0, | |
| "completions/min_terminated_length": 1445.0, | |
| "epoch": 0.017532874139010644, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.995346934747371, | |
| "kl": 0.002460479736328125, | |
| "learning_rate": 1.625e-07, | |
| "loss": -0.001, | |
| "num_tokens": 745196.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.008323073387146, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.057098024958501865, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10812840498160957, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1398411797560202, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1481.0, | |
| "completions/mean_length": 1267.8125, | |
| "completions/mean_terminated_length": 1128.5, | |
| "completions/min_length": 842.0, | |
| "completions/min_terminated_length": 842.0, | |
| "epoch": 0.01878522229179712, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.681468707345247, | |
| "kl": 0.002552032470703125, | |
| "learning_rate": 1.75e-07, | |
| "loss": -0.0258, | |
| "num_tokens": 802777.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.5409140586853027, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.025752634294563932, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1190717918627845, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10318986456114838, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1424.0, | |
| "completions/mean_length": 1465.75, | |
| "completions/mean_terminated_length": 1317.3333740234375, | |
| "completions/min_length": 1240.0, | |
| "completions/min_terminated_length": 1240.0, | |
| "epoch": 0.020037570444583593, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.952391425850165, | |
| "kl": 0.00229644775390625, | |
| "learning_rate": 1.875e-07, | |
| "loss": 0.0085, | |
| "num_tokens": 852397.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9532216191291809, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.009914003172755002, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14279656209744931, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1085254706406647, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1415.0, | |
| "completions/mean_length": 1464.1875, | |
| "completions/mean_terminated_length": 1356.75, | |
| "completions/min_length": 1308.0, | |
| "completions/min_terminated_length": 1308.0, | |
| "epoch": 0.021289918597370068, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6835952376597447, | |
| "kl": 0.0016937255859375, | |
| "learning_rate": 2e-07, | |
| "loss": -0.0182, | |
| "num_tokens": 905544.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.670647144317627, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0385939635652747, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11140246797780545, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14950535726806533, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1456.0, | |
| "completions/mean_length": 1226.6875, | |
| "completions/mean_terminated_length": 1163.615478515625, | |
| "completions/min_length": 833.0, | |
| "completions/min_terminated_length": 833.0, | |
| "epoch": 0.022542266750156543, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.118592346302386, | |
| "kl": 0.0031585693359375, | |
| "learning_rate": 2.1249999999999998e-07, | |
| "loss": -0.014, | |
| "num_tokens": 958635.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0170769691467285, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004864839675281578, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0373192839130601, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1192569587999888, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1374.0, | |
| "completions/mean_length": 1492.125, | |
| "completions/mean_terminated_length": 1374.0, | |
| "completions/min_length": 1374.0, | |
| "completions/min_terminated_length": 1374.0, | |
| "epoch": 0.023794614902943018, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9483970425927475, | |
| "kl": 0.0020732879638671875, | |
| "learning_rate": 2.25e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 1017653.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8760651350021362, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014058396075366015, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03110460490345673, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 1290.125, | |
| "completions/mean_terminated_length": 1164.2000732421875, | |
| "completions/min_length": 987.0, | |
| "completions/min_terminated_length": 987.0, | |
| "epoch": 0.025046963055729492, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3136168175643763, | |
| "kl": 0.002140045166015625, | |
| "learning_rate": 2.3749999999999998e-07, | |
| "loss": -0.0256, | |
| "num_tokens": 1065663.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9537639617919922, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038097750035485885, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1082295867822669, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0850925422157591, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1263.0, | |
| "completions/mean_length": 1341.5, | |
| "completions/mean_terminated_length": 1183.0, | |
| "completions/min_length": 1034.0, | |
| "completions/min_terminated_length": 1034.0, | |
| "epoch": 0.026299311208515967, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.306897182610288, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 2.5e-07, | |
| "loss": -0.0094, | |
| "num_tokens": 1117263.0, | |
| "reward": 0.0, | |
| "reward_std": 0.990053117275238, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011397748892334698, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046758634855771405, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725538, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1342.0, | |
| "completions/mean_length": 1266.625, | |
| "completions/mean_terminated_length": 1033.25, | |
| "completions/min_length": 774.0, | |
| "completions/min_terminated_length": 774.0, | |
| "epoch": 0.027551659361302442, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3250640108747564, | |
| "kl": 0.002407073974609375, | |
| "learning_rate": 2.625e-07, | |
| "loss": -0.0385, | |
| "num_tokens": 1172489.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7966146469116211, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020326344256082304, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14616918176802837, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1236.0, | |
| "completions/mean_length": 1460.875, | |
| "completions/mean_terminated_length": 1187.0, | |
| "completions/min_length": 1138.0, | |
| "completions/min_terminated_length": 1138.0, | |
| "epoch": 0.028804007514088917, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4267377669243926, | |
| "kl": 0.0071010589599609375, | |
| "learning_rate": 2.75e-07, | |
| "loss": -0.0111, | |
| "num_tokens": 1234527.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.5723245143890381, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02122072131733574, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.157410051166117, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1325.625, | |
| "completions/mean_terminated_length": 1151.25, | |
| "completions/min_length": 1018.0, | |
| "completions/min_terminated_length": 1018.0, | |
| "epoch": 0.03005635566687539, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.24473326800092, | |
| "kl": 0.0023651123046875, | |
| "learning_rate": 2.8749999999999995e-07, | |
| "loss": -0.0069, | |
| "num_tokens": 1269905.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.0385103225708008, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04647400767345873, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09647557054247557, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15371932093796678, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1429.0, | |
| "completions/mean_length": 1377.0625, | |
| "completions/mean_terminated_length": 1321.181884765625, | |
| "completions/min_length": 1206.0, | |
| "completions/min_terminated_length": 1206.0, | |
| "epoch": 0.031308703819661866, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6845626042385518, | |
| "kl": 0.001850128173828125, | |
| "learning_rate": 3e-07, | |
| "loss": 0.0174, | |
| "num_tokens": 1328778.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7787291407585144, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09266568639996468, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0822707712414604, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12382783747337808, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1352.0, | |
| "completions/mean_length": 1169.5625, | |
| "completions/mean_terminated_length": 1093.3077392578125, | |
| "completions/min_length": 721.0, | |
| "completions/min_terminated_length": 721.0, | |
| "epoch": 0.03256105197244834, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.712859953207578, | |
| "kl": 0.00261688232421875, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "loss": 0.0167, | |
| "num_tokens": 1363011.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0016117095947266, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1709176314049482, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1600211117254044, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1413.0, | |
| "completions/mean_length": 1215.9375, | |
| "completions/mean_terminated_length": 1197.0001220703125, | |
| "completions/min_length": 950.0, | |
| "completions/min_terminated_length": 950.0, | |
| "epoch": 0.033813400125234816, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.399525494329125, | |
| "kl": 0.002574920654296875, | |
| "learning_rate": 3.25e-07, | |
| "loss": 0.0013, | |
| "num_tokens": 1407434.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9290227890014648, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03979791227452069, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13950243126020834, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593314, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1368.0, | |
| "completions/mean_length": 1340.4375, | |
| "completions/mean_terminated_length": 1180.875, | |
| "completions/min_length": 1034.0, | |
| "completions/min_terminated_length": 1034.0, | |
| "epoch": 0.03506574827802129, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9566553373980344, | |
| "kl": 0.0021228790283203125, | |
| "learning_rate": 3.375e-07, | |
| "loss": -0.0056, | |
| "num_tokens": 1458241.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7809990644454956, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028119487654073606, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1198837710832071, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6083333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0873477511423713, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1401.625, | |
| "completions/mean_terminated_length": 1275.1429443359375, | |
| "completions/min_length": 1054.0, | |
| "completions/min_terminated_length": 1054.0, | |
| "epoch": 0.036318096430807766, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9381632846830548, | |
| "kl": 0.002338409423828125, | |
| "learning_rate": 3.5e-07, | |
| "loss": 0.0125, | |
| "num_tokens": 1523987.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9863969087600708, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06145046632658874, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08502220502724643, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829063, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1394.0, | |
| "completions/mean_length": 1257.625, | |
| "completions/mean_terminated_length": 1069.111083984375, | |
| "completions/min_length": 922.0, | |
| "completions/min_terminated_length": 922.0, | |
| "epoch": 0.03757044458359424, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4028418647289094, | |
| "kl": 0.0042324066162109375, | |
| "learning_rate": 3.6249999999999997e-07, | |
| "loss": -0.0044, | |
| "num_tokens": 1582341.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9369316697120667, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.058010557784549034, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06029435215775259, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1184.0, | |
| "completions/mean_length": 1480.25, | |
| "completions/mean_terminated_length": 1184.0, | |
| "completions/min_length": 1184.0, | |
| "completions/min_terminated_length": 1184.0, | |
| "epoch": 0.038822792736380715, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.842249981197029, | |
| "kl": 0.0021514892578125, | |
| "learning_rate": 3.75e-07, | |
| "loss": -0.0106, | |
| "num_tokens": 1629017.0, | |
| "reward": -2.2351741790771484e-08, | |
| "reward_std": 1.0243444442749023, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010824625533504566, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02884739427994731, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1032.0, | |
| "completions/mean_terminated_length": 668.0, | |
| "completions/min_length": 294.0, | |
| "completions/min_terminated_length": 294.0, | |
| "epoch": 0.040075140889167186, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.8067119735652115, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 3.875e-07, | |
| "loss": 0.0413, | |
| "num_tokens": 1666305.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8114193677902222, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05789475536948171, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04045242685812858, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14707015206910487, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1495.0, | |
| "completions/mean_length": 1271.25, | |
| "completions/mean_terminated_length": 1195.0, | |
| "completions/min_length": 1030.0, | |
| "completions/min_terminated_length": 1030.0, | |
| "epoch": 0.041327489041953665, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1322197700429, | |
| "kl": 0.00167083740234375, | |
| "learning_rate": 4e-07, | |
| "loss": -0.0393, | |
| "num_tokens": 1727629.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.4495465159416199, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05764457052515048, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11640629412276694, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0906764700582363, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1438.0625, | |
| "completions/mean_terminated_length": 1334.8333740234375, | |
| "completions/min_length": 1171.0, | |
| "completions/min_terminated_length": 1171.0, | |
| "epoch": 0.042579837194740136, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.291671899271785, | |
| "kl": 0.0019550323486328125, | |
| "learning_rate": 4.1249999999999997e-07, | |
| "loss": 0.0187, | |
| "num_tokens": 1794062.0, | |
| "reward": 2.60770320892334e-08, | |
| "reward_std": 1.0634629726409912, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05489684988302594, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2423673289052158, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829065, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1106.0, | |
| "completions/mean_length": 1253.1875, | |
| "completions/mean_terminated_length": 1061.2222900390625, | |
| "completions/min_length": 977.0, | |
| "completions/min_terminated_length": 977.0, | |
| "epoch": 0.043832185347526614, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3253132789681, | |
| "kl": 0.00135040283203125, | |
| "learning_rate": 4.2499999999999995e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 1847393.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.6563782691955566, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072262342914357, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.195641332904443, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746356, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1399.3125, | |
| "completions/mean_terminated_length": 1298.625, | |
| "completions/min_length": 1031.0, | |
| "completions/min_terminated_length": 1031.0, | |
| "epoch": 0.045084533500313086, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7548692610807795, | |
| "kl": 0.0021877288818359375, | |
| "learning_rate": 4.375e-07, | |
| "loss": -0.0061, | |
| "num_tokens": 1892206.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9828654527664185, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06116004436340469, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10276980263780594, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1352.6875, | |
| "completions/mean_terminated_length": 1318.6923828125, | |
| "completions/min_length": 1218.0, | |
| "completions/min_terminated_length": 1218.0, | |
| "epoch": 0.046336881653099564, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.47319096015074, | |
| "kl": 0.0014925003051757812, | |
| "learning_rate": 4.5e-07, | |
| "loss": -0.0109, | |
| "num_tokens": 1940945.0, | |
| "reward": 0.0, | |
| "reward_std": 0.955802857875824, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.041245323817924374, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19292307241869963, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1468.0, | |
| "completions/mean_length": 1375.25, | |
| "completions/mean_terminated_length": 1250.5, | |
| "completions/min_length": 959.0, | |
| "completions/min_terminated_length": 959.0, | |
| "epoch": 0.047589229805886035, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1878681605362496, | |
| "kl": 0.002475738525390625, | |
| "learning_rate": 4.625e-07, | |
| "loss": -0.0118, | |
| "num_tokens": 1985181.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.054539442062378, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019033803394582376, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10927050985901436, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06831300510639736, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1421.0, | |
| "completions/mean_length": 1447.0, | |
| "completions/mean_terminated_length": 1288.0, | |
| "completions/min_length": 1065.0, | |
| "completions/min_terminated_length": 1065.0, | |
| "epoch": 0.048841577958672514, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.188392715000756, | |
| "kl": 0.00237274169921875, | |
| "learning_rate": 4.7499999999999995e-07, | |
| "loss": 0.0406, | |
| "num_tokens": 2034525.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.0613259077072144, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.019229174460983274, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03385821534786073, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09651328828101764, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1135.0, | |
| "completions/mean_length": 1148.75, | |
| "completions/mean_terminated_length": 797.5, | |
| "completions/min_length": 735.0, | |
| "completions/min_terminated_length": 735.0, | |
| "epoch": 0.050093926111458985, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6930283084099047, | |
| "kl": 0.0015668869018554688, | |
| "learning_rate": 4.875e-07, | |
| "loss": -0.0288, | |
| "num_tokens": 2080937.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7898622751235962, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072289975795826, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19078379794323846, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1433.0, | |
| "completions/mean_length": 1416.9375, | |
| "completions/mean_terminated_length": 1333.875, | |
| "completions/min_length": 1244.0, | |
| "completions/min_terminated_length": 1244.0, | |
| "epoch": 0.05134627426424546, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.177282619828498, | |
| "kl": 0.0012989044189453125, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0091, | |
| "num_tokens": 2136744.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0509533882141113, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.031295483862865174, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11149225377383207, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686706, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1442.0, | |
| "completions/mean_length": 1273.4375, | |
| "completions/mean_terminated_length": 1197.916748046875, | |
| "completions/min_length": 943.0, | |
| "completions/min_terminated_length": 943.0, | |
| "epoch": 0.052598622417031934, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2241520761115305, | |
| "kl": 0.002368927001953125, | |
| "learning_rate": 5.125e-07, | |
| "loss": 0.004, | |
| "num_tokens": 2171271.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0308477878570557, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12167064883765863, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12965137595029042, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5416666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15177956725803718, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1497.0, | |
| "completions/mean_length": 1466.6875, | |
| "completions/mean_terminated_length": 1366.75, | |
| "completions/min_length": 1132.0, | |
| "completions/min_terminated_length": 1132.0, | |
| "epoch": 0.05385097056981841, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9471462447348635, | |
| "kl": 0.0021038055419921875, | |
| "learning_rate": 5.25e-07, | |
| "loss": -0.0164, | |
| "num_tokens": 2231986.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.23251324892044067, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04063102604876061, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.22066858001488113, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.205074512203627, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1422.3125, | |
| "completions/mean_terminated_length": 1322.4285888671875, | |
| "completions/min_length": 1074.0, | |
| "completions/min_terminated_length": 1074.0, | |
| "epoch": 0.055103318722604884, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4268519840677527, | |
| "kl": 0.0010042190551757812, | |
| "learning_rate": 5.374999999999999e-07, | |
| "loss": -0.0099, | |
| "num_tokens": 2288223.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9692014455795288, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.070492449256456, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20832413138507544, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06440611887195309, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1448.0, | |
| "completions/mean_length": 1376.0, | |
| "completions/mean_terminated_length": 1301.5999755859375, | |
| "completions/min_length": 954.0, | |
| "completions/min_terminated_length": 954.0, | |
| "epoch": 0.056355666875391355, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.03273915452992, | |
| "kl": 0.002422332763671875, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0123, | |
| "num_tokens": 2354343.0, | |
| "reward": 1.862645149230957e-08, | |
| "reward_std": 1.067973256111145, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1362560230689488, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16718884747044185, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.058214163988576643, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1460.0, | |
| "completions/mean_length": 1453.0625, | |
| "completions/mean_terminated_length": 1392.71435546875, | |
| "completions/min_length": 1326.0, | |
| "completions/min_terminated_length": 1326.0, | |
| "epoch": 0.057608015028177834, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7636896151627517, | |
| "kl": 0.001781463623046875, | |
| "learning_rate": 5.625e-07, | |
| "loss": -0.0161, | |
| "num_tokens": 2410776.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.722027599811554, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.175370955230916, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15404241260320187, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14446581038560777, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1185.0, | |
| "completions/mean_length": 1384.8125, | |
| "completions/mean_terminated_length": 1039.25, | |
| "completions/min_length": 797.0, | |
| "completions/min_terminated_length": 797.0, | |
| "epoch": 0.058860363180964305, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.218383433006556, | |
| "kl": 0.002429962158203125, | |
| "learning_rate": 5.749999999999999e-07, | |
| "loss": -0.0616, | |
| "num_tokens": 2460181.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8904982209205627, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020496850203242982, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13226975013047063, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08062257748298553, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1438.0, | |
| "completions/mean_length": 1472.0, | |
| "completions/mean_terminated_length": 1350.666748046875, | |
| "completions/min_length": 1240.0, | |
| "completions/min_terminated_length": 1240.0, | |
| "epoch": 0.06011271133375078, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5256786742208663, | |
| "kl": 0.002872467041015625, | |
| "learning_rate": 5.875e-07, | |
| "loss": -0.0096, | |
| "num_tokens": 2524269.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.001219630241394, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0854065639247727, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950921206250912, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10327955589886446, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1449.0, | |
| "completions/mean_length": 1430.9375, | |
| "completions/mean_terminated_length": 1315.8333740234375, | |
| "completions/min_length": 1131.0, | |
| "completions/min_terminated_length": 1131.0, | |
| "epoch": 0.061365059486537255, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.961325619079936, | |
| "kl": 0.002254486083984375, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0013, | |
| "num_tokens": 2584356.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9873535633087158, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029950137491573797, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16218750528728998, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505425, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1466.0, | |
| "completions/mean_length": 1491.125, | |
| "completions/mean_terminated_length": 1429.0, | |
| "completions/min_length": 1392.0, | |
| "completions/min_terminated_length": 1392.0, | |
| "epoch": 0.06261740763932373, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6822700139828397, | |
| "kl": 0.0020389556884765625, | |
| "learning_rate": 6.125000000000001e-07, | |
| "loss": -0.0009, | |
| "num_tokens": 2648270.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.7698144912719727, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032020807081585716, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.053695035371207615, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369005, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1457.0, | |
| "completions/mean_length": 1282.125, | |
| "completions/mean_terminated_length": 1112.6666259765625, | |
| "completions/min_length": 802.0, | |
| "completions/min_terminated_length": 802.0, | |
| "epoch": 0.06386975579211021, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.07804473575342, | |
| "kl": 0.002071380615234375, | |
| "learning_rate": 6.249999999999999e-07, | |
| "loss": -0.0074, | |
| "num_tokens": 2693776.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5634655952453613, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.013292184885055576, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12085541345993306, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1462.75, | |
| "completions/mean_terminated_length": 1301.3333740234375, | |
| "completions/min_length": 998.0, | |
| "completions/min_terminated_length": 998.0, | |
| "epoch": 0.06512210394489668, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.194495117066712, | |
| "kl": 0.002658843994140625, | |
| "learning_rate": 6.374999999999999e-07, | |
| "loss": 0.0226, | |
| "num_tokens": 2758980.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.985281229019165, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02226574155778713, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05167870819779757, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746353, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1426.0, | |
| "completions/mean_length": 1321.3125, | |
| "completions/mean_terminated_length": 1295.7857666015625, | |
| "completions/min_length": 1123.0, | |
| "completions/min_terminated_length": 1123.0, | |
| "epoch": 0.06637445209768315, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.085117182590426, | |
| "kl": 0.0022735595703125, | |
| "learning_rate": 6.5e-07, | |
| "loss": 0.0196, | |
| "num_tokens": 2825249.0, | |
| "reward": 4.470348358154297e-08, | |
| "reward_std": 0.9839984774589539, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08735912330077701, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14559155866011037, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06978803887752093, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1477.0, | |
| "completions/mean_length": 1498.5625, | |
| "completions/mean_terminated_length": 1477.0, | |
| "completions/min_length": 1477.0, | |
| "completions/min_terminated_length": 1477.0, | |
| "epoch": 0.06762680025046963, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9348811206443304, | |
| "kl": 0.00191497802734375, | |
| "learning_rate": 6.624999999999999e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 2889498.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0318164825439453, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04941181253574712, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06836218150195612, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045227, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1472.0, | |
| "completions/mean_length": 1384.875, | |
| "completions/mean_terminated_length": 1236.857177734375, | |
| "completions/min_length": 913.0, | |
| "completions/min_terminated_length": 913.0, | |
| "epoch": 0.06887914840325611, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0200180369762695, | |
| "kl": 0.0021152496337890625, | |
| "learning_rate": 6.75e-07, | |
| "loss": -0.0343, | |
| "num_tokens": 2950200.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0550494194030762, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038887574815180403, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06912072840442107, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258099, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1450.0, | |
| "completions/mean_length": 1435.0625, | |
| "completions/mean_terminated_length": 1370.125, | |
| "completions/min_length": 1161.0, | |
| "completions/min_terminated_length": 1161.0, | |
| "epoch": 0.07013149655604257, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9170742885205607, | |
| "kl": 0.0019893646240234375, | |
| "learning_rate": 6.875e-07, | |
| "loss": 0.0029, | |
| "num_tokens": 3019673.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9821785688400269, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016992912073662925, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.105336871629235, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 1360.75, | |
| "completions/mean_terminated_length": 1221.5, | |
| "completions/min_length": 1081.0, | |
| "completions/min_terminated_length": 1081.0, | |
| "epoch": 0.07138384470882905, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3821452004991794, | |
| "kl": 0.0014314651489257812, | |
| "learning_rate": 7e-07, | |
| "loss": 0.029, | |
| "num_tokens": 3075477.0, | |
| "reward": 2.60770320892334e-08, | |
| "reward_std": 1.0472596883773804, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010678083797130186, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11394385265661125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.045338235029118164, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1392.0, | |
| "completions/max_terminated_length": 1392.0, | |
| "completions/mean_length": 970.625, | |
| "completions/mean_terminated_length": 970.625, | |
| "completions/min_length": 715.0, | |
| "completions/min_terminated_length": 715.0, | |
| "epoch": 0.07263619286161553, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5943049280036243, | |
| "kl": 0.0017871856689453125, | |
| "learning_rate": 7.125e-07, | |
| "loss": -0.0489, | |
| "num_tokens": 3103423.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8579948544502258, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11955284309699343, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1294140259487627, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1451.0, | |
| "completions/mean_length": 1484.75, | |
| "completions/mean_terminated_length": 1378.0, | |
| "completions/min_length": 1305.0, | |
| "completions/min_terminated_length": 1305.0, | |
| "epoch": 0.07388854101440201, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.914915162479294, | |
| "kl": 0.0023479461669921875, | |
| "learning_rate": 7.249999999999999e-07, | |
| "loss": -0.0133, | |
| "num_tokens": 3170979.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0570372343063354, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004701879619984315, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950367185128266, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1359.9375, | |
| "completions/mean_terminated_length": 1179.857177734375, | |
| "completions/min_length": 406.0, | |
| "completions/min_terminated_length": 406.0, | |
| "epoch": 0.07514088916718847, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.283089786479577, | |
| "kl": 0.002674102783203125, | |
| "learning_rate": 7.375e-07, | |
| "loss": -0.0525, | |
| "num_tokens": 3233802.0, | |
| "reward": -1.6763806343078613e-08, | |
| "reward_std": 1.050881028175354, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.6763806343078613e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02996931362982372, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07266154836265915, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238706, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.07639323731997495, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.311519312396822, | |
| "kl": 0.00279998779296875, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 3293354.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.0101943016052246, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04793344228148064, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12274932480508612, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1454.0, | |
| "completions/mean_length": 1410.1875, | |
| "completions/mean_terminated_length": 1294.71435546875, | |
| "completions/min_length": 1137.0, | |
| "completions/min_terminated_length": 1137.0, | |
| "epoch": 0.07764558547276143, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0789417256546874, | |
| "kl": 0.0022792816162109375, | |
| "learning_rate": 7.624999999999999e-07, | |
| "loss": -0.0109, | |
| "num_tokens": 3334909.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9900147914886475, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004903461451645089, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03771048515625185, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693658, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1316.0, | |
| "completions/mean_length": 1133.0, | |
| "completions/mean_terminated_length": 1108.533447265625, | |
| "completions/min_length": 957.0, | |
| "completions/min_terminated_length": 957.0, | |
| "epoch": 0.07889793362554791, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.259053190740638, | |
| "kl": 0.0018634796142578125, | |
| "learning_rate": 7.75e-07, | |
| "loss": -0.0194, | |
| "num_tokens": 3383333.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.671829104423523, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09142372399409204, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09598955648379433, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1484.0, | |
| "completions/mean_length": 1463.0, | |
| "completions/mean_terminated_length": 1352.0, | |
| "completions/min_length": 1206.0, | |
| "completions/min_terminated_length": 1206.0, | |
| "epoch": 0.08015028177833437, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.187716368550353, | |
| "kl": 0.002471923828125, | |
| "learning_rate": 7.875e-07, | |
| "loss": 0.0106, | |
| "num_tokens": 3442269.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0351850986480713, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12370484162737726, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1619343847332339, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457553, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 1410.625, | |
| "completions/mean_terminated_length": 1261.666748046875, | |
| "completions/min_length": 995.0, | |
| "completions/min_terminated_length": 995.0, | |
| "epoch": 0.08140262993112085, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.218117740066407, | |
| "kl": 0.002559661865234375, | |
| "learning_rate": 8e-07, | |
| "loss": -0.0443, | |
| "num_tokens": 3489911.0, | |
| "reward": 5.960464477539063e-08, | |
| "reward_std": 0.5395079851150513, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061171909778282046, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06618755934392026, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460886, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1491.0, | |
| "completions/mean_length": 1316.6875, | |
| "completions/mean_terminated_length": 1206.7000732421875, | |
| "completions/min_length": 869.0, | |
| "completions/min_terminated_length": 869.0, | |
| "epoch": 0.08265497808390733, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.223646819331395, | |
| "kl": 0.002506256103515625, | |
| "learning_rate": 8.125e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 3531330.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9571313858032227, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.027972706586888517, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1908156027057365, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08734775114237132, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1330.0, | |
| "completions/mean_length": 1478.5625, | |
| "completions/mean_terminated_length": 1328.5, | |
| "completions/min_length": 1327.0, | |
| "completions/min_terminated_length": 1327.0, | |
| "epoch": 0.08390732623669381, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.411248138087788, | |
| "kl": 0.00255584716796875, | |
| "learning_rate": 8.249999999999999e-07, | |
| "loss": 0.0085, | |
| "num_tokens": 3591331.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.6705090403556824, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3499282464198203, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3060898603663511, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1422.0, | |
| "completions/mean_length": 1495.125, | |
| "completions/mean_terminated_length": 1422.0, | |
| "completions/min_length": 1422.0, | |
| "completions/min_terminated_length": 1422.0, | |
| "epoch": 0.08515967438948027, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.025267713589772, | |
| "kl": 0.002880096435546875, | |
| "learning_rate": 8.375e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 3658421.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9633276462554932, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07580010422442789, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17700501480681413, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533113, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1309.0, | |
| "completions/mean_length": 1469.875, | |
| "completions/mean_terminated_length": 1259.0, | |
| "completions/min_length": 1209.0, | |
| "completions/min_terminated_length": 1209.0, | |
| "epoch": 0.08641202254226675, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0466734222423546, | |
| "kl": 0.002544403076171875, | |
| "learning_rate": 8.499999999999999e-07, | |
| "loss": 0.0044, | |
| "num_tokens": 3724899.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0227458477020264, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.002677645774302454, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11990711113827299, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457552, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 1410.8125, | |
| "completions/mean_terminated_length": 1321.625, | |
| "completions/min_length": 1070.0, | |
| "completions/min_terminated_length": 1070.0, | |
| "epoch": 0.08766437069505323, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.318934314260283, | |
| "kl": 0.002834320068359375, | |
| "learning_rate": 8.625e-07, | |
| "loss": 0.0072, | |
| "num_tokens": 3777184.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0494259595870972, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024827264373621732, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.036366284403351476, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0859586463881842, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1399.25, | |
| "completions/mean_terminated_length": 1231.3333740234375, | |
| "completions/min_length": 1009.0, | |
| "completions/min_terminated_length": 1009.0, | |
| "epoch": 0.08891671884783969, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8509264779724033, | |
| "kl": 0.002223968505859375, | |
| "learning_rate": 8.75e-07, | |
| "loss": 0.0037, | |
| "num_tokens": 3836428.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0668516159057617, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053166199498163626, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12647299276011556, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1447.0, | |
| "completions/mean_length": 1199.625, | |
| "completions/mean_terminated_length": 1130.3077392578125, | |
| "completions/min_length": 968.0, | |
| "completions/min_terminated_length": 968.0, | |
| "epoch": 0.09016906700062617, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.886201838693034, | |
| "kl": 0.001605987548828125, | |
| "learning_rate": 8.874999999999999e-07, | |
| "loss": -0.0027, | |
| "num_tokens": 3881094.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8761758804321289, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02026371657719268, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04408943383486411, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.09142141515341265, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.60284832797847, | |
| "kl": 0.00217437744140625, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 3940518.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5877071619033813, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027393406712592036, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0844493241747004, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.067631901304592, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1482.0, | |
| "completions/mean_length": 1269.3125, | |
| "completions/mean_terminated_length": 1192.416748046875, | |
| "completions/min_length": 959.0, | |
| "completions/min_terminated_length": 959.0, | |
| "epoch": 0.09267376330619913, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2612995556206354, | |
| "kl": 0.002330780029296875, | |
| "learning_rate": 9.124999999999999e-07, | |
| "loss": -0.0066, | |
| "num_tokens": 3982827.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.924209713935852, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021465279786927867, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03289535545475229, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804345, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1419.0, | |
| "completions/mean_length": 1346.4375, | |
| "completions/mean_terminated_length": 1227.0, | |
| "completions/min_length": 1081.0, | |
| "completions/min_terminated_length": 1081.0, | |
| "epoch": 0.09392611145898559, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6195723075826596, | |
| "kl": 0.00183868408203125, | |
| "learning_rate": 9.25e-07, | |
| "loss": -0.0361, | |
| "num_tokens": 4041194.0, | |
| "reward": 1.1175870895385742e-08, | |
| "reward_std": 1.0540246963500977, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.1175870895385742e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03654417489517675, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055054088822312976, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901858, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1492.0625, | |
| "completions/mean_terminated_length": 1436.5, | |
| "completions/min_length": 1381.0, | |
| "completions/min_terminated_length": 1381.0, | |
| "epoch": 0.09517845961177207, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1883337396909632, | |
| "kl": 0.0028228759765625, | |
| "learning_rate": 9.374999999999999e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 4102531.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7272332906723022, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657008296291109, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08174957503379145, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437976, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1428.0, | |
| "completions/mean_length": 1347.875, | |
| "completions/mean_terminated_length": 1152.2857666015625, | |
| "completions/min_length": 807.0, | |
| "completions/min_terminated_length": 807.0, | |
| "epoch": 0.09643080776455855, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.323504050782515, | |
| "kl": 0.002685546875, | |
| "learning_rate": 9.499999999999999e-07, | |
| "loss": -0.012, | |
| "num_tokens": 4154537.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9932632446289062, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12575056940966298, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15133213208857665, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1477.0, | |
| "completions/mean_length": 1434.4375, | |
| "completions/mean_terminated_length": 1290.2000732421875, | |
| "completions/min_length": 1178.0, | |
| "completions/min_terminated_length": 1178.0, | |
| "epoch": 0.09768315591734503, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4382048596496553, | |
| "kl": 0.002773284912109375, | |
| "learning_rate": 9.624999999999999e-07, | |
| "loss": -0.0322, | |
| "num_tokens": 4221464.0, | |
| "reward": -2.60770320892334e-08, | |
| "reward_std": 1.0265973806381226, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08170559900334663, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10185399685140464, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.161245154965971, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1248.3125, | |
| "completions/mean_terminated_length": 1097.300048828125, | |
| "completions/min_length": 870.0, | |
| "completions/min_terminated_length": 870.0, | |
| "epoch": 0.09893550407013149, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.648370216175046, | |
| "kl": 0.0019168853759765625, | |
| "learning_rate": 9.75e-07, | |
| "loss": -0.027, | |
| "num_tokens": 4267669.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9588196873664856, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07500714246624458, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993198507995109, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05288001793018134, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 1436.75, | |
| "completions/mean_terminated_length": 1247.0, | |
| "completions/min_length": 1132.0, | |
| "completions/min_terminated_length": 1132.0, | |
| "epoch": 0.10018785222291797, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.879418147641467, | |
| "kl": 0.0019855499267578125, | |
| "learning_rate": 9.875e-07, | |
| "loss": -0.0127, | |
| "num_tokens": 4328465.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9200654029846191, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1453335125370645, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1827536027247548, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09496588081262934, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1422.1875, | |
| "completions/mean_terminated_length": 1344.375, | |
| "completions/min_length": 1237.0, | |
| "completions/min_terminated_length": 1237.0, | |
| "epoch": 0.10144020037570445, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.635617733673008, | |
| "kl": 0.0017528533935546875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0046, | |
| "num_tokens": 4373324.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0682477951049805, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053201182409366166, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.044798463974146746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087683, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1459.0, | |
| "completions/mean_length": 1170.5, | |
| "completions/mean_terminated_length": 1020.727294921875, | |
| "completions/min_length": 844.0, | |
| "completions/min_terminated_length": 844.0, | |
| "epoch": 0.10269254852849093, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8578219249339107, | |
| "kl": 0.0019054412841796875, | |
| "learning_rate": 9.999957044004145e-07, | |
| "loss": -0.0353, | |
| "num_tokens": 4419844.0, | |
| "reward": 0.0, | |
| "reward_std": 0.4868781566619873, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1691690312178033, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1856850439917278, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575908, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1413.0, | |
| "completions/mean_length": 1202.4375, | |
| "completions/mean_terminated_length": 1182.60009765625, | |
| "completions/min_length": 943.0, | |
| "completions/min_terminated_length": 943.0, | |
| "epoch": 0.10394489668127739, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.206094248867889, | |
| "kl": 0.0022640228271484375, | |
| "learning_rate": 9.999828176836682e-07, | |
| "loss": -0.0042, | |
| "num_tokens": 4464763.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 0.9854896068572998, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11969234946420118, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3068885267137289, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 1347.25, | |
| "completions/mean_terminated_length": 1228.4444580078125, | |
| "completions/min_length": 872.0, | |
| "completions/min_terminated_length": 872.0, | |
| "epoch": 0.10519724483406387, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3502401935196273, | |
| "kl": 0.0025177001953125, | |
| "learning_rate": 9.99961340095788e-07, | |
| "loss": -0.0232, | |
| "num_tokens": 4520295.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.0421638488769531, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04940475583906399, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10190244243958202, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12102953419784838, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1485.0, | |
| "completions/mean_length": 1309.6875, | |
| "completions/mean_terminated_length": 1265.769287109375, | |
| "completions/min_length": 859.0, | |
| "completions/min_terminated_length": 859.0, | |
| "epoch": 0.10644959298685035, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.728411933718049, | |
| "kl": 0.001689910888671875, | |
| "learning_rate": 9.99931272046815e-07, | |
| "loss": -0.0142, | |
| "num_tokens": 4576338.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8622345924377441, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.016984465370970727, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040579939841277814, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1341.5625, | |
| "completions/mean_terminated_length": 1269.5455322265625, | |
| "completions/min_length": 982.0, | |
| "completions/min_terminated_length": 982.0, | |
| "epoch": 0.10770194113963683, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.200552135647029, | |
| "kl": 0.002315521240234375, | |
| "learning_rate": 9.998926141107945e-07, | |
| "loss": 0.0351, | |
| "num_tokens": 4618667.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8471476435661316, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22087111411084098, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24091025740898386, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1394.6875, | |
| "completions/mean_terminated_length": 1259.2857666015625, | |
| "completions/min_length": 1069.0, | |
| "completions/min_terminated_length": 1069.0, | |
| "epoch": 0.10895428929242329, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.177667386837352, | |
| "kl": 0.002468109130859375, | |
| "learning_rate": 9.998453670257666e-07, | |
| "loss": 0.0024, | |
| "num_tokens": 4675550.0, | |
| "reward": 0.0, | |
| "reward_std": 0.3878336548805237, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06448512648276508, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0842294519714606, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12405196043952266, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1363.0, | |
| "completions/mean_length": 1469.6875, | |
| "completions/mean_terminated_length": 1338.3333740234375, | |
| "completions/min_length": 1296.0, | |
| "completions/min_terminated_length": 1296.0, | |
| "epoch": 0.11020663744520977, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.074898456526424, | |
| "kl": 0.00238037109375, | |
| "learning_rate": 9.997895316937517e-07, | |
| "loss": 0.0066, | |
| "num_tokens": 4734649.0, | |
| "reward": -4.470348358154297e-08, | |
| "reward_std": 0.9637711048126221, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09676546074924117, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06959776462437538, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1454.0, | |
| "completions/mean_length": 1303.1875, | |
| "completions/mean_terminated_length": 1237.5833740234375, | |
| "completions/min_length": 1039.0, | |
| "completions/min_terminated_length": 1039.0, | |
| "epoch": 0.11145898559799625, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3833185192510284, | |
| "kl": 0.001354217529296875, | |
| "learning_rate": 9.997251091807332e-07, | |
| "loss": 0.0171, | |
| "num_tokens": 4789676.0, | |
| "reward": 0.0, | |
| "reward_std": 1.016492486000061, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12777237426683458, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21498123308224262, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11080513425729775, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1348.0, | |
| "completions/mean_length": 1453.0625, | |
| "completions/mean_terminated_length": 1249.666748046875, | |
| "completions/min_length": 1149.0, | |
| "completions/min_terminated_length": 1149.0, | |
| "epoch": 0.11271133375078271, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1689450227648854, | |
| "kl": 0.002933502197265625, | |
| "learning_rate": 9.99652100716637e-07, | |
| "loss": -0.0062, | |
| "num_tokens": 4847781.0, | |
| "reward": 0.0, | |
| "reward_std": 0.64935302734375, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16229754855451553, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20151739444607794, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.18373692949230228, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1450.0, | |
| "completions/mean_terminated_length": 1300.0, | |
| "completions/min_length": 1049.0, | |
| "completions/min_terminated_length": 1049.0, | |
| "epoch": 0.11396368190356919, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0146226006623476, | |
| "kl": 0.002593994140625, | |
| "learning_rate": 9.995705076953075e-07, | |
| "loss": -0.0291, | |
| "num_tokens": 4905421.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0383461713790894, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06052119205813296, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12160618129006116, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09108400680852977, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 910.0, | |
| "completions/mean_length": 1144.0625, | |
| "completions/mean_terminated_length": 788.125, | |
| "completions/min_length": 610.0, | |
| "completions/min_terminated_length": 610.0, | |
| "epoch": 0.11521603005635567, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.228817256723133, | |
| "kl": 0.0014410018920898438, | |
| "learning_rate": 9.994803316744828e-07, | |
| "loss": 0.0105, | |
| "num_tokens": 4950462.0, | |
| "reward": -4.470348358154297e-08, | |
| "reward_std": 0.9390549659729004, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07564319510568883, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1514996148617109, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15770342536029575, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1471.0, | |
| "completions/max_terminated_length": 1471.0, | |
| "completions/mean_length": 1093.8125, | |
| "completions/mean_terminated_length": 1093.8125, | |
| "completions/min_length": 638.0, | |
| "completions/min_terminated_length": 638.0, | |
| "epoch": 0.11646837820914215, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.6363277397384617, | |
| "kl": 0.002498626708984375, | |
| "learning_rate": 9.993815743757633e-07, | |
| "loss": -0.0484, | |
| "num_tokens": 4983835.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8996579647064209, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0037569304970198007, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07736656048737343, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.2014760347847669, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 1417.75, | |
| "completions/mean_terminated_length": 1335.5, | |
| "completions/min_length": 1111.0, | |
| "completions/min_terminated_length": 1111.0, | |
| "epoch": 0.11772072636192861, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.20990587817039, | |
| "kl": 0.002735137939453125, | |
| "learning_rate": 9.99274237684579e-07, | |
| "loss": 0.004, | |
| "num_tokens": 5030407.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6368776559829712, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029770016601004534, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0349532410691535, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10461569884316813, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1347.0, | |
| "completions/max_terminated_length": 1347.0, | |
| "completions/mean_length": 926.8125, | |
| "completions/mean_terminated_length": 926.8125, | |
| "completions/min_length": 631.0, | |
| "completions/min_terminated_length": 631.0, | |
| "epoch": 0.11897307451471509, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7699870834508333, | |
| "kl": 0.0008082389831542969, | |
| "learning_rate": 9.99158323650154e-07, | |
| "loss": -0.0527, | |
| "num_tokens": 5074556.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0668668746948242, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01722883909028131, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19517428674960768, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0843274042711568, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1393.0, | |
| "completions/mean_length": 1283.5625, | |
| "completions/mean_terminated_length": 1115.2222900390625, | |
| "completions/min_length": 942.0, | |
| "completions/min_terminated_length": 942.0, | |
| "epoch": 0.12022542266750157, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2021886227663034, | |
| "kl": 0.002685546875, | |
| "learning_rate": 9.990338344854676e-07, | |
| "loss": -0.0074, | |
| "num_tokens": 5120597.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9720104336738586, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024841432663237503, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17561297504079998, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 1415.3125, | |
| "completions/mean_terminated_length": 1364.5, | |
| "completions/min_length": 1206.0, | |
| "completions/min_terminated_length": 1206.0, | |
| "epoch": 0.12147777082028804, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2246665277704185, | |
| "kl": 0.002559661865234375, | |
| "learning_rate": 9.989007725672113e-07, | |
| "loss": 0.0063, | |
| "num_tokens": 5158170.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7684129476547241, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0825, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1586400537905439, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1334.0, | |
| "completions/mean_length": 1175.3125, | |
| "completions/mean_terminated_length": 922.7777709960938, | |
| "completions/min_length": 596.0, | |
| "completions/min_terminated_length": 596.0, | |
| "epoch": 0.12273011897307451, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.169149502657088, | |
| "kl": 0.00231170654296875, | |
| "learning_rate": 9.987591404357437e-07, | |
| "loss": -0.0811, | |
| "num_tokens": 5215647.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9120274782180786, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005036444545787546, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10234315753446507, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1387777332977422, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1366.0, | |
| "completions/mean_length": 1264.5, | |
| "completions/mean_terminated_length": 1123.2000732421875, | |
| "completions/min_length": 983.0, | |
| "completions/min_terminated_length": 983.0, | |
| "epoch": 0.12398246712586099, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9409598040312765, | |
| "kl": 0.002063751220703125, | |
| "learning_rate": 9.986089407950426e-07, | |
| "loss": -0.0453, | |
| "num_tokens": 5250879.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0199556350708008, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11830339701018143, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.25916185560707883, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1437.0, | |
| "completions/mean_length": 1406.9375, | |
| "completions/mean_terminated_length": 1251.8333740234375, | |
| "completions/min_length": 906.0, | |
| "completions/min_terminated_length": 906.0, | |
| "epoch": 0.12523481527864747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.236489502184281, | |
| "kl": 0.0029754638671875, | |
| "learning_rate": 9.98450176512652e-07, | |
| "loss": 0.0261, | |
| "num_tokens": 5303030.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8868198990821838, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.14501472660672157, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15004116932595393, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1172998689652263, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1468.0, | |
| "completions/mean_length": 1374.0625, | |
| "completions/mean_terminated_length": 1332.0833740234375, | |
| "completions/min_length": 1208.0, | |
| "completions/min_terminated_length": 1208.0, | |
| "epoch": 0.12648716343143393, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7393115321098898, | |
| "kl": 0.0021686553955078125, | |
| "learning_rate": 9.982828506196295e-07, | |
| "loss": 0.0475, | |
| "num_tokens": 5348991.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.744665265083313, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16115596269847898, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19475646493041288, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1472.0, | |
| "completions/mean_length": 1392.25, | |
| "completions/mean_terminated_length": 1284.5, | |
| "completions/min_length": 957.0, | |
| "completions/min_terminated_length": 957.0, | |
| "epoch": 0.12773951158422042, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.465229932517055, | |
| "kl": 0.00170135498046875, | |
| "learning_rate": 9.981069663104853e-07, | |
| "loss": -0.0292, | |
| "num_tokens": 5393291.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9994131326675415, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010671914654693294, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027094219261353553, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1388.5625, | |
| "completions/mean_terminated_length": 1277.125, | |
| "completions/min_length": 1062.0, | |
| "completions/min_terminated_length": 1062.0, | |
| "epoch": 0.1289918597370069, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.97886098445916, | |
| "kl": 0.00238800048828125, | |
| "learning_rate": 9.979225269431252e-07, | |
| "loss": 0.0455, | |
| "num_tokens": 5437588.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0143799781799316, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0018910121903646018, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21804038685357507, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057183, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1482.0, | |
| "completions/mean_length": 1159.8125, | |
| "completions/mean_terminated_length": 955.7000122070312, | |
| "completions/min_length": 402.0, | |
| "completions/min_terminated_length": 402.0, | |
| "epoch": 0.13024420788979335, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2070297326725687, | |
| "kl": 0.0024261474609375, | |
| "learning_rate": 9.977295360387827e-07, | |
| "loss": -0.0325, | |
| "num_tokens": 5469273.0, | |
| "reward": 0.0, | |
| "reward_std": 0.848124623298645, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0002889221914715882, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03991849505429317, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.4875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1495053572680653, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1405.4375, | |
| "completions/mean_terminated_length": 1247.8333740234375, | |
| "completions/min_length": 959.0, | |
| "completions/min_terminated_length": 959.0, | |
| "epoch": 0.13149655604257984, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1693981909983457, | |
| "kl": 0.00269317626953125, | |
| "learning_rate": 9.97527997281954e-07, | |
| "loss": -0.0085, | |
| "num_tokens": 5527744.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0289491415023804, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07601873282977642, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2329329780235847, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1250.0, | |
| "completions/mean_length": 1258.5625, | |
| "completions/mean_terminated_length": 1070.77783203125, | |
| "completions/min_length": 958.0, | |
| "completions/min_terminated_length": 958.0, | |
| "epoch": 0.1327489041953663, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4731519538264903, | |
| "kl": 0.0015192031860351562, | |
| "learning_rate": 9.973179145203272e-07, | |
| "loss": -0.0122, | |
| "num_tokens": 5571305.0, | |
| "reward": 0.0, | |
| "reward_std": 1.046633243560791, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027299266065874364, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09683294681842305, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1376.0, | |
| "completions/mean_length": 1361.875, | |
| "completions/mean_terminated_length": 1223.75, | |
| "completions/min_length": 937.0, | |
| "completions/min_terminated_length": 937.0, | |
| "epoch": 0.1340012523481528, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.781242946832243, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 9.970992917647088e-07, | |
| "loss": -0.0163, | |
| "num_tokens": 5617855.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9318596124649048, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19798356691808755, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.29651415192877617, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11021863793455328, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1487.0, | |
| "completions/mean_length": 1146.0625, | |
| "completions/mean_terminated_length": 1122.4666748046875, | |
| "completions/min_length": 848.0, | |
| "completions/min_terminated_length": 848.0, | |
| "epoch": 0.13525360050093926, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9997009952780855, | |
| "kl": 0.0022125244140625, | |
| "learning_rate": 9.968721331889465e-07, | |
| "loss": 0.0235, | |
| "num_tokens": 5654992.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0186116695404053, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0558045951815816, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.029030233660680062, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242312, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1390.0, | |
| "completions/mean_length": 1410.4375, | |
| "completions/mean_terminated_length": 1261.166748046875, | |
| "completions/min_length": 1123.0, | |
| "completions/min_terminated_length": 1123.0, | |
| "epoch": 0.13650594865372573, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9682648437410637, | |
| "kl": 0.002681732177734375, | |
| "learning_rate": 9.966364431298509e-07, | |
| "loss": -0.022, | |
| "num_tokens": 5711927.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0176870822906494, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.26425948065238597, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.28899722395436095, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820636, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1455.0, | |
| "completions/mean_length": 1212.4375, | |
| "completions/mean_terminated_length": 1081.727294921875, | |
| "completions/min_length": 791.0, | |
| "completions/min_terminated_length": 791.0, | |
| "epoch": 0.13775829680651222, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.323352542220494, | |
| "kl": 0.002532958984375, | |
| "learning_rate": 9.963922260871115e-07, | |
| "loss": -0.0134, | |
| "num_tokens": 5754094.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.9666612148284912, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.051175618061779164, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.039320213077717464, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5833333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516195, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 1485.25, | |
| "completions/mean_terminated_length": 1421.3333740234375, | |
| "completions/min_length": 1380.0, | |
| "completions/min_terminated_length": 1380.0, | |
| "epoch": 0.13901064495929868, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8360270019245446, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 9.9613948672321e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 5814162.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0610442161560059, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009639880768854782, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045421738289270215, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057184, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1323.0, | |
| "completions/mean_length": 1434.75, | |
| "completions/mean_terminated_length": 1239.0, | |
| "completions/min_length": 1100.0, | |
| "completions/min_terminated_length": 1100.0, | |
| "epoch": 0.14026299311208515, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.999336260576351, | |
| "kl": 0.0024852752685546875, | |
| "learning_rate": 9.958782298633351e-07, | |
| "loss": -0.0196, | |
| "num_tokens": 5879078.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7917496562004089, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03413289340922598, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05688585018947227, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1290.0, | |
| "completions/mean_length": 1377.4375, | |
| "completions/mean_terminated_length": 1173.166748046875, | |
| "completions/min_length": 998.0, | |
| "completions/min_terminated_length": 998.0, | |
| "epoch": 0.14151534126487164, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.661753098948472, | |
| "kl": 0.0021266937255859375, | |
| "learning_rate": 9.95608460495285e-07, | |
| "loss": -0.0087, | |
| "num_tokens": 5933045.0, | |
| "reward": 2.2351741790771484e-08, | |
| "reward_std": 1.0039006471633911, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05284198848548562, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05437266883758088, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1187.0, | |
| "completions/mean_length": 1322.25, | |
| "completions/mean_terminated_length": 1093.71435546875, | |
| "completions/min_length": 991.0, | |
| "completions/min_terminated_length": 991.0, | |
| "epoch": 0.1427676894176581, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8780144010263284, | |
| "kl": 0.0020542144775390625, | |
| "learning_rate": 9.953301837693767e-07, | |
| "loss": 0.003, | |
| "num_tokens": 5979113.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8175742626190186, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005502994719066203, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06974582191643876, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0758897836290186, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1401.1875, | |
| "completions/mean_terminated_length": 1324.3333740234375, | |
| "completions/min_length": 1118.0, | |
| "completions/min_terminated_length": 1118.0, | |
| "epoch": 0.14402003757044457, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.139329899307579, | |
| "kl": 0.002716064453125, | |
| "learning_rate": 9.95043404998345e-07, | |
| "loss": 0.0292, | |
| "num_tokens": 6040452.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0616416931152344, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03748903917915849, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14395002297286164, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1641476300299351, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1452.0, | |
| "completions/mean_length": 1185.375, | |
| "completions/mean_terminated_length": 1140.4285888671875, | |
| "completions/min_length": 804.0, | |
| "completions/min_terminated_length": 804.0, | |
| "epoch": 0.14527238572323106, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3086990939983667, | |
| "kl": 0.0029144287109375, | |
| "learning_rate": 9.947481296572423e-07, | |
| "loss": -0.014, | |
| "num_tokens": 6090810.0, | |
| "reward": -2.2351741790771484e-08, | |
| "reward_std": 1.0066075325012207, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05014218857813404, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09276403913432626, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1433.0, | |
| "completions/mean_length": 1351.9375, | |
| "completions/mean_terminated_length": 1263.0999755859375, | |
| "completions/min_length": 1043.0, | |
| "completions/min_terminated_length": 1043.0, | |
| "epoch": 0.14652473387601753, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.035226656450535, | |
| "kl": 0.002376556396484375, | |
| "learning_rate": 9.944443633833335e-07, | |
| "loss": 0.0179, | |
| "num_tokens": 6148881.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7348309755325317, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0762897874284947, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12841725021840134, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036262, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1392.0, | |
| "completions/mean_length": 1474.6875, | |
| "completions/mean_terminated_length": 1297.5, | |
| "completions/min_length": 1203.0, | |
| "completions/min_terminated_length": 1203.0, | |
| "epoch": 0.14777708202880402, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9234616863737957, | |
| "kl": 0.0024566650390625, | |
| "learning_rate": 9.94132111975989e-07, | |
| "loss": 0.0031, | |
| "num_tokens": 6213916.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.5194555521011353, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.018562499999999996, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.024749999999999994, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437974, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1228.0, | |
| "completions/mean_length": 1220.0625, | |
| "completions/mean_terminated_length": 940.125, | |
| "completions/min_length": 820.0, | |
| "completions/min_terminated_length": 820.0, | |
| "epoch": 0.14902943018159048, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7331417657136603, | |
| "kl": 0.002094268798828125, | |
| "learning_rate": 9.93811381396573e-07, | |
| "loss": -0.0031, | |
| "num_tokens": 6257485.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7746272087097168, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02009986693954008, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07362867807980181, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1400.375, | |
| "completions/mean_terminated_length": 1340.5999755859375, | |
| "completions/min_length": 1181.0, | |
| "completions/min_terminated_length": 1181.0, | |
| "epoch": 0.15028177833437695, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.834637825788323, | |
| "kl": 0.003093719482421875, | |
| "learning_rate": 9.934821777683306e-07, | |
| "loss": 0.0269, | |
| "num_tokens": 6319963.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0544224977493286, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.18103321643586406, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14394672405121658, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8083333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1428.0, | |
| "completions/mean_length": 1325.5, | |
| "completions/mean_terminated_length": 1189.77783203125, | |
| "completions/min_length": 853.0, | |
| "completions/min_terminated_length": 853.0, | |
| "epoch": 0.15153412648716344, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2642836490036453, | |
| "kl": 0.0030364990234375, | |
| "learning_rate": 9.93144507376271e-07, | |
| "loss": -0.005, | |
| "num_tokens": 6385427.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8268899917602539, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1112911236291226, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1569615458099141, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6124999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568493, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1465.0, | |
| "completions/mean_length": 1305.875, | |
| "completions/mean_terminated_length": 1217.6363525390625, | |
| "completions/min_length": 922.0, | |
| "completions/min_terminated_length": 922.0, | |
| "epoch": 0.1527864746399499, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6641398914857923, | |
| "kl": 0.002033233642578125, | |
| "learning_rate": 9.927983766670462e-07, | |
| "loss": -0.0098, | |
| "num_tokens": 6440177.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0115642547607422, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06872988161057395, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1025211626906069, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1338.0, | |
| "completions/mean_length": 1239.6875, | |
| "completions/mean_terminated_length": 1083.5, | |
| "completions/min_length": 886.0, | |
| "completions/min_terminated_length": 886.0, | |
| "epoch": 0.15403882279273637, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9543701078797575, | |
| "kl": 0.0018558502197265625, | |
| "learning_rate": 9.924437922488291e-07, | |
| "loss": 0.0245, | |
| "num_tokens": 6498212.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6738491654396057, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038590091343060344, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09510012784467493, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12524050936172842, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 1403.625, | |
| "completions/mean_terminated_length": 1345.800048828125, | |
| "completions/min_length": 1121.0, | |
| "completions/min_terminated_length": 1121.0, | |
| "epoch": 0.15529117094552286, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5178341976839556, | |
| "kl": 0.0033111572265625, | |
| "learning_rate": 9.920807608911876e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 6553902.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8796525597572327, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.013190710670885862, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1480868926971966, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 825.0, | |
| "completions/mean_length": 1112.5625, | |
| "completions/mean_terminated_length": 725.125, | |
| "completions/min_length": 613.0, | |
| "completions/min_terminated_length": 613.0, | |
| "epoch": 0.15654351909830932, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.378105432163651, | |
| "kl": 0.0008687973022460938, | |
| "learning_rate": 9.917092895249543e-07, | |
| "loss": -0.0272, | |
| "num_tokens": 6589311.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9441956877708435, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014329624416098018, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.112902138916422, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12758439472669758, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1404.0, | |
| "completions/mean_length": 1137.0625, | |
| "completions/mean_terminated_length": 1053.3077392578125, | |
| "completions/min_length": 749.0, | |
| "completions/min_terminated_length": 749.0, | |
| "epoch": 0.15779586725109582, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2527212324187826, | |
| "kl": 0.002117156982421875, | |
| "learning_rate": 9.913293852420946e-07, | |
| "loss": -0.0249, | |
| "num_tokens": 6618304.0, | |
| "reward": 2.2351741790771484e-08, | |
| "reward_std": 1.035041093826294, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007633954846541112, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.032194935573291575, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.103905227473387, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1405.0, | |
| "completions/mean_length": 1462.125, | |
| "completions/mean_terminated_length": 1348.5, | |
| "completions/min_length": 1255.0, | |
| "completions/min_terminated_length": 1255.0, | |
| "epoch": 0.15904821540388228, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.091983329824593, | |
| "kl": 0.00301361083984375, | |
| "learning_rate": 9.909410552955712e-07, | |
| "loss": 0.0155, | |
| "num_tokens": 6681314.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.784981369972229, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12737730164130195, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21747166290242714, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.093392838174146, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1474.0, | |
| "completions/mean_length": 1378.875, | |
| "completions/mean_terminated_length": 1223.1429443359375, | |
| "completions/min_length": 904.0, | |
| "completions/min_terminated_length": 904.0, | |
| "epoch": 0.16030056355666875, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2503169455658982, | |
| "kl": 0.002620697021484375, | |
| "learning_rate": 9.905443070992068e-07, | |
| "loss": -0.0039, | |
| "num_tokens": 6723448.0, | |
| "reward": -3.3527612686157227e-08, | |
| "reward_std": 1.06490159034729, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.3527612686157227e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07877405649297206, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0705921273253386, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1476.0, | |
| "completions/mean_length": 1418.0, | |
| "completions/mean_terminated_length": 1281.3333740234375, | |
| "completions/min_length": 1167.0, | |
| "completions/min_terminated_length": 1167.0, | |
| "epoch": 0.16155291170945524, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.419069025864104, | |
| "kl": 0.003154754638671875, | |
| "learning_rate": 9.901391482275403e-07, | |
| "loss": -0.0084, | |
| "num_tokens": 6774208.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9308052062988281, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06996807244867725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1266299752409378, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1098.0, | |
| "completions/mean_length": 1249.3125, | |
| "completions/mean_terminated_length": 998.625, | |
| "completions/min_length": 929.0, | |
| "completions/min_terminated_length": 929.0, | |
| "epoch": 0.1628052598622417, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.593196585544164, | |
| "kl": 0.001987457275390625, | |
| "learning_rate": 9.897255864156847e-07, | |
| "loss": 0.0036, | |
| "num_tokens": 6807421.0, | |
| "reward": 0.0, | |
| "reward_std": 0.4564354419708252, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0429616858320893, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07600285040401121, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1394.0, | |
| "completions/mean_length": 1382.375, | |
| "completions/mean_terminated_length": 1231.1429443359375, | |
| "completions/min_length": 1075.0, | |
| "completions/min_terminated_length": 1075.0, | |
| "epoch": 0.16405760801502817, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0914996686420104, | |
| "kl": 0.002330780029296875, | |
| "learning_rate": 9.893036295591768e-07, | |
| "loss": -0.0116, | |
| "num_tokens": 6866379.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.9815191626548767, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04217953361323695, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06871670933278229, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1424.0, | |
| "completions/mean_length": 1347.125, | |
| "completions/mean_terminated_length": 1150.571533203125, | |
| "completions/min_length": 371.0, | |
| "completions/min_terminated_length": 371.0, | |
| "epoch": 0.16530995616781466, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.001502145266533, | |
| "kl": 0.0022430419921875, | |
| "learning_rate": 9.888732857138291e-07, | |
| "loss": -0.04, | |
| "num_tokens": 6912533.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8428164720535278, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020130872057838745, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04873657297962695, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369003, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 882.0, | |
| "completions/mean_length": 1177.75, | |
| "completions/mean_terminated_length": 855.5, | |
| "completions/min_length": 795.0, | |
| "completions/min_terminated_length": 795.0, | |
| "epoch": 0.16656230432060112, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.849369584207102, | |
| "kl": 0.00208282470703125, | |
| "learning_rate": 9.884345630955742e-07, | |
| "loss": -0.0097, | |
| "num_tokens": 6966273.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0472090244293213, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08003635148497827, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09874522821696813, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823629, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1491.0, | |
| "completions/mean_length": 1488.0, | |
| "completions/mean_terminated_length": 1404.0, | |
| "completions/min_length": 1317.0, | |
| "completions/min_terminated_length": 1317.0, | |
| "epoch": 0.16781465247338762, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.688722282572705, | |
| "kl": 0.0026092529296875, | |
| "learning_rate": 9.879874700803082e-07, | |
| "loss": 0.0158, | |
| "num_tokens": 7027657.0, | |
| "reward": 3.166496753692627e-08, | |
| "reward_std": 1.0543937683105469, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 3.166496753692627e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11540214745824308, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23102363071615145, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1312.0, | |
| "completions/mean_length": 1146.375, | |
| "completions/mean_terminated_length": 1064.769287109375, | |
| "completions/min_length": 858.0, | |
| "completions/min_terminated_length": 858.0, | |
| "epoch": 0.16906700062617408, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.664759257998968, | |
| "kl": 0.0027923583984375, | |
| "learning_rate": 9.875320152037318e-07, | |
| "loss": -0.0535, | |
| "num_tokens": 7084095.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8985534906387329, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23834962043700852, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.27030996076033054, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369006, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1237.0, | |
| "completions/mean_length": 1274.875, | |
| "completions/mean_terminated_length": 1049.75, | |
| "completions/min_length": 931.0, | |
| "completions/min_terminated_length": 931.0, | |
| "epoch": 0.17031934877896054, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4849191974864953, | |
| "kl": 0.0015964508056640625, | |
| "learning_rate": 9.870682071611862e-07, | |
| "loss": 0.0064, | |
| "num_tokens": 7133293.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6105766892433167, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02324001170505371, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08829030406958045, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1462.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 955.6875, | |
| "completions/mean_terminated_length": 955.6875, | |
| "completions/min_length": 761.0, | |
| "completions/min_terminated_length": 761.0, | |
| "epoch": 0.17157169693174704, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.620633996964256, | |
| "kl": 0.0014123916625976562, | |
| "learning_rate": 9.865960548074874e-07, | |
| "loss": 0.0103, | |
| "num_tokens": 7187688.0, | |
| "reward": 5.960464477539063e-08, | |
| "reward_std": 0.6596803069114685, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0038102094327885448, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12227248665731598, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1478.0, | |
| "completions/mean_length": 1322.0, | |
| "completions/mean_terminated_length": 1093.1429443359375, | |
| "completions/min_length": 733.0, | |
| "completions/min_terminated_length": 733.0, | |
| "epoch": 0.1728240450845335, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8032617883799893, | |
| "kl": 0.0021915435791015625, | |
| "learning_rate": 9.861155671567572e-07, | |
| "loss": 0.0513, | |
| "num_tokens": 7236832.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.938301682472229, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06466602322499601, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05158824252677371, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5666666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820632, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1335.8125, | |
| "completions/mean_terminated_length": 1237.300048828125, | |
| "completions/min_length": 944.0, | |
| "completions/min_terminated_length": 944.0, | |
| "epoch": 0.17407639323731997, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.6291703245010103, | |
| "kl": 0.00284576416015625, | |
| "learning_rate": 9.856267533822519e-07, | |
| "loss": -0.021, | |
| "num_tokens": 7293301.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7662729024887085, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08970693607829759, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16534434492549577, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1970147578604578, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1340.0, | |
| "completions/mean_length": 1120.8125, | |
| "completions/mean_terminated_length": 994.4166870117188, | |
| "completions/min_length": 844.0, | |
| "completions/min_terminated_length": 844.0, | |
| "epoch": 0.17532874139010646, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6820256510644604, | |
| "kl": 0.0022602081298828125, | |
| "learning_rate": 9.851296228161857e-07, | |
| "loss": 0.019, | |
| "num_tokens": 7341130.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8969849348068237, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026873742767844065, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0656536955300479, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1404.0, | |
| "completions/mean_length": 1399.75, | |
| "completions/mean_terminated_length": 1232.666748046875, | |
| "completions/min_length": 936.0, | |
| "completions/min_terminated_length": 936.0, | |
| "epoch": 0.17658108954289292, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.372829326628183, | |
| "kl": 0.003108978271484375, | |
| "learning_rate": 9.846241849495535e-07, | |
| "loss": 0.0153, | |
| "num_tokens": 7410982.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9778778553009033, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05033218082218886, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.025136378125956142, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1449.5625, | |
| "completions/mean_terminated_length": 1399.125, | |
| "completions/min_length": 1240.0, | |
| "completions/min_terminated_length": 1240.0, | |
| "epoch": 0.17783343769567939, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.396651196247218, | |
| "kl": 0.001613616943359375, | |
| "learning_rate": 9.841104494319492e-07, | |
| "loss": -0.0053, | |
| "num_tokens": 7468879.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6796972155570984, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021608644866332537, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11519923314511032, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0739118594202782, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1458.0, | |
| "completions/mean_length": 1434.4375, | |
| "completions/mean_terminated_length": 1350.1429443359375, | |
| "completions/min_length": 1215.0, | |
| "completions/min_terminated_length": 1215.0, | |
| "epoch": 0.17908578584846588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6795502864911453, | |
| "kl": 0.00296783447265625, | |
| "learning_rate": 9.835884260713826e-07, | |
| "loss": 0.0053, | |
| "num_tokens": 7526334.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 0.9401005506515503, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05878136743445916, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15259208491300538, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043481, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1271.0, | |
| "completions/mean_length": 1081.1875, | |
| "completions/mean_terminated_length": 1021.357177734375, | |
| "completions/min_length": 760.0, | |
| "completions/min_terminated_length": 760.0, | |
| "epoch": 0.18033813400125234, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4443025787306487, | |
| "kl": 0.0014491081237792969, | |
| "learning_rate": 9.830581248340904e-07, | |
| "loss": 0.0523, | |
| "num_tokens": 7560449.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6386822462081909, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030072721096349574, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07933031547923879, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081408, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1438.0, | |
| "completions/mean_length": 1464.875, | |
| "completions/mean_terminated_length": 1312.666748046875, | |
| "completions/min_length": 1143.0, | |
| "completions/min_terminated_length": 1143.0, | |
| "epoch": 0.18159048215403883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.281062477288285, | |
| "kl": 0.00322723388671875, | |
| "learning_rate": 9.82519555844347e-07, | |
| "loss": 0.0292, | |
| "num_tokens": 7621295.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8289343118667603, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12500933186269494, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10441096539901965, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1440.0, | |
| "completions/mean_length": 1436.875, | |
| "completions/mean_terminated_length": 1298.0, | |
| "completions/min_length": 1137.0, | |
| "completions/min_terminated_length": 1137.0, | |
| "epoch": 0.1828428303068253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.852746135375484, | |
| "kl": 0.0022106170654296875, | |
| "learning_rate": 9.819727293842715e-07, | |
| "loss": -0.0099, | |
| "num_tokens": 7663125.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9735676646232605, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008435227123041298, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08786012223776958, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1299572579307862, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1465.0, | |
| "completions/mean_length": 1447.0625, | |
| "completions/mean_terminated_length": 1288.25, | |
| "completions/min_length": 1028.0, | |
| "completions/min_terminated_length": 1028.0, | |
| "epoch": 0.18409517845961176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7900001366496268, | |
| "kl": 0.002574920654296875, | |
| "learning_rate": 9.814176558936306e-07, | |
| "loss": 0.0107, | |
| "num_tokens": 7727518.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.3397839367389679, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09743503994599206, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16748018946124937, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316068, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 863.0, | |
| "completions/mean_length": 1135.0625, | |
| "completions/mean_terminated_length": 770.125, | |
| "completions/min_length": 571.0, | |
| "completions/min_terminated_length": 571.0, | |
| "epoch": 0.18534752661239826, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.560665055023378, | |
| "kl": 0.00197601318359375, | |
| "learning_rate": 9.808543459696394e-07, | |
| "loss": -0.0149, | |
| "num_tokens": 7771327.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9778045415878296, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.20066201620356428, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3214780108822807, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0938872452190116, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1456.0, | |
| "completions/mean_length": 1307.375, | |
| "completions/mean_terminated_length": 1157.5555419921875, | |
| "completions/min_length": 336.0, | |
| "completions/min_terminated_length": 336.0, | |
| "epoch": 0.18659987476518472, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.698308959281013, | |
| "kl": 0.003589630126953125, | |
| "learning_rate": 9.802828103667598e-07, | |
| "loss": 0.0049, | |
| "num_tokens": 7824917.0, | |
| "reward": -9.313225746154785e-09, | |
| "reward_std": 0.929603099822998, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0017376960374372932, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03411053398366144, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1954576775256058, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1390.0, | |
| "completions/mean_length": 1350.8125, | |
| "completions/mean_terminated_length": 1234.77783203125, | |
| "completions/min_length": 897.0, | |
| "completions/min_terminated_length": 897.0, | |
| "epoch": 0.18785222291797118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.37227831535049, | |
| "kl": 0.003437042236328125, | |
| "learning_rate": 9.797030599964946e-07, | |
| "loss": -0.0282, | |
| "num_tokens": 7879658.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6427962779998779, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0803417321639054, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11525098223680169, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1253144193766372, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1446.0625, | |
| "completions/mean_terminated_length": 1327.4000244140625, | |
| "completions/min_length": 1075.0, | |
| "completions/min_terminated_length": 1075.0, | |
| "epoch": 0.18910457107075768, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6387419512111028, | |
| "kl": 0.002285003662109375, | |
| "learning_rate": 9.791151059271787e-07, | |
| "loss": -0.0106, | |
| "num_tokens": 7927819.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9979233145713806, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657694240337725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20181152584757237, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194862, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1493.0, | |
| "completions/mean_length": 1422.1875, | |
| "completions/mean_terminated_length": 1344.375, | |
| "completions/min_length": 1078.0, | |
| "completions/min_terminated_length": 1078.0, | |
| "epoch": 0.19035691922354414, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8499814649815485, | |
| "kl": 0.00269317626953125, | |
| "learning_rate": 9.78518959383769e-07, | |
| "loss": -0.0267, | |
| "num_tokens": 7979030.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6457971334457397, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032289559957375875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03678022720872768, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.19160926737633063, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7439239666738633, | |
| "kl": 0.00275421142578125, | |
| "learning_rate": 9.779146317476294e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 8039006.0, | |
| "reward": -1.1175870895385742e-08, | |
| "reward_std": 1.0521876811981201, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09760563861386369, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10390475856290554, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533111, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 1500.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 1500.0, | |
| "epoch": 0.1928616155291171, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0303320592221237, | |
| "kl": 0.003154754638671875, | |
| "learning_rate": 9.773021345563133e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 8103454.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0458917617797852, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.25916260149601344, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18093382728997642, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1470.0, | |
| "completions/mean_length": 1404.1875, | |
| "completions/mean_terminated_length": 1346.7000732421875, | |
| "completions/min_length": 1252.0, | |
| "completions/min_terminated_length": 1252.0, | |
| "epoch": 0.19411396368190356, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7184034168824684, | |
| "kl": 0.002166748046875, | |
| "learning_rate": 9.766814795033438e-07, | |
| "loss": 0.0074, | |
| "num_tokens": 8157473.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9921345710754395, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05048016331986036, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1222984521625515, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07187952884282611, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1361.0, | |
| "completions/mean_length": 1382.875, | |
| "completions/mean_terminated_length": 1031.5, | |
| "completions/min_length": 853.0, | |
| "completions/min_terminated_length": 853.0, | |
| "epoch": 0.19536631183469005, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 1.7493199389788998, | |
| "kl": 0.002361297607421875, | |
| "learning_rate": 9.7605267843799e-07, | |
| "loss": -0.0294, | |
| "num_tokens": 8204367.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7406100630760193, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4396175531814227, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.42217210131772864, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970789, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1403.0, | |
| "completions/mean_length": 1252.6875, | |
| "completions/mean_terminated_length": 1104.300048828125, | |
| "completions/min_length": 780.0, | |
| "completions/min_terminated_length": 780.0, | |
| "epoch": 0.19661865998747652, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1812238222319373, | |
| "kl": 0.002895355224609375, | |
| "learning_rate": 9.754157433650416e-07, | |
| "loss": 0.0099, | |
| "num_tokens": 8250426.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7503967881202698, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09038614901064657, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10393207102574, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1470.0, | |
| "completions/mean_length": 1485.9375, | |
| "completions/mean_terminated_length": 1425.0, | |
| "completions/min_length": 1367.0, | |
| "completions/min_terminated_length": 1367.0, | |
| "epoch": 0.19787100814026298, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.274381341557373, | |
| "kl": 0.0019855499267578125, | |
| "learning_rate": 9.74770686444578e-07, | |
| "loss": -0.0039, | |
| "num_tokens": 8312649.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 1.0463612079620361, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01854492153050523, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07355929227115507, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1296.0, | |
| "completions/mean_length": 1468.6875, | |
| "completions/mean_terminated_length": 1249.5, | |
| "completions/min_length": 1203.0, | |
| "completions/min_terminated_length": 1203.0, | |
| "epoch": 0.19912335629304947, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.037789984063529, | |
| "kl": 0.00295257568359375, | |
| "learning_rate": 9.74117519991739e-07, | |
| "loss": 0.0195, | |
| "num_tokens": 8372460.0, | |
| "reward": 5.960464477539063e-08, | |
| "reward_std": 0.6518849730491638, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010805361779511215, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10954072593469087, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12041594578792295, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1353.0, | |
| "completions/mean_length": 1422.5625, | |
| "completions/mean_terminated_length": 1293.5, | |
| "completions/min_length": 1240.0, | |
| "completions/min_terminated_length": 1240.0, | |
| "epoch": 0.20037570444583594, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4914861954275853, | |
| "kl": 0.0020751953125, | |
| "learning_rate": 9.734562564764863e-07, | |
| "loss": -0.0084, | |
| "num_tokens": 8441477.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0050157308578491, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20082839440532127, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24021378555176306, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593311, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1444.0, | |
| "completions/mean_length": 1496.5, | |
| "completions/mean_terminated_length": 1444.0, | |
| "completions/min_length": 1444.0, | |
| "completions/min_terminated_length": 1444.0, | |
| "epoch": 0.2016280525986224, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.96733321823591, | |
| "kl": 0.003032684326171875, | |
| "learning_rate": 9.727869085233683e-07, | |
| "loss": 0.0008, | |
| "num_tokens": 8500525.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0511749982833862, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.30725599890646893, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11503663852918616, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1417.0, | |
| "completions/mean_length": 1405.9375, | |
| "completions/mean_terminated_length": 1199.0, | |
| "completions/min_length": 992.0, | |
| "completions/min_terminated_length": 992.0, | |
| "epoch": 0.2028804007514089, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1286840232588795, | |
| "kl": 0.003131866455078125, | |
| "learning_rate": 9.721094889112769e-07, | |
| "loss": -0.0017, | |
| "num_tokens": 8561668.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0658842325210571, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3530029462031852, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3684803684710799, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1408.4375, | |
| "completions/mean_terminated_length": 1290.71435546875, | |
| "completions/min_length": 1074.0, | |
| "completions/min_terminated_length": 1074.0, | |
| "epoch": 0.20413274890419536, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9421524856257575, | |
| "kl": 0.003437042236328125, | |
| "learning_rate": 9.714240105732056e-07, | |
| "loss": -0.0217, | |
| "num_tokens": 8611395.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8545268774032593, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004406093333840853, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07521193600811737, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1406.0, | |
| "completions/mean_length": 1383.4375, | |
| "completions/mean_terminated_length": 878.3333740234375, | |
| "completions/min_length": 209.0, | |
| "completions/min_terminated_length": 209.0, | |
| "epoch": 0.20538509705698185, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8733824694411965, | |
| "kl": 0.002685546875, | |
| "learning_rate": 9.707304865960003e-07, | |
| "loss": 0.0086, | |
| "num_tokens": 8668282.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.019072413444519, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09355282337201007, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11627823063016991, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12619796324000607, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1349.0, | |
| "completions/mean_length": 1461.0625, | |
| "completions/mean_terminated_length": 1292.3333740234375, | |
| "completions/min_length": 1182.0, | |
| "completions/min_terminated_length": 1182.0, | |
| "epoch": 0.20663744520976832, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0251334252111324, | |
| "kl": 0.00315093994140625, | |
| "learning_rate": 9.700289302201118e-07, | |
| "loss": -0.0054, | |
| "num_tokens": 8726843.0, | |
| "reward": 2.2351741790771484e-08, | |
| "reward_std": 0.9717680215835571, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03285324398900216, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1305907322232915, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1479.9375, | |
| "completions/mean_terminated_length": 1339.5, | |
| "completions/min_length": 1187.0, | |
| "completions/min_terminated_length": 1187.0, | |
| "epoch": 0.20788979336255478, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1164633460783593, | |
| "kl": 0.00360107421875, | |
| "learning_rate": 9.69319354839341e-07, | |
| "loss": -0.01, | |
| "num_tokens": 8774074.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6172374486923218, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010136480012205995, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05376440319397317, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797316, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1453.0, | |
| "completions/mean_length": 1443.6875, | |
| "completions/mean_terminated_length": 1371.2857666015625, | |
| "completions/min_length": 1221.0, | |
| "completions/min_terminated_length": 1221.0, | |
| "epoch": 0.20914214151534127, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8681254932066893, | |
| "kl": 0.003143310546875, | |
| "learning_rate": 9.686017740005845e-07, | |
| "loss": -0.0029, | |
| "num_tokens": 8833421.0, | |
| "reward": 0.0, | |
| "reward_std": 1.049817442893982, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23814174262345672, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24595173419132288, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457554, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 1498.9375, | |
| "completions/mean_terminated_length": 1483.0, | |
| "completions/min_length": 1483.0, | |
| "completions/min_terminated_length": 1483.0, | |
| "epoch": 0.21039448966812774, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.699748602413328, | |
| "kl": 0.002422332763671875, | |
| "learning_rate": 9.678762014035755e-07, | |
| "loss": 0.001, | |
| "num_tokens": 8896332.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8306390047073364, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01676756574749607, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03592053954406261, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1481.0, | |
| "completions/mean_length": 1463.25, | |
| "completions/mean_terminated_length": 1304.0, | |
| "completions/min_length": 1117.0, | |
| "completions/min_terminated_length": 1117.0, | |
| "epoch": 0.2116468378209142, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9281066984860664, | |
| "kl": 0.0030670166015625, | |
| "learning_rate": 9.67142650900622e-07, | |
| "loss": 0.0284, | |
| "num_tokens": 8960800.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.051703691482544, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02864644527108891, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12265684181148895, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1385.0, | |
| "completions/mean_length": 1492.8125, | |
| "completions/mean_terminated_length": 1385.0, | |
| "completions/min_length": 1385.0, | |
| "completions/min_terminated_length": 1385.0, | |
| "epoch": 0.2128991859737007, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.719524959962169, | |
| "kl": 0.002513885498046875, | |
| "learning_rate": 9.664011364963427e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 9014901.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6419066190719604, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026585625959977408, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774533640389131, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901158, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1467.0, | |
| "completions/mean_length": 1398.875, | |
| "completions/mean_terminated_length": 1268.857177734375, | |
| "completions/min_length": 977.0, | |
| "completions/min_terminated_length": 977.0, | |
| "epoch": 0.21415153412648716, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2711757628292637, | |
| "kl": 0.003734588623046875, | |
| "learning_rate": 9.656516723474003e-07, | |
| "loss": 0.0199, | |
| "num_tokens": 9082635.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7075515985488892, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04754440907840732, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19258567827157586, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11155467020454342, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 1372.0, | |
| "completions/mean_terminated_length": 1244.0, | |
| "completions/min_length": 414.0, | |
| "completions/min_terminated_length": 414.0, | |
| "epoch": 0.21540388227927365, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.130031481410935, | |
| "kl": 0.003208160400390625, | |
| "learning_rate": 9.648942727622293e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 9139131.0, | |
| "reward": 4.470348358154297e-08, | |
| "reward_std": 0.8231313824653625, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.15172830287547154, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10156016936265624, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.825, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509011, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1441.0, | |
| "completions/mean_length": 1439.125, | |
| "completions/mean_terminated_length": 1337.666748046875, | |
| "completions/min_length": 1231.0, | |
| "completions/min_terminated_length": 1231.0, | |
| "epoch": 0.21665623043206012, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8174656495384003, | |
| "kl": 0.003330230712890625, | |
| "learning_rate": 9.641289522007648e-07, | |
| "loss": 0.0184, | |
| "num_tokens": 9189589.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9120515584945679, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08079485341203167, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.404800820644525, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059004080210452274, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1416.4375, | |
| "completions/mean_terminated_length": 1232.5999755859375, | |
| "completions/min_length": 961.0, | |
| "completions/min_terminated_length": 961.0, | |
| "epoch": 0.21790857858484658, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.883462614618569, | |
| "kl": 0.003101348876953125, | |
| "learning_rate": 9.633557252741655e-07, | |
| "loss": -0.0209, | |
| "num_tokens": 9242428.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7592308521270752, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09250187361454984, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24709362304891008, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1630723538573985, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1310.0, | |
| "completions/mean_length": 1153.5625, | |
| "completions/mean_terminated_length": 884.1111450195312, | |
| "completions/min_length": 704.0, | |
| "completions/min_terminated_length": 704.0, | |
| "epoch": 0.21916092673763307, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.067205627199215, | |
| "kl": 0.003124237060546875, | |
| "learning_rate": 9.625746067445344e-07, | |
| "loss": 0.0267, | |
| "num_tokens": 9286885.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8734534978866577, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0040830023789233914, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.007219286680192259, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242308, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1475.0, | |
| "completions/mean_length": 1478.625, | |
| "completions/mean_terminated_length": 1329.0, | |
| "completions/min_length": 1183.0, | |
| "completions/min_terminated_length": 1183.0, | |
| "epoch": 0.22041327489041954, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1276319460691004, | |
| "kl": 0.003200531005859375, | |
| "learning_rate": 9.61785611524638e-07, | |
| "loss": -0.0146, | |
| "num_tokens": 9345695.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7759820222854614, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008401002667427777, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08187937939788012, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792516, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1363.0, | |
| "completions/mean_length": 1251.3125, | |
| "completions/mean_terminated_length": 1215.7857666015625, | |
| "completions/min_length": 1017.0, | |
| "completions/min_terminated_length": 1017.0, | |
| "epoch": 0.221665623043206, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6069318364125738, | |
| "kl": 0.0021648406982421875, | |
| "learning_rate": 9.609887546776213e-07, | |
| "loss": -0.0061, | |
| "num_tokens": 9382804.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8300349712371826, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04134925667146179, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05624626120552443, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07698003589195014, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1424.0, | |
| "completions/mean_length": 1388.0, | |
| "completions/mean_terminated_length": 1244.0, | |
| "completions/min_length": 998.0, | |
| "completions/min_terminated_length": 998.0, | |
| "epoch": 0.2229179711959925, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.156831352479139, | |
| "kl": 0.00347137451171875, | |
| "learning_rate": 9.601840514167194e-07, | |
| "loss": -0.0001, | |
| "num_tokens": 9443532.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9561296701431274, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02300302439349743, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06503983162022253, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13305526559931294, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1495.0, | |
| "completions/mean_length": 1296.75, | |
| "completions/mean_terminated_length": 1204.3636474609375, | |
| "completions/min_length": 963.0, | |
| "completions/min_terminated_length": 963.0, | |
| "epoch": 0.22417031934877896, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2566872824431337, | |
| "kl": 0.003185272216796875, | |
| "learning_rate": 9.593715171049677e-07, | |
| "loss": -0.0019, | |
| "num_tokens": 9493936.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9979840517044067, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04123772744400983, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055545285602727666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1482.0, | |
| "completions/mean_length": 1462.0625, | |
| "completions/mean_terminated_length": 1348.25, | |
| "completions/min_length": 1185.0, | |
| "completions/min_terminated_length": 1185.0, | |
| "epoch": 0.22542266750156542, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.118655152417983, | |
| "kl": 0.003711700439453125, | |
| "learning_rate": 9.585511672549087e-07, | |
| "loss": -0.0119, | |
| "num_tokens": 9547913.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6055276393890381, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.2990419990496254, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.5212506601592531, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792518, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 1253.875, | |
| "completions/mean_terminated_length": 1062.4444580078125, | |
| "completions/min_length": 742.0, | |
| "completions/min_terminated_length": 742.0, | |
| "epoch": 0.2266750156543519, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.453588710121719, | |
| "kl": 0.003208160400390625, | |
| "learning_rate": 9.577230175282956e-07, | |
| "loss": -0.0189, | |
| "num_tokens": 9590383.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.026740550994873, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22982623849797099, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35491751307206737, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1431.0, | |
| "completions/mean_length": 1397.3125, | |
| "completions/mean_terminated_length": 1294.625, | |
| "completions/min_length": 1209.0, | |
| "completions/min_terminated_length": 1209.0, | |
| "epoch": 0.22792736380713838, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8117171390856717, | |
| "kl": 0.00273895263671875, | |
| "learning_rate": 9.568870837357933e-07, | |
| "loss": 0.0049, | |
| "num_tokens": 9635180.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9024027585983276, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015236533423952495, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05515104905405319, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1270024788326182, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1389.4375, | |
| "completions/mean_terminated_length": 1247.2857666015625, | |
| "completions/min_length": 1029.0, | |
| "completions/min_terminated_length": 1029.0, | |
| "epoch": 0.22917971195992487, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.146143558726643, | |
| "kl": 0.003173828125, | |
| "learning_rate": 9.56043381836677e-07, | |
| "loss": 0.0244, | |
| "num_tokens": 9691707.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.6937527656555176, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06475936323780643, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07867382027532054, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07781745019952505, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1496.0, | |
| "completions/mean_length": 1331.75, | |
| "completions/mean_terminated_length": 1275.666748046875, | |
| "completions/min_length": 857.0, | |
| "completions/min_terminated_length": 857.0, | |
| "epoch": 0.23043206011271133, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.445265358440665, | |
| "kl": 0.003719329833984375, | |
| "learning_rate": 9.551919279385267e-07, | |
| "loss": 0.0321, | |
| "num_tokens": 9741247.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9354739785194397, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011286604414356131, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06370003732540648, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09878896324620107, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1381.0, | |
| "completions/mean_length": 1312.6875, | |
| "completions/mean_terminated_length": 1269.4615478515625, | |
| "completions/min_length": 994.0, | |
| "completions/min_terminated_length": 994.0, | |
| "epoch": 0.2316844082654978, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2176769971519983, | |
| "kl": 0.0028533935546875, | |
| "learning_rate": 9.543327382969203e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 9800986.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8514897227287292, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06189917187460071, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09461702207527528, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1437.0, | |
| "completions/max_terminated_length": 1437.0, | |
| "completions/mean_length": 1251.9375, | |
| "completions/mean_terminated_length": 1251.9375, | |
| "completions/min_length": 1139.0, | |
| "completions/min_terminated_length": 1139.0, | |
| "epoch": 0.2329367564182843, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.8078703137230523, | |
| "kl": 0.0009489059448242188, | |
| "learning_rate": 9.534658293151226e-07, | |
| "loss": 0.0206, | |
| "num_tokens": 9844961.0, | |
| "reward": -2.2351741790771484e-08, | |
| "reward_std": 1.0031490325927734, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1837486103073024, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2121586351571871, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.23418910457107076, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.341222957754127, | |
| "kl": 0.0020427703857421875, | |
| "learning_rate": 9.525912175437733e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 9904889.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8993015289306641, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0626094048175301, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14524912930313416, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1102186379345533, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1459.0, | |
| "completions/mean_length": 1401.75, | |
| "completions/mean_terminated_length": 1303.5, | |
| "completions/min_length": 946.0, | |
| "completions/min_terminated_length": 946.0, | |
| "epoch": 0.23544145272385722, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7109926581212966, | |
| "kl": 0.0028514862060546875, | |
| "learning_rate": 9.5170891968057e-07, | |
| "loss": 0.0103, | |
| "num_tokens": 9960061.0, | |
| "reward": 0.0, | |
| "reward_std": 0.4977339506149292, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007690022648520695, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11369344635650466, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11279282877125754, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1497.0, | |
| "completions/mean_length": 1399.3125, | |
| "completions/mean_terminated_length": 1321.0, | |
| "completions/min_length": 1270.0, | |
| "completions/min_terminated_length": 1270.0, | |
| "epoch": 0.2366938008766437, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.2019750861669105, | |
| "kl": 0.0019474029541015625, | |
| "learning_rate": 9.508189525699498e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 10018474.0, | |
| "reward": 0.0, | |
| "reward_std": 0.40811485052108765, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09626746004308685, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11868608664564458, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066221, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1067.0, | |
| "completions/mean_length": 1176.1875, | |
| "completions/mean_terminated_length": 852.375, | |
| "completions/min_length": 641.0, | |
| "completions/min_terminated_length": 641.0, | |
| "epoch": 0.23794614902943018, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0536777064675973, | |
| "kl": 0.00238037109375, | |
| "learning_rate": 9.499213332027676e-07, | |
| "loss": -0.0079, | |
| "num_tokens": 10055509.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.5494594573974609, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09604975311367514, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10254148239725733, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686702, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1421.0, | |
| "completions/max_terminated_length": 1421.0, | |
| "completions/mean_length": 980.5, | |
| "completions/mean_terminated_length": 980.5, | |
| "completions/min_length": 598.0, | |
| "completions/min_terminated_length": 598.0, | |
| "epoch": 0.23919849718221667, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5775077184845583, | |
| "kl": 0.003173828125, | |
| "learning_rate": 9.490160787159716e-07, | |
| "loss": -0.0435, | |
| "num_tokens": 10088493.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7993010878562927, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017021331786918385, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08925782815695868, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5791666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625447, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1491.0, | |
| "completions/mean_length": 1205.125, | |
| "completions/mean_terminated_length": 1028.2000732421875, | |
| "completions/min_length": 700.0, | |
| "completions/min_terminated_length": 700.0, | |
| "epoch": 0.24045084533500313, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.472937551782796, | |
| "kl": 0.003765106201171875, | |
| "learning_rate": 9.481032063922764e-07, | |
| "loss": 0.0801, | |
| "num_tokens": 10134447.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9049590826034546, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028540941769550358, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.047797358383350766, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346314, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 1342.1875, | |
| "completions/mean_terminated_length": 1219.4444580078125, | |
| "completions/min_length": 823.0, | |
| "completions/min_terminated_length": 823.0, | |
| "epoch": 0.2417031934877896, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0654183912224107, | |
| "kl": 0.00345611572265625, | |
| "learning_rate": 9.471827336598332e-07, | |
| "loss": -0.0116, | |
| "num_tokens": 10182434.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.894692599773407, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.10903944916375954, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15533453332102554, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06540472290116196, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.2429555416405761, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7779664849113415, | |
| "kl": 0.003116607666015625, | |
| "learning_rate": 9.462546780918966e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 10244530.0, | |
| "reward": 0.0, | |
| "reward_std": 0.967013955116272, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.053086024723834134, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06887877561253418, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1470.0, | |
| "completions/mean_length": 1454.9375, | |
| "completions/mean_terminated_length": 1397.0001220703125, | |
| "completions/min_length": 1265.0, | |
| "completions/min_terminated_length": 1265.0, | |
| "epoch": 0.24420788979336255, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1642061803468913, | |
| "kl": 0.003826141357421875, | |
| "learning_rate": 9.453190574064893e-07, | |
| "loss": -0.0047, | |
| "num_tokens": 10299345.0, | |
| "reward": 1.862645149230957e-08, | |
| "reward_std": 1.04762601852417, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04354357070732585, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08903133853741613, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725114, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1337.0625, | |
| "completions/mean_terminated_length": 1282.75, | |
| "completions/min_length": 999.0, | |
| "completions/min_terminated_length": 999.0, | |
| "epoch": 0.24546023794614902, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0019183290119758, | |
| "kl": 0.003467559814453125, | |
| "learning_rate": 9.443758894660638e-07, | |
| "loss": 0.0284, | |
| "num_tokens": 10358514.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6315692067146301, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07697389081957594, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12645427286420413, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10671873729054746, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1413.0, | |
| "completions/mean_length": 1494.5625, | |
| "completions/mean_terminated_length": 1413.0, | |
| "completions/min_length": 1413.0, | |
| "completions/min_terminated_length": 1413.0, | |
| "epoch": 0.2467125860989355, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8267138784190755, | |
| "kl": 0.002933502197265625, | |
| "learning_rate": 9.434251922771616e-07, | |
| "loss": 0.0078, | |
| "num_tokens": 10411171.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.884939432144165, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01747490695405262, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06365932956310252, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12141145226353543, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1418.0, | |
| "completions/mean_length": 1410.375, | |
| "completions/mean_terminated_length": 1261.0, | |
| "completions/min_length": 803.0, | |
| "completions/min_terminated_length": 803.0, | |
| "epoch": 0.24796493425172197, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1021608965378076, | |
| "kl": 0.003887176513671875, | |
| "learning_rate": 9.424669839900691e-07, | |
| "loss": 0.0143, | |
| "num_tokens": 10469257.0, | |
| "reward": -5.21540641784668e-08, | |
| "reward_std": 1.061091661453247, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.21540641784668e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0551289409217747, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2097823559795121, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505424, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1407.0, | |
| "completions/mean_length": 1370.9375, | |
| "completions/mean_terminated_length": 1205.0, | |
| "completions/min_length": 1019.0, | |
| "completions/min_terminated_length": 1019.0, | |
| "epoch": 0.24921728240450847, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6919185602949653, | |
| "kl": 0.002506256103515625, | |
| "learning_rate": 9.415012828984714e-07, | |
| "loss": 0.0067, | |
| "num_tokens": 10523624.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7187443971633911, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03211836693332174, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13737955494238535, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1361.0, | |
| "completions/mean_length": 1390.8125, | |
| "completions/mean_terminated_length": 1281.625, | |
| "completions/min_length": 1177.0, | |
| "completions/min_terminated_length": 1177.0, | |
| "epoch": 0.25046963055729493, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.450252869715986, | |
| "kl": 0.00223541259765625, | |
| "learning_rate": 9.405281074391022e-07, | |
| "loss": -0.0098, | |
| "num_tokens": 10579429.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.4103597402572632, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1406289464666968, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15985873234433481, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08933913745655643, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 1295.8125, | |
| "completions/mean_terminated_length": 1266.6429443359375, | |
| "completions/min_length": 994.0, | |
| "completions/min_terminated_length": 994.0, | |
| "epoch": 0.2517219787100814, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.642709475322054, | |
| "kl": 0.0023345947265625, | |
| "learning_rate": 9.395474761913939e-07, | |
| "loss": 0.014, | |
| "num_tokens": 10628866.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.7710261940956116, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04348868814755175, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0830759853911682, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516198, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 1357.125, | |
| "completions/mean_terminated_length": 1324.1539306640625, | |
| "completions/min_length": 1030.0, | |
| "completions/min_terminated_length": 1030.0, | |
| "epoch": 0.25297432686286786, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.216143536482822, | |
| "kl": 0.004241943359375, | |
| "learning_rate": 9.3855940787712e-07, | |
| "loss": -0.0086, | |
| "num_tokens": 10670092.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6420686841011047, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03687807737633173, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16934247164490465, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14343665526661611, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1434.0, | |
| "completions/mean_length": 1278.625, | |
| "completions/mean_terminated_length": 1227.5384521484375, | |
| "completions/min_length": 970.0, | |
| "completions/min_terminated_length": 970.0, | |
| "epoch": 0.2542266750156543, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4244156852349814, | |
| "kl": 0.0052642822265625, | |
| "learning_rate": 9.375639213600401e-07, | |
| "loss": -0.0436, | |
| "num_tokens": 10728350.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9011333584785461, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09818030402455966, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07523729893672071, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13080944580232393, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 1443.9375, | |
| "completions/mean_terminated_length": 1371.857177734375, | |
| "completions/min_length": 1252.0, | |
| "completions/min_terminated_length": 1252.0, | |
| "epoch": 0.25547902316844084, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5077076210411278, | |
| "kl": 0.00457000732421875, | |
| "learning_rate": 9.365610356455384e-07, | |
| "loss": 0.0019, | |
| "num_tokens": 10791365.0, | |
| "reward": 4.470348358154297e-08, | |
| "reward_std": 0.8847507238388062, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015580215905333485, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06751943458738671, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.135263802609184, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1351.0, | |
| "completions/mean_length": 1391.8125, | |
| "completions/mean_terminated_length": 1067.25, | |
| "completions/min_length": 264.0, | |
| "completions/min_terminated_length": 264.0, | |
| "epoch": 0.2567313713212273, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.827913375094597, | |
| "kl": 0.003643035888671875, | |
| "learning_rate": 9.355507698802613e-07, | |
| "loss": -0.0786, | |
| "num_tokens": 10852330.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0562589168548584, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09460135777577211, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12653992925605045, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11122216672215289, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1397.0, | |
| "completions/mean_length": 1426.6875, | |
| "completions/mean_terminated_length": 1265.4000244140625, | |
| "completions/min_length": 1092.0, | |
| "completions/min_terminated_length": 1092.0, | |
| "epoch": 0.2579837194740138, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.109416757035146, | |
| "kl": 0.00577545166015625, | |
| "learning_rate": 9.345331433517522e-07, | |
| "loss": 0.0289, | |
| "num_tokens": 10918837.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9729784727096558, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04706903609226349, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08099201475868337, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045818, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1478.0, | |
| "completions/mean_length": 1464.1875, | |
| "completions/mean_terminated_length": 1356.75, | |
| "completions/min_length": 1188.0, | |
| "completions/min_terminated_length": 1188.0, | |
| "epoch": 0.25923606762680024, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1385572152974905, | |
| "kl": 0.003864288330078125, | |
| "learning_rate": 9.335081754880825e-07, | |
| "loss": 0.0082, | |
| "num_tokens": 10974608.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.5515385270118713, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06366384054522155, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10778487016156474, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1002773930432755, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1487.0, | |
| "completions/mean_length": 1448.625, | |
| "completions/mean_terminated_length": 1363.0, | |
| "completions/min_length": 1103.0, | |
| "completions/min_terminated_length": 1103.0, | |
| "epoch": 0.2604884157795867, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2758893957680097, | |
| "kl": 0.004230499267578125, | |
| "learning_rate": 9.32475885857481e-07, | |
| "loss": -0.0053, | |
| "num_tokens": 11033482.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5894155502319336, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05038277241462744, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07116397984833597, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1463.0, | |
| "completions/mean_length": 1455.75, | |
| "completions/mean_terminated_length": 1264.0, | |
| "completions/min_length": 1089.0, | |
| "completions/min_terminated_length": 1089.0, | |
| "epoch": 0.2617407639323732, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6461488967254634, | |
| "kl": 0.0025310516357421875, | |
| "learning_rate": 9.31436294167961e-07, | |
| "loss": -0.0132, | |
| "num_tokens": 11098902.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6484573483467102, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1468978313797672, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24420746920563674, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1413.0, | |
| "completions/mean_length": 1237.1875, | |
| "completions/mean_terminated_length": 1079.5, | |
| "completions/min_length": 742.0, | |
| "completions/min_terminated_length": 742.0, | |
| "epoch": 0.2629931120851597, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.313685913897086, | |
| "kl": 0.003513336181640625, | |
| "learning_rate": 9.303894202669428e-07, | |
| "loss": 0.0531, | |
| "num_tokens": 11148649.0, | |
| "reward": -1.1175870895385742e-08, | |
| "reward_std": 0.990402102470398, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0009552414586071921, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0038209658344287682, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1433.0, | |
| "completions/mean_length": 1327.125, | |
| "completions/mean_terminated_length": 1154.25, | |
| "completions/min_length": 970.0, | |
| "completions/min_terminated_length": 970.0, | |
| "epoch": 0.26424546023794615, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0546042142648298, | |
| "kl": 0.0033416748046875, | |
| "learning_rate": 9.293352841408759e-07, | |
| "loss": -0.0213, | |
| "num_tokens": 11207483.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5952367186546326, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.035691884267146166, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07657424493915134, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672248, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1375.0, | |
| "completions/mean_length": 1306.25, | |
| "completions/mean_terminated_length": 1190.0, | |
| "completions/min_length": 1071.0, | |
| "completions/min_terminated_length": 1071.0, | |
| "epoch": 0.2654978083907326, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.123766104646228, | |
| "kl": 0.0015621185302734375, | |
| "learning_rate": 9.282739059148566e-07, | |
| "loss": -0.0237, | |
| "num_tokens": 11255703.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9929344654083252, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.025706850415670862, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11233922174981649, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1287.0, | |
| "completions/mean_length": 1126.125, | |
| "completions/mean_terminated_length": 956.1818237304688, | |
| "completions/min_length": 674.0, | |
| "completions/min_terminated_length": 674.0, | |
| "epoch": 0.2667501565435191, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.488984850056713, | |
| "kl": 0.0038604736328125, | |
| "learning_rate": 9.272053058522444e-07, | |
| "loss": -0.0253, | |
| "num_tokens": 11294505.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5554646253585815, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23003407087469527, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20616326736471785, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15000000000000002, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1279.0, | |
| "completions/max_terminated_length": 1279.0, | |
| "completions/mean_length": 883.5, | |
| "completions/mean_terminated_length": 883.5, | |
| "completions/min_length": 673.0, | |
| "completions/min_terminated_length": 673.0, | |
| "epoch": 0.2680025046963056, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2969967424976456, | |
| "kl": 0.002185821533203125, | |
| "learning_rate": 9.261295043542747e-07, | |
| "loss": 0.0085, | |
| "num_tokens": 11325305.0, | |
| "reward": 2.421438694000244e-08, | |
| "reward_std": 1.039635419845581, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.421438694000244e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06031083797758491, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16643314604295306, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11603000888978231, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 1423.8125, | |
| "completions/mean_terminated_length": 1296.8333740234375, | |
| "completions/min_length": 987.0, | |
| "completions/min_terminated_length": 987.0, | |
| "epoch": 0.26925485284909206, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.683342974010468, | |
| "kl": 0.0028839111328125, | |
| "learning_rate": 9.250465219596699e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11384166.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6987115144729614, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015736024702926166, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06158481768754947, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1057600358603626, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1390.0, | |
| "completions/max_terminated_length": 1390.0, | |
| "completions/mean_length": 1160.6875, | |
| "completions/mean_terminated_length": 1160.6875, | |
| "completions/min_length": 865.0, | |
| "completions/min_terminated_length": 865.0, | |
| "epoch": 0.27050720100187853, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.800146584559223, | |
| "kl": 0.004093170166015625, | |
| "learning_rate": 9.239563793442462e-07, | |
| "loss": 0.0174, | |
| "num_tokens": 11441313.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8806728720664978, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0032805949907051112, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0510781770746922, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509009, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1450.0, | |
| "completions/mean_length": 1307.0625, | |
| "completions/mean_terminated_length": 1279.5, | |
| "completions/min_length": 985.0, | |
| "completions/min_terminated_length": 985.0, | |
| "epoch": 0.271759549154665, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.806350656696486, | |
| "kl": 0.002574920654296875, | |
| "learning_rate": 9.228590973205201e-07, | |
| "loss": -0.0377, | |
| "num_tokens": 11499258.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.0440177917480469, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0614237528104428, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06944213481803516, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316066, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.27301189730745146, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4434394608429915, | |
| "kl": 0.002834320068359375, | |
| "learning_rate": 9.2175469683731e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 11554162.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9512232542037964, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006459758393578777, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09841534495892398, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13601470508735444, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1378.0, | |
| "completions/mean_length": 1139.4375, | |
| "completions/mean_terminated_length": 1115.4000244140625, | |
| "completions/min_length": 793.0, | |
| "completions/min_terminated_length": 793.0, | |
| "epoch": 0.2742642454602379, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.183780267914974, | |
| "kl": 0.0016641616821289062, | |
| "learning_rate": 9.206431989793374e-07, | |
| "loss": 0.0171, | |
| "num_tokens": 11599913.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7830429077148438, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006944415247763777, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03283008559006156, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1295.0, | |
| "completions/mean_length": 1346.5, | |
| "completions/mean_terminated_length": 1149.1429443359375, | |
| "completions/min_length": 1004.0, | |
| "completions/min_terminated_length": 1004.0, | |
| "epoch": 0.27551659361302444, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3703074221103817, | |
| "kl": 0.00464630126953125, | |
| "learning_rate": 9.195246249668232e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 11664265.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.7190686464309692, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.3476598454237376, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.4301665677025463, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891873, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 1309.5625, | |
| "completions/mean_terminated_length": 1119.125, | |
| "completions/min_length": 955.0, | |
| "completions/min_terminated_length": 955.0, | |
| "epoch": 0.2767689417658109, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.622730015507812, | |
| "kl": 0.0024242401123046875, | |
| "learning_rate": 9.183989961550832e-07, | |
| "loss": -0.0219, | |
| "num_tokens": 11719922.0, | |
| "reward": 0.0, | |
| "reward_std": 0.72877037525177, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0952471076969717, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12586824643040787, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1392.0, | |
| "completions/mean_length": 1485.875, | |
| "completions/mean_terminated_length": 1387.0, | |
| "completions/min_length": 1382.0, | |
| "completions/min_terminated_length": 1382.0, | |
| "epoch": 0.27802128991859737, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.782237650167012, | |
| "kl": 0.004253387451171875, | |
| "learning_rate": 9.172663340341204e-07, | |
| "loss": -0.0028, | |
| "num_tokens": 11778680.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0009106397628784, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.012701224890322388, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02815604341593864, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408155, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 1443.0625, | |
| "completions/mean_terminated_length": 1317.800048828125, | |
| "completions/min_length": 1121.0, | |
| "completions/min_terminated_length": 1121.0, | |
| "epoch": 0.27927363807138383, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.998694336626448, | |
| "kl": 0.003566741943359375, | |
| "learning_rate": 9.161266602282147e-07, | |
| "loss": -0.0055, | |
| "num_tokens": 11838169.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9211122989654541, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04490957636365446, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09152261044011904, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1450.0, | |
| "completions/mean_length": 1488.875, | |
| "completions/mean_terminated_length": 1411.0, | |
| "completions/min_length": 1372.0, | |
| "completions/min_terminated_length": 1372.0, | |
| "epoch": 0.2805259862241703, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9658149533875187, | |
| "kl": 0.004146575927734375, | |
| "learning_rate": 9.149799964955093e-07, | |
| "loss": 0.008, | |
| "num_tokens": 11899975.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0432794094085693, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012174573886332358, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04195711207506097, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1359.0, | |
| "completions/mean_length": 1409.8125, | |
| "completions/mean_terminated_length": 1139.25, | |
| "completions/min_length": 925.0, | |
| "completions/min_terminated_length": 925.0, | |
| "epoch": 0.2817783343769568, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.936718458392348, | |
| "kl": 0.00342559814453125, | |
| "learning_rate": 9.138263647275969e-07, | |
| "loss": -0.0033, | |
| "num_tokens": 11941164.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0000627040863037, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15080494449355206, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08565387051258783, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09339283817414601, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1475.0, | |
| "completions/mean_length": 1183.3125, | |
| "completions/mean_terminated_length": 1110.2308349609375, | |
| "completions/min_length": 786.0, | |
| "completions/min_terminated_length": 786.0, | |
| "epoch": 0.2830306825297433, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.658827402816175, | |
| "kl": 0.00414276123046875, | |
| "learning_rate": 9.126657869491e-07, | |
| "loss": 0.0126, | |
| "num_tokens": 11992657.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9479507207870483, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017415102975537073, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031912571116253466, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026002, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1484.0, | |
| "completions/mean_length": 1499.0, | |
| "completions/mean_terminated_length": 1484.0, | |
| "completions/min_length": 1484.0, | |
| "completions/min_terminated_length": 1484.0, | |
| "epoch": 0.28428303068252975, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1261193198966652, | |
| "kl": 0.0041351318359375, | |
| "learning_rate": 9.114982853172521e-07, | |
| "loss": 0.0009, | |
| "num_tokens": 12054529.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0018526315689087, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012434236974245455, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045472914513713596, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.2855353788353162, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4904650551200485, | |
| "kl": 0.00292205810546875, | |
| "learning_rate": 9.103238821214727e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 12114017.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.4001474976539612, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19495499044861478, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26273237351903383, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059628479399994425, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1329.0, | |
| "completions/mean_length": 1472.3125, | |
| "completions/mean_terminated_length": 1278.5, | |
| "completions/min_length": 1228.0, | |
| "completions/min_terminated_length": 1228.0, | |
| "epoch": 0.2867877269881027, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9738846657010085, | |
| "kl": 0.00360107421875, | |
| "learning_rate": 9.09142599782944e-07, | |
| "loss": -0.0048, | |
| "num_tokens": 12167838.0, | |
| "reward": 0.0, | |
| "reward_std": 0.981914758682251, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.2657549523204851, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31826899071497716, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725108, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 1397.75, | |
| "completions/mean_terminated_length": 1266.2857666015625, | |
| "completions/min_length": 987.0, | |
| "completions/min_terminated_length": 987.0, | |
| "epoch": 0.28804007514088914, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9008571656686524, | |
| "kl": 0.003810882568359375, | |
| "learning_rate": 9.07954460854181e-07, | |
| "loss": -0.0435, | |
| "num_tokens": 12219114.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 0.9752408266067505, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 1.7226310978600795e-05, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017137695280743562, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 1381.0625, | |
| "completions/mean_terminated_length": 1262.125, | |
| "completions/min_length": 1118.0, | |
| "completions/min_terminated_length": 1118.0, | |
| "epoch": 0.28929242329367566, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.579522085731086, | |
| "kl": 0.00286102294921875, | |
| "learning_rate": 9.067594880186016e-07, | |
| "loss": 0.0118, | |
| "num_tokens": 12283627.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8155025839805603, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4072348540230938, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33458166056964905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1402.0, | |
| "completions/mean_length": 1432.375, | |
| "completions/mean_terminated_length": 1229.5, | |
| "completions/min_length": 1085.0, | |
| "completions/min_terminated_length": 1085.0, | |
| "epoch": 0.2905447714464621, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3504781665775463, | |
| "kl": 0.00449371337890625, | |
| "learning_rate": 9.055577040900944e-07, | |
| "loss": 0.0198, | |
| "num_tokens": 12334705.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0079009532928467, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007641388631451263, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1447140199531734, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1067187372905475, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1439.0, | |
| "completions/mean_length": 1411.4375, | |
| "completions/mean_terminated_length": 1322.875, | |
| "completions/min_length": 1169.0, | |
| "completions/min_terminated_length": 1169.0, | |
| "epoch": 0.2917971195992486, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.3485586422127005, | |
| "kl": 0.004638671875, | |
| "learning_rate": 9.043491320125814e-07, | |
| "loss": 0.0213, | |
| "num_tokens": 12389648.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8743376731872559, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002227354544120855, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08926616854117143, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13109227736669002, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1492.0, | |
| "completions/mean_length": 1474.0, | |
| "completions/mean_terminated_length": 1430.666748046875, | |
| "completions/min_length": 1263.0, | |
| "completions/min_terminated_length": 1263.0, | |
| "epoch": 0.29304946775203505, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8494643618437947, | |
| "kl": 0.00304412841796875, | |
| "learning_rate": 9.031337948595817e-07, | |
| "loss": 0.0093, | |
| "num_tokens": 12456272.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7424121499061584, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04108305878098174, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1161369232371233, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408158, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1477.0, | |
| "completions/mean_length": 1141.4375, | |
| "completions/mean_terminated_length": 1117.533447265625, | |
| "completions/min_length": 557.0, | |
| "completions/min_terminated_length": 557.0, | |
| "epoch": 0.2943018159048215, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.2293142678990754, | |
| "kl": 0.0016422271728515625, | |
| "learning_rate": 9.019117158337695e-07, | |
| "loss": 0.0038, | |
| "num_tokens": 12498031.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.6336873769760132, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009688556708469433, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05746171503021093, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1529342632927262, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1360.0, | |
| "completions/mean_length": 1298.0, | |
| "completions/mean_terminated_length": 1140.888916015625, | |
| "completions/min_length": 942.0, | |
| "completions/min_terminated_length": 942.0, | |
| "epoch": 0.29555416405760804, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.714021525456579, | |
| "kl": 0.00262451171875, | |
| "learning_rate": 9.006829182665325e-07, | |
| "loss": -0.0167, | |
| "num_tokens": 12548119.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6711900234222412, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07748680022506171, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09355524405080126, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316063, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1300.5625, | |
| "completions/mean_terminated_length": 1254.5384521484375, | |
| "completions/min_length": 1067.0, | |
| "completions/min_terminated_length": 1067.0, | |
| "epoch": 0.2968065122103945, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.982742616917611, | |
| "kl": 0.003223419189453125, | |
| "learning_rate": 8.99447425617525e-07, | |
| "loss": 0.0208, | |
| "num_tokens": 12596288.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.049065351486206, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.24719836974150322, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26811306631065646, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 1029.3125, | |
| "completions/mean_terminated_length": 997.9334106445312, | |
| "completions/min_length": 658.0, | |
| "completions/min_terminated_length": 658.0, | |
| "epoch": 0.29805886036318097, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8416489290807947, | |
| "kl": 0.002620697021484375, | |
| "learning_rate": 8.982052614742218e-07, | |
| "loss": 0.011, | |
| "num_tokens": 12642901.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0193631649017334, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07904007503321656, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05617218071571685, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1465.0, | |
| "completions/mean_length": 1096.1875, | |
| "completions/mean_terminated_length": 782.1111450195312, | |
| "completions/min_length": 444.0, | |
| "completions/min_terminated_length": 444.0, | |
| "epoch": 0.29931120851596743, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.052640590300802, | |
| "kl": 0.003078460693359375, | |
| "learning_rate": 8.96956449551466e-07, | |
| "loss": 0.0293, | |
| "num_tokens": 12685520.0, | |
| "reward": 3.725290298461914e-08, | |
| "reward_std": 1.0355110168457031, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0007223476637822487, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045707258037314374, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1352.0, | |
| "completions/mean_length": 1176.375, | |
| "completions/mean_terminated_length": 1101.6923828125, | |
| "completions/min_length": 795.0, | |
| "completions/min_terminated_length": 795.0, | |
| "epoch": 0.3005635566687539, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.409137882904874, | |
| "kl": 0.00447845458984375, | |
| "learning_rate": 8.957010136910177e-07, | |
| "loss": 0.0027, | |
| "num_tokens": 12732478.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0500978231430054, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03744221002235665, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07008909373099989, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09574271077563382, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1235.0, | |
| "completions/mean_length": 1412.4375, | |
| "completions/mean_terminated_length": 799.5, | |
| "completions/min_length": 364.0, | |
| "completions/min_terminated_length": 364.0, | |
| "epoch": 0.3018159048215404, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.773019820684676, | |
| "kl": 0.003604888916015625, | |
| "learning_rate": 8.944389778610978e-07, | |
| "loss": -0.0118, | |
| "num_tokens": 12801637.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.0457574129104614, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0662282436201746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07940471297587236, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.22273551829717486, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1189.0, | |
| "completions/mean_length": 1244.8125, | |
| "completions/mean_terminated_length": 989.625, | |
| "completions/min_length": 844.0, | |
| "completions/min_terminated_length": 844.0, | |
| "epoch": 0.3030682529743269, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2644500704934876, | |
| "kl": 0.00386810302734375, | |
| "learning_rate": 8.931703661559313e-07, | |
| "loss": -0.0143, | |
| "num_tokens": 12856914.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8334095478057861, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.056274055481427915, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06634909249021953, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1470.0, | |
| "completions/mean_length": 1456.9375, | |
| "completions/mean_terminated_length": 1327.75, | |
| "completions/min_length": 1033.0, | |
| "completions/min_terminated_length": 1033.0, | |
| "epoch": 0.30432060112711334, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6637299937398455, | |
| "kl": 0.003570556640625, | |
| "learning_rate": 8.918952027952867e-07, | |
| "loss": 0.0284, | |
| "num_tokens": 12917977.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6500852704048157, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.022227592869964712, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.035807130460280175, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672246, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1087.0, | |
| "completions/mean_length": 1474.1875, | |
| "completions/mean_terminated_length": 1087.0, | |
| "completions/min_length": 1087.0, | |
| "completions/min_terminated_length": 1087.0, | |
| "epoch": 0.3055729492798998, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9975900171544794, | |
| "kl": 0.003841400146484375, | |
| "learning_rate": 8.906135121240139e-07, | |
| "loss": -0.0025, | |
| "num_tokens": 12975724.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0467472076416016, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.042561575382490995, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12106724719901756, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1411.0, | |
| "completions/mean_length": 1228.9375, | |
| "completions/mean_terminated_length": 1066.300048828125, | |
| "completions/min_length": 227.0, | |
| "completions/min_terminated_length": 227.0, | |
| "epoch": 0.3068252974326863, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9921353466243374, | |
| "kl": 0.00344085693359375, | |
| "learning_rate": 8.89325318611579e-07, | |
| "loss": -0.1088, | |
| "num_tokens": 13028715.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8063486218452454, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04616985070885913, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17900764914436168, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0718795288428261, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1489.0, | |
| "completions/mean_length": 1367.3125, | |
| "completions/mean_terminated_length": 1307.0, | |
| "completions/min_length": 1073.0, | |
| "completions/min_terminated_length": 1073.0, | |
| "epoch": 0.30807764558547274, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.02852662262937, | |
| "kl": 0.00337982177734375, | |
| "learning_rate": 8.880306468515979e-07, | |
| "loss": 0.0285, | |
| "num_tokens": 13077528.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.4837535619735718, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010209232644034694, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1481102929172379, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16947631758514883, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1491.0, | |
| "completions/mean_length": 1327.5625, | |
| "completions/mean_terminated_length": 1270.0833740234375, | |
| "completions/min_length": 1026.0, | |
| "completions/min_terminated_length": 1026.0, | |
| "epoch": 0.30932999373825926, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 5.433838129064804, | |
| "kl": 0.009929656982421875, | |
| "learning_rate": 8.867295215613659e-07, | |
| "loss": 0.0288, | |
| "num_tokens": 13145409.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.691638708114624, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06467589999789795, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0938792951394418, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0824396524513313, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 1467.9375, | |
| "completions/mean_terminated_length": 1397.4000244140625, | |
| "completions/min_length": 1284.0, | |
| "completions/min_terminated_length": 1284.0, | |
| "epoch": 0.3105823418910457, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9888443937256404, | |
| "kl": 0.0040283203125, | |
| "learning_rate": 8.85421967581386e-07, | |
| "loss": 0.0184, | |
| "num_tokens": 13198848.0, | |
| "reward": -2.2351741790771484e-08, | |
| "reward_std": 0.9693495035171509, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.032938770819161474, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.162768145864506, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1803289175881631, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 1498.9375, | |
| "completions/mean_terminated_length": 1483.0, | |
| "completions/min_length": 1483.0, | |
| "completions/min_terminated_length": 1483.0, | |
| "epoch": 0.3118346900438322, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.721188390535492, | |
| "kl": 0.003574371337890625, | |
| "learning_rate": 8.841080098748959e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 13257207.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9934348464012146, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.054418946541605284, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14760181642272932, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12345339501504504, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1351.0, | |
| "completions/mean_length": 1359.3125, | |
| "completions/mean_terminated_length": 1178.4285888671875, | |
| "completions/min_length": 890.0, | |
| "completions/min_terminated_length": 890.0, | |
| "epoch": 0.31308703819661865, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2668088351359708, | |
| "kl": 0.005401611328125, | |
| "learning_rate": 8.827876735273893e-07, | |
| "loss": -0.03, | |
| "num_tokens": 13314820.0, | |
| "reward": -3.725290298461914e-09, | |
| "reward_std": 1.0606722831726074, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1087294165966756, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07396732734066605, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10000000000000002, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1458.0, | |
| "completions/mean_length": 1409.3125, | |
| "completions/mean_terminated_length": 1258.166748046875, | |
| "completions/min_length": 1036.0, | |
| "completions/min_terminated_length": 1036.0, | |
| "epoch": 0.3143393863494051, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0088889020769733, | |
| "kl": 0.003604888916015625, | |
| "learning_rate": 8.814609837461385e-07, | |
| "loss": 0.0432, | |
| "num_tokens": 13381449.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6518675088882446, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005307542092858496, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017828779266863094, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1112221667221529, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1472.0, | |
| "completions/mean_length": 1289.9375, | |
| "completions/mean_terminated_length": 1126.5555419921875, | |
| "completions/min_length": 1031.0, | |
| "completions/min_terminated_length": 1031.0, | |
| "epoch": 0.31559173450219163, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.2400933230709956, | |
| "kl": 0.0020122528076171875, | |
| "learning_rate": 8.801279658597131e-07, | |
| "loss": 0.0011, | |
| "num_tokens": 13430872.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8595645427703857, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12188488436675578, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.32049499716061297, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1234.0, | |
| "completions/mean_length": 1483.375, | |
| "completions/mean_terminated_length": 1234.0, | |
| "completions/min_length": 1234.0, | |
| "completions/min_terminated_length": 1234.0, | |
| "epoch": 0.3168440826549781, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0575565927809905, | |
| "kl": 0.004093170166015625, | |
| "learning_rate": 8.787886453174951e-07, | |
| "loss": -0.0053, | |
| "num_tokens": 13479446.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9997775554656982, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.051598953607968394, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06761287588738078, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12583057392117916, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1317.0, | |
| "completions/max_terminated_length": 1317.0, | |
| "completions/mean_length": 1068.0625, | |
| "completions/mean_terminated_length": 1068.0625, | |
| "completions/min_length": 758.0, | |
| "completions/min_terminated_length": 758.0, | |
| "epoch": 0.31809643080776456, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.56780625703575, | |
| "kl": 0.00376129150390625, | |
| "learning_rate": 8.77443047689195e-07, | |
| "loss": -0.0249, | |
| "num_tokens": 13534791.0, | |
| "reward": 3.725290298461914e-09, | |
| "reward_std": 1.0647456645965576, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12037176129735677, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15720532676467985, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08681611046941137, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1149.0, | |
| "completions/mean_length": 1264.8125, | |
| "completions/mean_terminated_length": 1029.625, | |
| "completions/min_length": 999.0, | |
| "completions/min_terminated_length": 999.0, | |
| "epoch": 0.319348778960551, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.9002914944914675, | |
| "kl": 0.001689910888671875, | |
| "learning_rate": 8.760911986643621e-07, | |
| "loss": 0.0079, | |
| "num_tokens": 13585044.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0633113384246826, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11099520216632296, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11289406797895053, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1356.0, | |
| "completions/max_terminated_length": 1356.0, | |
| "completions/mean_length": 1039.625, | |
| "completions/mean_terminated_length": 1039.625, | |
| "completions/min_length": 816.0, | |
| "completions/min_terminated_length": 816.0, | |
| "epoch": 0.3206011271133375, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.7437212883045385, | |
| "kl": 0.0007447004318237305, | |
| "learning_rate": 8.747331240518946e-07, | |
| "loss": -0.0359, | |
| "num_tokens": 13622654.0, | |
| "reward": 0.0, | |
| "reward_std": 0.80560302734375, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05283560581406991, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1274858045865064, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437975, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1468.0, | |
| "completions/mean_length": 1147.25, | |
| "completions/mean_terminated_length": 1123.7333984375, | |
| "completions/min_length": 871.0, | |
| "completions/min_terminated_length": 871.0, | |
| "epoch": 0.32185347526612396, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.829288365048447, | |
| "kl": 0.00440216064453125, | |
| "learning_rate": 8.73368849779547e-07, | |
| "loss": -0.0586, | |
| "num_tokens": 13666658.0, | |
| "reward": 2.2351741790771484e-08, | |
| "reward_std": 1.0113918781280518, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004399012913845209, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.022002579783276802, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15104573749303493, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1453.0, | |
| "completions/mean_length": 1205.0, | |
| "completions/mean_terminated_length": 1136.923095703125, | |
| "completions/min_length": 934.0, | |
| "completions/min_terminated_length": 934.0, | |
| "epoch": 0.3231058234189105, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5508475650699274, | |
| "kl": 0.00417327880859375, | |
| "learning_rate": 8.719984018934348e-07, | |
| "loss": -0.0198, | |
| "num_tokens": 13713002.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 0.9411071538925171, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.049397690395078006, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14628546425305664, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13381856152046848, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1379.0, | |
| "completions/mean_length": 1393.9375, | |
| "completions/mean_terminated_length": 1287.875, | |
| "completions/min_length": 1155.0, | |
| "completions/min_terminated_length": 1155.0, | |
| "epoch": 0.32435817157169694, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1259216947519164, | |
| "kl": 0.004150390625, | |
| "learning_rate": 8.706218065575374e-07, | |
| "loss": 0.0051, | |
| "num_tokens": 13765289.0, | |
| "reward": -5.960464477539063e-08, | |
| "reward_std": 0.7700310945510864, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011427243535616135, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12374645217812205, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13045504405165223, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1404.0, | |
| "completions/mean_length": 1161.25, | |
| "completions/mean_terminated_length": 1007.2727661132812, | |
| "completions/min_length": 872.0, | |
| "completions/min_terminated_length": 872.0, | |
| "epoch": 0.3256105197244834, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3286130814036103, | |
| "kl": 0.00212058424949646, | |
| "learning_rate": 8.692390900531985e-07, | |
| "loss": 0.0569, | |
| "num_tokens": 13819269.0, | |
| "reward": 0.0, | |
| "reward_std": 0.45210930705070496, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23343450769100488, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33023521153193414, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14981470036162822, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1162.0, | |
| "completions/max_terminated_length": 1162.0, | |
| "completions/mean_length": 974.1875, | |
| "completions/mean_terminated_length": 974.1875, | |
| "completions/min_length": 675.0, | |
| "completions/min_terminated_length": 675.0, | |
| "epoch": 0.32686286787726987, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.6973290112944848, | |
| "kl": 0.0045013427734375, | |
| "learning_rate": 8.678502787786249e-07, | |
| "loss": -0.0481, | |
| "num_tokens": 13849256.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8221656084060669, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01926574676180823, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.028234090328970243, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1350.0, | |
| "completions/mean_length": 1254.3125, | |
| "completions/mean_terminated_length": 1106.9000244140625, | |
| "completions/min_length": 850.0, | |
| "completions/min_terminated_length": 850.0, | |
| "epoch": 0.32811521603005633, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8513068805319826, | |
| "kl": 0.003444671630859375, | |
| "learning_rate": 8.664553992483812e-07, | |
| "loss": -0.0294, | |
| "num_tokens": 13886621.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8134012818336487, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04343925396813008, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08880475360320686, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804349, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1419.0, | |
| "completions/mean_length": 1267.5, | |
| "completions/mean_terminated_length": 1161.8182373046875, | |
| "completions/min_length": 936.0, | |
| "completions/min_terminated_length": 936.0, | |
| "epoch": 0.32936756418284285, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8893558213524178, | |
| "kl": 0.003467559814453125, | |
| "learning_rate": 8.650544780928851e-07, | |
| "loss": -0.0196, | |
| "num_tokens": 13935477.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.748847246170044, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03399978669526769, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0913917502530681, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1467.0, | |
| "completions/mean_length": 1393.75, | |
| "completions/mean_terminated_length": 1287.5, | |
| "completions/min_length": 1085.0, | |
| "completions/min_terminated_length": 1085.0, | |
| "epoch": 0.3306199123356293, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2549999172638953, | |
| "kl": 0.00475311279296875, | |
| "learning_rate": 8.63647542057898e-07, | |
| "loss": -0.0273, | |
| "num_tokens": 13998809.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9957271814346313, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032845331546287986, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1426354161680431, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1476.0, | |
| "completions/mean_length": 1465.625, | |
| "completions/mean_terminated_length": 1316.666748046875, | |
| "completions/min_length": 1156.0, | |
| "completions/min_terminated_length": 1156.0, | |
| "epoch": 0.3318722604884158, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.761811279986975, | |
| "kl": 0.004451751708984375, | |
| "learning_rate": 8.622346180040149e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 14063899.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9594628810882568, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003469042362222641, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06436545386363138, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1351.0, | |
| "completions/mean_length": 1490.6875, | |
| "completions/mean_terminated_length": 1351.0, | |
| "completions/min_length": 1351.0, | |
| "completions/min_terminated_length": 1351.0, | |
| "epoch": 0.33312460864120225, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.812294289039839, | |
| "kl": 0.00394439697265625, | |
| "learning_rate": 8.608157329061513e-07, | |
| "loss": -0.0088, | |
| "num_tokens": 14117462.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8838216066360474, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.040579408270268943, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06117251081342495, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625451, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1498.0, | |
| "completions/mean_length": 1466.375, | |
| "completions/mean_terminated_length": 1410.3333740234375, | |
| "completions/min_length": 1160.0, | |
| "completions/min_terminated_length": 1160.0, | |
| "epoch": 0.3343769567939887, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1377512293638716, | |
| "kl": 0.00469207763671875, | |
| "learning_rate": 8.59390913853028e-07, | |
| "loss": 0.0227, | |
| "num_tokens": 14167892.0, | |
| "reward": 0.0, | |
| "reward_std": 0.5932345390319824, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.095152127303474, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13689770081097544, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.33562930494677523, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.006613104134553, | |
| "kl": 0.004669189453125, | |
| "learning_rate": 8.579601880466547e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 14229372.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0337638854980469, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04680772992368523, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14029739799038618, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1468.0, | |
| "completions/mean_length": 1392.9375, | |
| "completions/mean_terminated_length": 1214.5, | |
| "completions/min_length": 1000.0, | |
| "completions/min_terminated_length": 1000.0, | |
| "epoch": 0.3368816530995617, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7703650077113675, | |
| "kl": 0.0036773681640625, | |
| "learning_rate": 8.565235828018099e-07, | |
| "loss": 0.0013, | |
| "num_tokens": 14289123.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0367697477340698, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07296543210522512, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07812168004547569, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078611, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1487.0, | |
| "completions/mean_length": 1464.5625, | |
| "completions/mean_terminated_length": 1358.25, | |
| "completions/min_length": 1214.0, | |
| "completions/min_terminated_length": 1214.0, | |
| "epoch": 0.33813400125234816, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.773518375849927, | |
| "kl": 0.0041961669921875, | |
| "learning_rate": 8.550811255455198e-07, | |
| "loss": -0.0021, | |
| "num_tokens": 14352892.0, | |
| "reward": -2.2351741790771484e-08, | |
| "reward_std": 0.9712283611297607, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06869379781464208, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0930651391561654, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.3393863494051346, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.79098774129514, | |
| "kl": 0.003692626953125, | |
| "learning_rate": 8.536328438165346e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 14414740.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0314404964447021, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0126400376983615, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11352147882865961, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258102, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1366.0, | |
| "completions/mean_length": 1491.625, | |
| "completions/mean_terminated_length": 1366.0, | |
| "completions/min_length": 1366.0, | |
| "completions/min_terminated_length": 1366.0, | |
| "epoch": 0.3406386975579211, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3328071582670127, | |
| "kl": 0.002582550048828125, | |
| "learning_rate": 8.521787652648026e-07, | |
| "loss": -0.0005, | |
| "num_tokens": 14475390.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.066014051437378, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04816321266725149, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23143656867957818, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252809, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1274.0, | |
| "completions/max_terminated_length": 1274.0, | |
| "completions/mean_length": 966.0, | |
| "completions/mean_terminated_length": 966.0, | |
| "completions/min_length": 818.0, | |
| "completions/min_terminated_length": 818.0, | |
| "epoch": 0.34189104571070755, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.623716761400387, | |
| "kl": 0.00237274169921875, | |
| "learning_rate": 8.507189176509429e-07, | |
| "loss": 0.0118, | |
| "num_tokens": 14519830.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8626605868339539, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09591776756938776, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0561472451616448, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1356.0, | |
| "completions/mean_length": 1441.8125, | |
| "completions/mean_terminated_length": 1267.25, | |
| "completions/min_length": 1188.0, | |
| "completions/min_terminated_length": 1188.0, | |
| "epoch": 0.3431433938634941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6860149670271087, | |
| "kl": 0.003993988037109375, | |
| "learning_rate": 8.492533288457142e-07, | |
| "loss": 0.0176, | |
| "num_tokens": 14562059.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7108601331710815, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08617312005850387, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09760563193409819, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10878112581387149, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1485.0, | |
| "completions/mean_length": 1495.3125, | |
| "completions/mean_terminated_length": 1462.5, | |
| "completions/min_length": 1440.0, | |
| "completions/min_terminated_length": 1440.0, | |
| "epoch": 0.34439574201628054, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.40909669375685, | |
| "kl": 0.00632476806640625, | |
| "learning_rate": 8.477820268294844e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 14626280.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9581431150436401, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0518540297916247, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05711745364940273, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087681, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 1498.8125, | |
| "completions/mean_terminated_length": 1490.5, | |
| "completions/min_length": 1487.0, | |
| "completions/min_terminated_length": 1487.0, | |
| "epoch": 0.345648090169067, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.606304176627287, | |
| "kl": 0.003498077392578125, | |
| "learning_rate": 8.463050396916945e-07, | |
| "loss": 0.0, | |
| "num_tokens": 14686461.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.8919962048530579, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.16232941024284467, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.38333692394397534, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1387.0, | |
| "completions/mean_length": 1296.25, | |
| "completions/mean_terminated_length": 1034.2857666015625, | |
| "completions/min_length": 430.0, | |
| "completions/min_terminated_length": 430.0, | |
| "epoch": 0.34690043832185347, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4727706531128004, | |
| "kl": 0.0047149658203125, | |
| "learning_rate": 8.44822395630324e-07, | |
| "loss": -0.0713, | |
| "num_tokens": 14729641.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0214866399765015, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006892221922202982, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027501536576714, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 1469.6875, | |
| "completions/mean_terminated_length": 1338.3333740234375, | |
| "completions/min_length": 1234.0, | |
| "completions/min_terminated_length": 1234.0, | |
| "epoch": 0.34815278647463993, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.079175222753427, | |
| "kl": 0.004638671875, | |
| "learning_rate": 8.433341229513516e-07, | |
| "loss": 0.0011, | |
| "num_tokens": 14784988.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 1.011260986328125, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016744175612928278, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10326622112744127, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970787, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1403.0, | |
| "completions/max_terminated_length": 1403.0, | |
| "completions/mean_length": 926.75, | |
| "completions/mean_terminated_length": 926.75, | |
| "completions/min_length": 629.0, | |
| "completions/min_terminated_length": 629.0, | |
| "epoch": 0.34940513462742645, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3928033221665683, | |
| "kl": 0.0011532902717590332, | |
| "learning_rate": 8.41840250068215e-07, | |
| "loss": 0.0325, | |
| "num_tokens": 14819992.0, | |
| "reward": 0.0, | |
| "reward_std": 1.027898907661438, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0026503222290372498, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993382935974904, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059472994182545084, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1429.0, | |
| "completions/mean_length": 1439.4375, | |
| "completions/mean_terminated_length": 1338.5, | |
| "completions/min_length": 1219.0, | |
| "completions/min_terminated_length": 1219.0, | |
| "epoch": 0.3506574827802129, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.958647342200965, | |
| "kl": 0.00386810302734375, | |
| "learning_rate": 8.403408055012688e-07, | |
| "loss": 0.0226, | |
| "num_tokens": 14868223.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.9957724213600159, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010003602936438873, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10122225063918935, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.575, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1497.0, | |
| "completions/mean_length": 1458.3125, | |
| "completions/mean_terminated_length": 1366.5999755859375, | |
| "completions/min_length": 1197.0, | |
| "completions/min_terminated_length": 1197.0, | |
| "epoch": 0.3519098309329994, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2796328783487394, | |
| "kl": 0.0066375732421875, | |
| "learning_rate": 8.388358178772394e-07, | |
| "loss": -0.0218, | |
| "num_tokens": 14927820.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8733463287353516, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014896438499357663, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0518786397936184, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 1253.0625, | |
| "completions/mean_terminated_length": 1140.8182373046875, | |
| "completions/min_length": 807.0, | |
| "completions/min_terminated_length": 807.0, | |
| "epoch": 0.35316217908578584, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1237440148175604, | |
| "kl": 0.004863739013671875, | |
| "learning_rate": 8.373253159286788e-07, | |
| "loss": -0.0073, | |
| "num_tokens": 14982213.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7831696271896362, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06306545956559828, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774599513752542, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1496.0, | |
| "completions/mean_length": 1441.875, | |
| "completions/mean_terminated_length": 1345.0, | |
| "completions/min_length": 1205.0, | |
| "completions/min_terminated_length": 1205.0, | |
| "epoch": 0.3544145272385723, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.904795417720074, | |
| "kl": 0.00449371337890625, | |
| "learning_rate": 8.35809328493416e-07, | |
| "loss": -0.0205, | |
| "num_tokens": 15040715.0, | |
| "reward": 3.725290298461914e-09, | |
| "reward_std": 1.0330736637115479, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012980308714010343, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0673415334549809, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1163.0, | |
| "completions/mean_length": 1267.125, | |
| "completions/mean_terminated_length": 1034.25, | |
| "completions/min_length": 623.0, | |
| "completions/min_terminated_length": 623.0, | |
| "epoch": 0.35566687539135877, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.9530736863410463, | |
| "kl": 0.0021953582763671875, | |
| "learning_rate": 8.342878845140067e-07, | |
| "loss": 0.0243, | |
| "num_tokens": 15099253.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.060163974761963, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030749215825924263, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045245562410845486, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476839, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1380.0, | |
| "completions/mean_length": 1273.375, | |
| "completions/mean_terminated_length": 1046.75, | |
| "completions/min_length": 856.0, | |
| "completions/min_terminated_length": 856.0, | |
| "epoch": 0.3569192235441453, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9643132058260195, | |
| "kl": 0.00377655029296875, | |
| "learning_rate": 8.327610130371804e-07, | |
| "loss": -0.0085, | |
| "num_tokens": 15156899.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.9131340980529785, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05994073836967858, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16156243225331035, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590962, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1133.0, | |
| "completions/mean_length": 1196.4375, | |
| "completions/mean_terminated_length": 892.875, | |
| "completions/min_length": 726.0, | |
| "completions/min_terminated_length": 726.0, | |
| "epoch": 0.35817157169693176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4448564572392772, | |
| "kl": 0.005279541015625, | |
| "learning_rate": 8.312287432132857e-07, | |
| "loss": -0.0008, | |
| "num_tokens": 15210234.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6276436448097229, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08401960696737835, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31657785119011167, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1480.0, | |
| "completions/mean_length": 1118.0, | |
| "completions/mean_terminated_length": 1029.84619140625, | |
| "completions/min_length": 768.0, | |
| "completions/min_terminated_length": 768.0, | |
| "epoch": 0.3594239198497182, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.649351006275311, | |
| "kl": 0.004192352294921875, | |
| "learning_rate": 8.296911042957347e-07, | |
| "loss": 0.0474, | |
| "num_tokens": 15254266.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0014917850494385, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05039245601276097, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0698277819618762, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567838, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1495.0, | |
| "completions/mean_length": 1408.8125, | |
| "completions/mean_terminated_length": 1317.625, | |
| "completions/min_length": 1145.0, | |
| "completions/min_terminated_length": 1145.0, | |
| "epoch": 0.3606762680025047, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.3753456567317306, | |
| "kl": 0.003170013427734375, | |
| "learning_rate": 8.281481256404427e-07, | |
| "loss": -0.0065, | |
| "num_tokens": 15310551.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.0467666387557983, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.011685861651235842, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.020940553119970465, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901161, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1287.0, | |
| "completions/mean_length": 1308.5, | |
| "completions/mean_terminated_length": 1117.0, | |
| "completions/min_length": 1059.0, | |
| "completions/min_terminated_length": 1059.0, | |
| "epoch": 0.36192861615529115, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.7116355558289365, | |
| "kl": 0.0031452178955078125, | |
| "learning_rate": 8.265998367052699e-07, | |
| "loss": -0.0148, | |
| "num_tokens": 15357047.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7932579517364502, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05102504905151101, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046974298933007336, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1322.0, | |
| "completions/mean_length": 1303.375, | |
| "completions/mean_terminated_length": 1150.4444580078125, | |
| "completions/min_length": 910.0, | |
| "completions/min_terminated_length": 910.0, | |
| "epoch": 0.36318096430807767, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6471576640483025, | |
| "kl": 0.002368927001953125, | |
| "learning_rate": 8.25046267049458e-07, | |
| "loss": -0.0155, | |
| "num_tokens": 15419477.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9311500191688538, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.17596829941789516, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18219217687822756, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12412657816683506, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.36443331246086413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.972210343638841, | |
| "kl": 0.004058837890625, | |
| "learning_rate": 8.234874463330651e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 15481293.0, | |
| "reward": 0.0, | |
| "reward_std": 0.6159095764160156, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06449275539626861, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07616565949841655, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10852547064066473, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1348.0, | |
| "completions/mean_length": 1140.125, | |
| "completions/mean_terminated_length": 1020.1666870117188, | |
| "completions/min_length": 215.0, | |
| "completions/min_terminated_length": 215.0, | |
| "epoch": 0.3656856606136506, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.7273022702751355, | |
| "kl": 0.00434112548828125, | |
| "learning_rate": 8.219234043164007e-07, | |
| "loss": -0.0148, | |
| "num_tokens": 15538271.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8317296504974365, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002159562349982134, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040954201238117, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1495.0, | |
| "completions/mean_length": 1346.875, | |
| "completions/mean_terminated_length": 1227.77783203125, | |
| "completions/min_length": 1055.0, | |
| "completions/min_terminated_length": 1055.0, | |
| "epoch": 0.36693800876643706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.479167145868108, | |
| "kl": 0.005218505859375, | |
| "learning_rate": 8.203541708594571e-07, | |
| "loss": -0.0154, | |
| "num_tokens": 15584509.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0486056804656982, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005770089344222506, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07441253794038902, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1474.0, | |
| "completions/mean_length": 1284.5625, | |
| "completions/mean_terminated_length": 1186.6363525390625, | |
| "completions/min_length": 979.0, | |
| "completions/min_terminated_length": 979.0, | |
| "epoch": 0.3681903569192235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.139340219060384, | |
| "kl": 0.00438690185546875, | |
| "learning_rate": 8.18779775921339e-07, | |
| "loss": 0.0201, | |
| "num_tokens": 15631742.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 1.0337092876434326, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061225139692727595, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0882517727987926, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1388.0, | |
| "completions/mean_length": 1493.0, | |
| "completions/mean_terminated_length": 1388.0, | |
| "completions/min_length": 1388.0, | |
| "completions/min_terminated_length": 1388.0, | |
| "epoch": 0.36944270507201, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.4977483724240614, | |
| "kl": 0.0029296875, | |
| "learning_rate": 8.17200249559692e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 15698798.0, | |
| "reward": 0.0, | |
| "reward_std": 0.4475941061973572, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.24246809612484624, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35195872278638696, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045224, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1412.0, | |
| "completions/mean_length": 1458.625, | |
| "completions/mean_terminated_length": 1367.5999755859375, | |
| "completions/min_length": 1330.0, | |
| "completions/min_terminated_length": 1330.0, | |
| "epoch": 0.3706950532247965, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.867561636937004, | |
| "kl": 0.004486083984375, | |
| "learning_rate": 8.156156219301287e-07, | |
| "loss": -0.0096, | |
| "num_tokens": 15766096.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.9567909240722656, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09166855392489899, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11839819598536988, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13709958532503408, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1384.0, | |
| "completions/mean_length": 1323.6875, | |
| "completions/mean_terminated_length": 1217.9000244140625, | |
| "completions/min_length": 1037.0, | |
| "completions/min_terminated_length": 1037.0, | |
| "epoch": 0.371947401377583, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.38013590229778, | |
| "kl": 0.00470733642578125, | |
| "learning_rate": 8.140259232856521e-07, | |
| "loss": -0.0394, | |
| "num_tokens": 15817547.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9704372882843018, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05488740961091947, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10481500155411475, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13158576980363348, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1423.0, | |
| "completions/mean_length": 1495.1875, | |
| "completions/mean_terminated_length": 1423.0, | |
| "completions/min_length": 1423.0, | |
| "completions/min_terminated_length": 1423.0, | |
| "epoch": 0.37319974953036944, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0382889306606327, | |
| "kl": 0.004367828369140625, | |
| "learning_rate": 8.124311839760797e-07, | |
| "loss": -0.0027, | |
| "num_tokens": 15868646.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8351828455924988, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03419630895774928, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1367852358309971, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09418264367902598, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1344.0, | |
| "completions/mean_length": 1399.875, | |
| "completions/mean_terminated_length": 1179.5999755859375, | |
| "completions/min_length": 1011.0, | |
| "completions/min_terminated_length": 1011.0, | |
| "epoch": 0.3744520976831559, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.362094019703377, | |
| "kl": 0.003170013427734375, | |
| "learning_rate": 8.108314344474623e-07, | |
| "loss": 0.0162, | |
| "num_tokens": 15934516.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 0.9300060868263245, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026608295676684646, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05700271984957867, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568497, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1385.0, | |
| "completions/mean_length": 1310.1875, | |
| "completions/mean_terminated_length": 1120.375, | |
| "completions/min_length": 849.0, | |
| "completions/min_terminated_length": 849.0, | |
| "epoch": 0.37570444583594237, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2267030639366325, | |
| "kl": 0.004962921142578125, | |
| "learning_rate": 8.092267052415044e-07, | |
| "loss": 0.0104, | |
| "num_tokens": 15981759.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9144766330718994, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.13077711907103481, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1576724545552638, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999157, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1495.0, | |
| "completions/mean_length": 1499.6875, | |
| "completions/mean_terminated_length": 1495.0, | |
| "completions/min_length": 1495.0, | |
| "completions/min_terminated_length": 1495.0, | |
| "epoch": 0.3769567939887289, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.857067953077971, | |
| "kl": 0.004589080810546875, | |
| "learning_rate": 8.076170269949795e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 16032986.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.8725603818893433, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01691320115670981, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.057867471716033625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194864, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1476.0, | |
| "completions/mean_length": 1402.5, | |
| "completions/mean_terminated_length": 1305.0, | |
| "completions/min_length": 1100.0, | |
| "completions/min_terminated_length": 1100.0, | |
| "epoch": 0.37820914214151535, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0631024256482773, | |
| "kl": 0.00476837158203125, | |
| "learning_rate": 8.060024304391464e-07, | |
| "loss": -0.0059, | |
| "num_tokens": 16075122.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 1.0385990142822266, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014002892068640102, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04032032793331211, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1480.0, | |
| "completions/mean_length": 1234.625, | |
| "completions/mean_terminated_length": 1196.71435546875, | |
| "completions/min_length": 1007.0, | |
| "completions/min_terminated_length": 1007.0, | |
| "epoch": 0.3794614902943018, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.625794290754689, | |
| "kl": 0.0052490234375, | |
| "learning_rate": 8.043829463991619e-07, | |
| "loss": -0.0729, | |
| "num_tokens": 16137860.0, | |
| "reward": 0.0, | |
| "reward_std": 0.7281184196472168, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15255108490634472, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0853071621433351, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999162, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1479.0, | |
| "completions/mean_length": 1229.0, | |
| "completions/mean_terminated_length": 1105.8182373046875, | |
| "completions/min_length": 759.0, | |
| "completions/min_terminated_length": 759.0, | |
| "epoch": 0.3807138384470883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.260598284534238, | |
| "kl": 0.00495147705078125, | |
| "learning_rate": 8.027586057934928e-07, | |
| "loss": -0.0588, | |
| "num_tokens": 16193676.0, | |
| "reward": 7.450580596923828e-09, | |
| "reward_std": 1.0218051671981812, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.00276089248932003, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03710765345598682, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14168300559373406, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1381.0, | |
| "completions/mean_length": 1335.75, | |
| "completions/mean_terminated_length": 1208.0, | |
| "completions/min_length": 1052.0, | |
| "completions/min_terminated_length": 1052.0, | |
| "epoch": 0.38196618659987475, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.305649016415018, | |
| "kl": 0.00536346435546875, | |
| "learning_rate": 8.011294396333247e-07, | |
| "loss": 0.035, | |
| "num_tokens": 16241520.0, | |
| "reward": 2.2351741790771484e-08, | |
| "reward_std": 1.0677435398101807, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0046395341039948005, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04123648809292501, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12995725793078622, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1387.0, | |
| "completions/mean_length": 1287.0, | |
| "completions/mean_terminated_length": 1159.2000732421875, | |
| "completions/min_length": 871.0, | |
| "completions/min_terminated_length": 871.0, | |
| "epoch": 0.38321853475266127, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.727887770111367, | |
| "kl": 0.00641632080078125, | |
| "learning_rate": 7.99495479021971e-07, | |
| "loss": -0.022, | |
| "num_tokens": 16295288.0, | |
| "reward": -4.470348358154297e-08, | |
| "reward_std": 1.053145408630371, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038974548522257506, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10020848772744548, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12224747213928168, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1449.0, | |
| "completions/mean_length": 1242.0625, | |
| "completions/mean_terminated_length": 1124.8182373046875, | |
| "completions/min_length": 732.0, | |
| "completions/min_terminated_length": 732.0, | |
| "epoch": 0.38447088290544773, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.5550785119927446, | |
| "kl": 0.0034637451171875, | |
| "learning_rate": 7.978567551542785e-07, | |
| "loss": -0.0756, | |
| "num_tokens": 16333129.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.6722694635391235, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06133805044031608, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07932499651372282, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.8125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1431.0, | |
| "completions/mean_length": 1466.0625, | |
| "completions/mean_terminated_length": 1319.0, | |
| "completions/min_length": 1256.0, | |
| "completions/min_terminated_length": 1256.0, | |
| "epoch": 0.3857232310582342, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.914391532004015, | |
| "kl": 0.00472259521484375, | |
| "learning_rate": 7.962132993160318e-07, | |
| "loss": -0.0031, | |
| "num_tokens": 16393066.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.5695419311523438, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03688578385137459, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05735193102645086, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863462, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.875, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1300.0, | |
| "completions/mean_length": 1468.125, | |
| "completions/mean_terminated_length": 1245.0, | |
| "completions/min_length": 1190.0, | |
| "completions/min_terminated_length": 1190.0, | |
| "epoch": 0.38697557921102066, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.462122287718944, | |
| "kl": 0.003597259521484375, | |
| "learning_rate": 7.945651428833566e-07, | |
| "loss": -0.0086, | |
| "num_tokens": 16455300.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9045326113700867, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07094748829476913, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07518616664712767, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1460.0, | |
| "completions/mean_length": 1250.0625, | |
| "completions/mean_terminated_length": 1055.6666259765625, | |
| "completions/min_length": 953.0, | |
| "completions/min_terminated_length": 953.0, | |
| "epoch": 0.3882279273638071, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.9451996655491754, | |
| "kl": 0.003414154052734375, | |
| "learning_rate": 7.929123173221197e-07, | |
| "loss": 0.016, | |
| "num_tokens": 16510829.0, | |
| "reward": -2.60770320892334e-08, | |
| "reward_std": 0.9780210256576538, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11998443212330577, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.273643343882272, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725535, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1482.0, | |
| "completions/mean_length": 1477.3125, | |
| "completions/mean_terminated_length": 1409.25, | |
| "completions/min_length": 1344.0, | |
| "completions/min_terminated_length": 1344.0, | |
| "epoch": 0.3894802755165936, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.5088718729093884, | |
| "kl": 0.0039215087890625, | |
| "learning_rate": 7.91254854187329e-07, | |
| "loss": 0.0109, | |
| "num_tokens": 16557338.0, | |
| "reward": -2.9802322387695312e-08, | |
| "reward_std": 0.8606460094451904, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11084663306324073, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10033388109681571, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081414, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1250.0, | |
| "completions/mean_length": 1070.625, | |
| "completions/mean_terminated_length": 1042.0, | |
| "completions/min_length": 692.0, | |
| "completions/min_terminated_length": 692.0, | |
| "epoch": 0.3907326236693801, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.633658673586784, | |
| "kl": 0.004913330078125, | |
| "learning_rate": 7.895927851225315e-07, | |
| "loss": -0.0045, | |
| "num_tokens": 16585492.0, | |
| "reward": 0.0, | |
| "reward_std": 0.8763086795806885, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010857263566407607, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06826631403415188, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8333333333333334, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1453.0, | |
| "completions/mean_length": 1316.9375, | |
| "completions/mean_terminated_length": 1207.0999755859375, | |
| "completions/min_length": 926.0, | |
| "completions/min_terminated_length": 926.0, | |
| "epoch": 0.3919849718221666, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.5743987456607456, | |
| "kl": 0.00490570068359375, | |
| "learning_rate": 7.879261418592072e-07, | |
| "loss": -0.0521, | |
| "num_tokens": 16629555.0, | |
| "reward": 1.4901161193847656e-08, | |
| "reward_std": 0.9046754240989685, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07072576648968745, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14285699045244268, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590966, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1433.0, | |
| "completions/mean_length": 1382.4375, | |
| "completions/mean_terminated_length": 1329.0, | |
| "completions/min_length": 1093.0, | |
| "completions/min_terminated_length": 1093.0, | |
| "epoch": 0.39323731997495304, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.1526667079717625, | |
| "kl": 0.002288818359375, | |
| "learning_rate": 7.862549562161661e-07, | |
| "loss": -0.0277, | |
| "num_tokens": 16682250.0, | |
| "reward": -7.450580596923828e-09, | |
| "reward_std": 1.0446007251739502, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1297401874034389, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1781696946469639, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06871842709362772, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1463.0, | |
| "completions/mean_length": 1497.6875, | |
| "completions/mean_terminated_length": 1463.0, | |
| "completions/min_length": 1463.0, | |
| "completions/min_terminated_length": 1463.0, | |
| "epoch": 0.3944896681277395, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.0308641968752266, | |
| "kl": 0.005645751953125, | |
| "learning_rate": 7.845792600989385e-07, | |
| "loss": -0.0009, | |
| "num_tokens": 16736925.0, | |
| "reward": 0.0, | |
| "reward_std": 1.0489060878753662, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0036366895025502417, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.014546758010200967, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1361.0, | |
| "completions/mean_length": 1448.0, | |
| "completions/mean_terminated_length": 1292.0, | |
| "completions/min_length": 1213.0, | |
| "completions/min_terminated_length": 1213.0, | |
| "epoch": 0.39574201628052597, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.5606215816363824, | |
| "kl": 0.003444671630859375, | |
| "learning_rate": 7.828990854991669e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 16805501.0, | |
| "reward": -1.4901161193847656e-08, | |
| "reward_std": 1.0115642547607422, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021560930387654664, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031239915717000032, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 1500.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 1500.0, | |
| "completions/min_terminated_length": 0.0, | |
| "epoch": 0.3969943644333125, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.8884496894175316, | |
| "kl": 0.00472259521484375, | |
| "learning_rate": 7.812144644939948e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 16868629.0, | |
| "reward": 2.9802322387695312e-08, | |
| "reward_std": 0.9699341058731079, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.024920589109913467, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09830967886668995, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1493.0, | |
| "completions/mean_length": 1401.8125, | |
| "completions/mean_terminated_length": 1342.9000244140625, | |
| "completions/min_length": 1196.0, | |
| "completions/min_terminated_length": 1196.0, | |
| "epoch": 0.39824671258609895, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.4263971987387944, | |
| "kl": 0.00539398193359375, | |
| "learning_rate": 7.795254292454546e-07, | |
| "loss": -0.0029, | |
| "num_tokens": 16930194.0, | |
| "reward": 3.725290298461914e-09, | |
| "reward_std": 1.058499813079834, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003584071693735027, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06412609719118169, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1327.0, | |
| "completions/mean_length": 1266.875, | |
| "completions/mean_terminated_length": 1085.5555419921875, | |
| "completions/min_length": 632.0, | |
| "completions/min_terminated_length": 632.0, | |
| "epoch": 0.3994990607388854, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.6400441912231263, | |
| "kl": 0.00444793701171875, | |
| "learning_rate": 7.778320119998535e-07, | |
| "loss": -0.121, | |
| "num_tokens": 16979440.0, | |
| "reward": -9.313225746154785e-09, | |
| "reward_std": 1.0413284301757812, | |
| "rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005981072426200435, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04425931042175955, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0926962382871743, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 1350.1875, | |
| "completions/mean_terminated_length": 1260.300048828125, | |
| "completions/min_length": 1061.0, | |
| "completions/min_terminated_length": 1061.0, | |
| "epoch": 0.4007514088916719, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.2125264454232823, | |
| "kl": 0.00494384765625, | |
| "learning_rate": 7.761342450871578e-07, | |
| "loss": -0.0401, | |
| "num_tokens": 17023723.0, | |
| "reward": 0.0, | |
| "reward_std": 0.9411365389823914, | |
| "rewards/wordcountpos_reward_ecommerce/mean": 0.0, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08138630489162721, | |
| "rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09236477000312811, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125, | |
| "rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465, | |
| "rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259, | |
| "step": 320 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 799, | |
| "num_input_tokens_seen": 17023723, | |
| "num_train_epochs": 1, | |
| "save_steps": 80, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |