{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 9.998758997269794, "eval_steps": 500, "global_step": 5030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 51.5, "completions/mean_length": 9.343750238418579, "completions/min_length": 2.0, "epoch": 0.0019856043683296105, "grad_norm": 8.833409236542238, "kl": 0.0, "learning_rate": 1.9607843137254902e-08, "loss": -0.1307593137025833, "memory(GiB)": 90.14, "reward": 0.16666666977107525, "reward_std": 0.2535768188536167, "rewards/CineAccuracyORM/mean": 0.09375000093132257, "rewards/CineAccuracyORM/std": 0.19780732691287994, "rewards/Format/mean": 0.0729166679084301, "rewards/Format/std": 0.17735834047198296, "step": 1, "train_speed(iter/s)": 0.005831 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 29.375, "completions/mean_length": 7.989583492279053, "completions/min_length": 2.0, "epoch": 0.003971208736659221, "grad_norm": 8.360215664061299, "kl": 0.0, "learning_rate": 3.9215686274509804e-08, "loss": -0.1537517011165619, "memory(GiB)": 93.19, "reward": 0.2083333395421505, "reward_std": 0.2977868393063545, "rewards/CineAccuracyORM/mean": 0.11458333767950535, "rewards/CineAccuracyORM/std": 0.17456800863146782, "rewards/Format/mean": 0.09375000186264515, "rewards/Format/std": 0.16673530638217926, "step": 2, "train_speed(iter/s)": 0.009915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 33.5, "completions/mean_length": 4.6250001192092896, "completions/min_length": 2.0, "epoch": 0.005956813104988831, "grad_norm": 7.288747169581666, "kl": 2.47955322265625e-05, "learning_rate": 5.88235294117647e-08, "loss": -0.09165790677070618, "memory(GiB)": 93.19, "reward": 0.0833333358168602, "reward_std": 0.20412414148449898, "rewards/CineAccuracyORM/mean": 0.0416666679084301, "rewards/CineAccuracyORM/std": 0.14433756470680237, "rewards/Format/mean": 0.0416666679084301, "rewards/Format/std": 0.14433756470680237, "step": 3, "train_speed(iter/s)": 0.013195 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 11.625, "completions/mean_length": 3.4375, "completions/min_length": 2.0, "epoch": 0.007942417473318442, "grad_norm": 2.279249541632776, "kl": -0.00013136863708496094, "learning_rate": 7.843137254901961e-08, "loss": -0.03810252994298935, "memory(GiB)": 93.19, "reward": 0.0416666679084301, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.02083333395421505, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 0.02083333395421505, "rewards/Format/std": 0.04865618050098419, "step": 4, "train_speed(iter/s)": 0.015446 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 65.375, "completions/mean_length": 17.635416984558105, "completions/min_length": 2.0, "epoch": 0.009928021841648052, "grad_norm": 7.483078403985399, "kl": 0.0003783702850341797, "learning_rate": 9.80392156862745e-08, "loss": -0.18313568830490112, "memory(GiB)": 94.21, "reward": 0.354166679084301, "reward_std": 0.2472660318017006, "rewards/CineAccuracyORM/mean": 0.1666666679084301, "rewards/CineAccuracyORM/std": 0.17461250349879265, "rewards/Format/mean": 0.18750000558793545, "rewards/Format/std": 0.2311013862490654, "step": 5, "train_speed(iter/s)": 0.017389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 60.375, "completions/mean_length": 14.072916865348816, "completions/min_length": 2.0, "epoch": 0.011913626209977662, "grad_norm": 8.836452160215568, "kl": 0.00021839141845703125, "learning_rate": 1.176470588235294e-07, "loss": -0.13913874328136444, "memory(GiB)": 94.21, "reward": 0.23958334140479565, "reward_std": 0.2653941735625267, "rewards/CineAccuracyORM/mean": 0.10416667070239782, "rewards/CineAccuracyORM/std": 0.19776283204555511, "rewards/Format/mean": 0.13541667070239782, "rewards/Format/std": 0.2542962096631527, "step": 6, "train_speed(iter/s)": 0.018869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 25.75, "completions/mean_length": 6.96875, "completions/min_length": 2.0, "epoch": 0.013899230578307272, "grad_norm": 2.5665338568614255, "kl": 0.00019860267639160156, "learning_rate": 1.3725490196078432e-07, "loss": -0.08070332556962967, "memory(GiB)": 94.21, "reward": 0.1354166716337204, "reward_std": 0.12991482764482498, "rewards/CineAccuracyORM/mean": 0.0729166679084301, "rewards/CineAccuracyORM/std": 0.11807912588119507, "rewards/Format/mean": 0.0625, "rewards/Format/std": 0.11306675523519516, "step": 7, "train_speed(iter/s)": 0.020182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.25, "completions/mean_length": 13.302083730697632, "completions/min_length": 2.0, "epoch": 0.015884834946636884, "grad_norm": 12.586083776732455, "kl": 0.00010991096496582031, "learning_rate": 1.5686274509803921e-07, "loss": -0.18610158562660217, "memory(GiB)": 94.21, "reward": 0.28125000558793545, "reward_std": 0.3487507253885269, "rewards/CineAccuracyORM/mean": 0.16666666883975267, "rewards/CineAccuracyORM/std": 0.315886452794075, "rewards/Format/mean": 0.11458333488553762, "rewards/Format/std": 0.26997610926628113, "step": 8, "train_speed(iter/s)": 0.020938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.375, "completions/mean_length": 17.937500715255737, "completions/min_length": 2.0, "epoch": 0.017870439314966492, "grad_norm": 8.726468045240763, "kl": 0.00038111209869384766, "learning_rate": 1.764705882352941e-07, "loss": -0.19536086916923523, "memory(GiB)": 94.21, "reward": 0.28125001303851604, "reward_std": 0.2841503508388996, "rewards/CineAccuracyORM/mean": 0.11458333674818277, "rewards/CineAccuracyORM/std": 0.2056400291621685, "rewards/Format/mean": 0.1666666679084301, "rewards/Format/std": 0.23389171808958054, "step": 9, "train_speed(iter/s)": 0.021565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 46.25, "completions/mean_length": 11.604167103767395, "completions/min_length": 2.0, "epoch": 0.019856043683296104, "grad_norm": 3.708710818056766, "kl": -1.0102987289428711e-05, "learning_rate": 1.96078431372549e-07, "loss": -0.08594602346420288, "memory(GiB)": 94.21, "reward": 0.11458333767950535, "reward_std": 0.14109627529978752, "rewards/CineAccuracyORM/mean": 0.031250000931322575, "rewards/CineAccuracyORM/std": 0.08474057167768478, "rewards/Format/mean": 0.08333333674818277, "rewards/Format/std": 0.14910665154457092, "step": 10, "train_speed(iter/s)": 0.022199 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 23.75, "completions/mean_length": 5.770833492279053, "completions/min_length": 2.0, "epoch": 0.021841648051625712, "grad_norm": 6.596021037229982, "kl": 0.0005369186401367188, "learning_rate": 2.156862745098039e-07, "loss": -0.11402159929275513, "memory(GiB)": 94.21, "reward": 0.12500000186264515, "reward_std": 0.18404607847332954, "rewards/CineAccuracyORM/mean": 0.06250000093132257, "rewards/CineAccuracyORM/std": 0.14127394929528236, "rewards/Format/mean": 0.06250000093132257, "rewards/Format/std": 0.14127394929528236, "step": 11, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 49.0, "completions/mean_length": 10.583333611488342, "completions/min_length": 2.0, "epoch": 0.023827252419955324, "grad_norm": 9.710736479035985, "kl": 0.0021132230758666992, "learning_rate": 2.352941176470588e-07, "loss": -0.14895620942115784, "memory(GiB)": 94.21, "reward": 0.250000006519258, "reward_std": 0.26867386512458324, "rewards/CineAccuracyORM/mean": 0.11458333767950535, "rewards/CineAccuracyORM/std": 0.1980806104838848, "rewards/Format/mean": 0.13541667256504297, "rewards/Format/std": 0.23698533326387405, "step": 12, "train_speed(iter/s)": 0.023357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.625, "completions/mean_length": 18.5208338201046, "completions/min_length": 2.0, "epoch": 0.025812856788284936, "grad_norm": 14.74385141030088, "kl": 0.005173683166503906, "learning_rate": 2.549019607843137e-07, "loss": -0.22722145915031433, "memory(GiB)": 94.21, "reward": 0.5000000186264515, "reward_std": 0.5557370781898499, "rewards/CineAccuracyORM/mean": 0.26041667722165585, "rewards/CineAccuracyORM/std": 0.4162924289703369, "rewards/Format/mean": 0.23958333767950535, "rewards/Format/std": 0.41219309717416763, "step": 13, "train_speed(iter/s)": 0.02377 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 58.625, "completions/mean_length": 17.67708432674408, "completions/min_length": 2.0, "epoch": 0.027798461156614544, "grad_norm": 6.691494016340618, "kl": 0.00567626953125, "learning_rate": 2.7450980392156863e-07, "loss": -0.12714232504367828, "memory(GiB)": 94.21, "reward": 0.2916666716337204, "reward_std": 0.2648143917322159, "rewards/CineAccuracyORM/mean": 0.14583334140479565, "rewards/CineAccuracyORM/std": 0.17738834023475647, "rewards/Format/mean": 0.14583333861082792, "rewards/Format/std": 0.1657295897603035, "step": 14, "train_speed(iter/s)": 0.024162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 12.56250062584877, "completions/min_length": 2.0, "epoch": 0.029784065524944156, "grad_norm": 11.826028163769157, "kl": 0.002541065216064453, "learning_rate": 2.941176470588235e-07, "loss": -0.18597036600112915, "memory(GiB)": 94.21, "reward": 0.21875000838190317, "reward_std": 0.2941893860697746, "rewards/CineAccuracyORM/mean": 0.11458333488553762, "rewards/CineAccuracyORM/std": 0.24570107460021973, "rewards/Format/mean": 0.10416666977107525, "rewards/Format/std": 0.25453949347138405, "step": 15, "train_speed(iter/s)": 0.024488 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 26.000000953674316, "completions/min_length": 2.0, "epoch": 0.03176966989327377, "grad_norm": 19.22682565973066, "kl": 0.0291595458984375, "learning_rate": 3.1372549019607843e-07, "loss": -0.28655847907066345, "memory(GiB)": 94.21, "reward": 0.6979166772216558, "reward_std": 0.5280137322843075, "rewards/CineAccuracyORM/mean": 0.33333333395421505, "rewards/CineAccuracyORM/std": 0.38107289373874664, "rewards/Format/mean": 0.3645833386108279, "rewards/Format/std": 0.4106335826218128, "step": 16, "train_speed(iter/s)": 0.024635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 35.55208444595337, "completions/min_length": 2.0, "epoch": 0.03375527426160337, "grad_norm": 16.161087880095785, "kl": 0.03133392333984375, "learning_rate": 3.333333333333333e-07, "loss": -0.2801506519317627, "memory(GiB)": 94.21, "reward": 0.8333333693444729, "reward_std": 0.6809760481119156, "rewards/CineAccuracyORM/mean": 0.39583334047347307, "rewards/CineAccuracyORM/std": 0.4389779530465603, "rewards/Format/mean": 0.4375000111758709, "rewards/Format/std": 0.4703202247619629, "step": 17, "train_speed(iter/s)": 0.024738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 43.281251430511475, "completions/min_length": 2.0, "epoch": 0.035740878629932984, "grad_norm": 16.578350883090533, "kl": 0.067230224609375, "learning_rate": 3.529411764705882e-07, "loss": -0.32550498843193054, "memory(GiB)": 94.21, "reward": 0.9479166865348816, "reward_std": 0.6519781649112701, "rewards/CineAccuracyORM/mean": 0.4479166744276881, "rewards/CineAccuracyORM/std": 0.45145706087350845, "rewards/Format/mean": 0.5000000074505806, "rewards/Format/std": 0.4876159466803074, "step": 18, "train_speed(iter/s)": 0.02466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 56.229168176651, "completions/min_length": 4.625, "epoch": 0.037726482998262596, "grad_norm": 8.071834804214191, "kl": 0.07891845703125, "learning_rate": 3.7254901960784315e-07, "loss": -0.1669750064611435, "memory(GiB)": 94.21, "reward": 1.5312500223517418, "reward_std": 0.2910274975001812, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.19398127868771553, "rewards/Format/mean": 0.7708333507180214, "rewards/Format/std": 0.3783007487654686, "step": 19, "train_speed(iter/s)": 0.024471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 46.822917461395264, "completions/min_length": 5.625, "epoch": 0.03971208736659221, "grad_norm": 9.923832627018575, "kl": 0.0755157470703125, "learning_rate": 3.92156862745098e-07, "loss": -0.21674101054668427, "memory(GiB)": 94.21, "reward": 1.2395833656191826, "reward_std": 0.4177897088229656, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.34121063724160194, "rewards/Format/mean": 0.6354166828095913, "rewards/Format/std": 0.4222923330962658, "step": 20, "train_speed(iter/s)": 0.024436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 35.95833429694176, "completions/min_length": 2.0, "epoch": 0.04169769173492182, "grad_norm": 24.745996197714177, "kl": 0.077484130859375, "learning_rate": 4.117647058823529e-07, "loss": -0.30436521768569946, "memory(GiB)": 94.21, "reward": 0.9895833488553762, "reward_std": 0.6039165481925011, "rewards/CineAccuracyORM/mean": 0.4791666781529784, "rewards/CineAccuracyORM/std": 0.4370591938495636, "rewards/Format/mean": 0.5104166818782687, "rewards/Format/std": 0.45182302221655846, "step": 21, "train_speed(iter/s)": 0.02441 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 63.91666841506958, "completions/min_length": 24.125, "epoch": 0.043683296103251425, "grad_norm": 1.0997888211451068, "kl": 0.174560546875, "learning_rate": 4.313725490196078e-07, "loss": -0.015414511784911156, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 22, "train_speed(iter/s)": 0.024545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 66.44791889190674, "completions/min_length": 25.25, "epoch": 0.045668900471581036, "grad_norm": 0.009539260932387994, "kl": 0.18096923828125, "learning_rate": 4.5098039215686274e-07, "loss": 0.00018086486670654267, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 23, "train_speed(iter/s)": 0.024559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 72.69791889190674, "completions/min_length": 28.25, "epoch": 0.04765450483991065, "grad_norm": 0.007386712773020203, "kl": 0.14459228515625, "learning_rate": 4.705882352941176e-07, "loss": 0.0001446278765797615, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 24, "train_speed(iter/s)": 0.024555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.875, "completions/mean_length": 61.95833492279053, "completions/min_length": 24.625, "epoch": 0.04964010920824026, "grad_norm": 0.00875220203675299, "kl": 0.1748046875, "learning_rate": 4.901960784313725e-07, "loss": 0.00017489976016804576, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 25, "train_speed(iter/s)": 0.024602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 66.00000143051147, "completions/min_length": 25.25, "epoch": 0.05162571357656987, "grad_norm": 1.9358129459128381, "kl": 0.17364501953125, "learning_rate": 5.098039215686274e-07, "loss": 0.0022797263227403164, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.23100870847702026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 26, "train_speed(iter/s)": 0.024617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 71.32291841506958, "completions/min_length": 18.5, "epoch": 0.05361131794489948, "grad_norm": 0.00795613264759887, "kl": 0.1551513671875, "learning_rate": 5.294117647058823e-07, "loss": 0.00015511378296650946, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 27, "train_speed(iter/s)": 0.024504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 67.26041889190674, "completions/min_length": 31.625, "epoch": 0.05559692231322909, "grad_norm": 0.008135823299937521, "kl": 0.18121337890625, "learning_rate": 5.490196078431373e-07, "loss": 0.00018121811444871128, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 28, "train_speed(iter/s)": 0.024483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 71.23958492279053, "completions/min_length": 29.0, "epoch": 0.0575825266815587, "grad_norm": 0.0072313349721089765, "kl": 0.13995361328125, "learning_rate": 5.686274509803921e-07, "loss": 0.00014012031897436827, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 29, "train_speed(iter/s)": 0.024428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.625, "completions/mean_length": 69.34375238418579, "completions/min_length": 33.5, "epoch": 0.05956813104988831, "grad_norm": 0.007334074235255649, "kl": 0.14501953125, "learning_rate": 5.88235294117647e-07, "loss": 0.0001449078117730096, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 30, "train_speed(iter/s)": 0.024441 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 63.281251430511475, "completions/min_length": 26.625, "epoch": 0.06155373541821792, "grad_norm": 0.009302543997764104, "kl": 0.2073974609375, "learning_rate": 6.078431372549019e-07, "loss": 0.00020755574223585427, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 31, "train_speed(iter/s)": 0.0244 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 67.78125238418579, "completions/min_length": 25.875, "epoch": 0.06353933978654754, "grad_norm": 0.007934062675082865, "kl": 0.19287109375, "learning_rate": 6.274509803921569e-07, "loss": 0.00019275880185887218, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 32, "train_speed(iter/s)": 0.024458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.625, "completions/mean_length": 79.22917079925537, "completions/min_length": 27.625, "epoch": 0.06552494415487714, "grad_norm": 0.006838073989216734, "kl": 0.1513671875, "learning_rate": 6.470588235294117e-07, "loss": 0.00015125676873140037, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 33, "train_speed(iter/s)": 0.024334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 77.739586353302, "completions/min_length": 39.5, "epoch": 0.06751054852320675, "grad_norm": 0.006296733856276834, "kl": 0.13775634765625, "learning_rate": 6.666666666666666e-07, "loss": 0.00013771496014669538, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 34, "train_speed(iter/s)": 0.024298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 83.29166793823242, "completions/min_length": 40.75, "epoch": 0.06949615289153636, "grad_norm": 0.005644199777163406, "kl": 0.1414794921875, "learning_rate": 6.862745098039216e-07, "loss": 0.0001416189334122464, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 35, "train_speed(iter/s)": 0.024321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 73.78125095367432, "completions/min_length": 32.75, "epoch": 0.07148175725986597, "grad_norm": 1.3475220550057878, "kl": 0.17987060546875, "learning_rate": 7.058823529411765e-07, "loss": -0.002007975010201335, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 36, "train_speed(iter/s)": 0.024252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 68.67708587646484, "completions/min_length": 27.625, "epoch": 0.07346736162819559, "grad_norm": 1.378099520868228, "kl": 0.19500732421875, "learning_rate": 7.254901960784313e-07, "loss": 0.01300447341054678, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 37, "train_speed(iter/s)": 0.024175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 75.84375143051147, "completions/min_length": 36.25, "epoch": 0.07545296599652519, "grad_norm": 0.010631492564245152, "kl": 0.15240478515625, "learning_rate": 7.450980392156863e-07, "loss": 0.00015249915304593742, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 38, "train_speed(iter/s)": 0.024119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 69.36458539962769, "completions/min_length": 32.875, "epoch": 0.0774385703648548, "grad_norm": 0.007479684874167, "kl": 0.17919921875, "learning_rate": 7.647058823529411e-07, "loss": 0.0001796395517885685, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 39, "train_speed(iter/s)": 0.024105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.625, "completions/mean_length": 70.46875286102295, "completions/min_length": 32.125, "epoch": 0.07942417473318442, "grad_norm": 0.007549139015510863, "kl": 0.1712646484375, "learning_rate": 7.84313725490196e-07, "loss": 0.00017136195674538612, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 40, "train_speed(iter/s)": 0.023999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.875, "completions/mean_length": 69.66666889190674, "completions/min_length": 29.5, "epoch": 0.08140977910151402, "grad_norm": 0.007635037188927877, "kl": 0.17462158203125, "learning_rate": 8.03921568627451e-07, "loss": 0.00017471713363192976, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 41, "train_speed(iter/s)": 0.024045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 76.26041889190674, "completions/min_length": 37.75, "epoch": 0.08339538346984364, "grad_norm": 0.006637911382664406, "kl": 0.1431884765625, "learning_rate": 8.235294117647058e-07, "loss": 0.00014315629960037768, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 42, "train_speed(iter/s)": 0.023979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 76.08333587646484, "completions/min_length": 33.125, "epoch": 0.08538098783817324, "grad_norm": 0.007545569232213225, "kl": 0.16400146484375, "learning_rate": 8.431372549019608e-07, "loss": 0.00016389989468734711, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 43, "train_speed(iter/s)": 0.023853 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 70.80208587646484, "completions/min_length": 29.75, "epoch": 0.08736659220650285, "grad_norm": 0.007971807973221248, "kl": 0.187744140625, "learning_rate": 8.627450980392156e-07, "loss": 0.00018761036335490644, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 44, "train_speed(iter/s)": 0.023811 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 77.07291984558105, "completions/min_length": 39.25, "epoch": 0.08935219657483247, "grad_norm": 0.0071067330941543775, "kl": 0.14849853515625, "learning_rate": 8.823529411764705e-07, "loss": 0.00014854694018140435, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 45, "train_speed(iter/s)": 0.023768 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 73.270836353302, "completions/min_length": 32.375, "epoch": 0.09133780094316207, "grad_norm": 0.007376222395453255, "kl": 0.1529541015625, "learning_rate": 9.019607843137255e-07, "loss": 0.00015288867871277034, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 46, "train_speed(iter/s)": 0.023741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 61.11458492279053, "completions/min_length": 32.75, "epoch": 0.09332340531149169, "grad_norm": 0.008943850465861202, "kl": 0.18115234375, "learning_rate": 9.215686274509803e-07, "loss": 0.00018107870710082352, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 47, "train_speed(iter/s)": 0.023778 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 79.77083587646484, "completions/min_length": 33.375, "epoch": 0.0953090096798213, "grad_norm": 0.007256615320610051, "kl": 0.1475830078125, "learning_rate": 9.411764705882352e-07, "loss": 0.00014765582454856485, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 48, "train_speed(iter/s)": 0.023708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 81.05208587646484, "completions/min_length": 39.375, "epoch": 0.0972946140481509, "grad_norm": 0.007071796756037824, "kl": 0.129150390625, "learning_rate": 9.607843137254902e-07, "loss": 0.00012925347255077213, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 49, "train_speed(iter/s)": 0.023616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 71.208336353302, "completions/min_length": 31.375, "epoch": 0.09928021841648052, "grad_norm": 0.008868009201440628, "kl": 0.16436767578125, "learning_rate": 9.80392156862745e-07, "loss": 0.0001643826690269634, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 50, "train_speed(iter/s)": 0.023591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 62.500001430511475, "completions/min_length": 29.625, "epoch": 0.10126582278481013, "grad_norm": 2.8657160529905243, "kl": 0.1888427734375, "learning_rate": 1e-06, "loss": -0.0009202882647514343, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 51, "train_speed(iter/s)": 0.023603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 70.00000286102295, "completions/min_length": 32.75, "epoch": 0.10325142715313974, "grad_norm": 0.008463049210465283, "kl": 0.15093994140625, "learning_rate": 9.9999990046966e-07, "loss": 0.00015097255527507514, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 52, "train_speed(iter/s)": 0.023579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 74.71875190734863, "completions/min_length": 37.875, "epoch": 0.10523703152146935, "grad_norm": 0.007915017657529928, "kl": 0.13067626953125, "learning_rate": 9.9999960187868e-07, "loss": 0.00013078741903882474, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 53, "train_speed(iter/s)": 0.023519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 73.16666793823242, "completions/min_length": 33.75, "epoch": 0.10722263588979895, "grad_norm": 0.008342352123523848, "kl": 0.142333984375, "learning_rate": 9.999991042271788e-07, "loss": 0.00014238506264518946, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 54, "train_speed(iter/s)": 0.023532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 70.78125143051147, "completions/min_length": 36.125, "epoch": 0.10920824025812857, "grad_norm": 0.00871038345150832, "kl": 0.17596435546875, "learning_rate": 9.999984075153545e-07, "loss": 0.00017616058175917715, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 55, "train_speed(iter/s)": 0.023589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 81.28125190734863, "completions/min_length": 40.25, "epoch": 0.11119384462645818, "grad_norm": 1.6665108646221753, "kl": 0.11895751953125, "learning_rate": 9.999975117434842e-07, "loss": -0.0060371882282197475, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 56, "train_speed(iter/s)": 0.023557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 70.81250190734863, "completions/min_length": 32.25, "epoch": 0.11317944899478778, "grad_norm": 1.7144506327915527, "kl": 0.17333984375, "learning_rate": 9.99996416911925e-07, "loss": 0.0006940675666555762, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 57, "train_speed(iter/s)": 0.023512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.125, "completions/mean_length": 85.10416984558105, "completions/min_length": 38.25, "epoch": 0.1151650533631174, "grad_norm": 0.00724976652524824, "kl": 0.1123046875, "learning_rate": 9.999951230211125e-07, "loss": 0.00011247491784160957, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 58, "train_speed(iter/s)": 0.023468 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 64.27083539962769, "completions/min_length": 30.625, "epoch": 0.117150657731447, "grad_norm": 0.009724558694402126, "kl": 0.1688232421875, "learning_rate": 9.99993630071562e-07, "loss": 0.00016898219473659992, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 59, "train_speed(iter/s)": 0.023438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.375, "completions/mean_length": 71.0729193687439, "completions/min_length": 31.0, "epoch": 0.11913626209977662, "grad_norm": 0.008708744885255611, "kl": 0.13507080078125, "learning_rate": 9.999919380638675e-07, "loss": 0.00013499600754585117, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 60, "train_speed(iter/s)": 0.023439 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 85.77083396911621, "completions/min_length": 37.25, "epoch": 0.12112186646810623, "grad_norm": 0.007643034017040172, "kl": 0.0946044921875, "learning_rate": 9.99990046998703e-07, "loss": 9.463238529860973e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 61, "train_speed(iter/s)": 0.0234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 82.5729193687439, "completions/min_length": 35.25, "epoch": 0.12310747083643583, "grad_norm": 0.008057492803880658, "kl": 0.1072998046875, "learning_rate": 9.999879568768213e-07, "loss": 0.00010734701936598867, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 62, "train_speed(iter/s)": 0.023381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 72.43750190734863, "completions/min_length": 33.375, "epoch": 0.12509307520476545, "grad_norm": 0.7000140733772771, "kl": 0.134521484375, "learning_rate": 9.999856676990543e-07, "loss": 0.014437368139624596, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 63, "train_speed(iter/s)": 0.023404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 83.71875190734863, "completions/min_length": 35.875, "epoch": 0.12707867957309507, "grad_norm": 0.008002377526796563, "kl": 0.10614013671875, "learning_rate": 9.99983179466314e-07, "loss": 0.00010616659710649401, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 64, "train_speed(iter/s)": 0.023413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 74.02083539962769, "completions/min_length": 39.125, "epoch": 0.12906428394142466, "grad_norm": 0.008723755406961395, "kl": 0.115203857421875, "learning_rate": 9.9998049217959e-07, "loss": 0.00011538701073732227, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 65, "train_speed(iter/s)": 0.023392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 74.53125143051147, "completions/min_length": 33.875, "epoch": 0.13104988830975428, "grad_norm": 0.00918228653621971, "kl": 0.11761474609375, "learning_rate": 9.99977605839953e-07, "loss": 0.00011754984734579921, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 66, "train_speed(iter/s)": 0.023386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 67.22916793823242, "completions/min_length": 34.875, "epoch": 0.1330354926780839, "grad_norm": 0.0097638059421632, "kl": 0.12744140625, "learning_rate": 9.999745204485517e-07, "loss": 0.0001275779795832932, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 67, "train_speed(iter/s)": 0.023388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 72.06250286102295, "completions/min_length": 33.75, "epoch": 0.1350210970464135, "grad_norm": 0.009237344961798894, "kl": 0.092041015625, "learning_rate": 9.99971236006615e-07, "loss": 9.19430167414248e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 68, "train_speed(iter/s)": 0.023388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 76.62500286102295, "completions/min_length": 35.375, "epoch": 0.1370067014147431, "grad_norm": 0.00843833223047032, "kl": 0.077301025390625, "learning_rate": 9.999677525154495e-07, "loss": 7.73863575886935e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 69, "train_speed(iter/s)": 0.023398 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 78.12500190734863, "completions/min_length": 39.25, "epoch": 0.13899230578307273, "grad_norm": 0.014993373918113943, "kl": 0.08154296875, "learning_rate": 9.99964069976443e-07, "loss": 8.15772800706327e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 70, "train_speed(iter/s)": 0.023378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 77.33333587646484, "completions/min_length": 34.5, "epoch": 0.14097791015140235, "grad_norm": 0.008776763393947107, "kl": 0.0693359375, "learning_rate": 9.999601883910613e-07, "loss": 6.930225936230272e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 71, "train_speed(iter/s)": 0.023341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 66.85416984558105, "completions/min_length": 30.375, "epoch": 0.14296351451973194, "grad_norm": 0.01027322713928672, "kl": 0.087493896484375, "learning_rate": 9.999561077608495e-07, "loss": 8.75980913406238e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 72, "train_speed(iter/s)": 0.023366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 85.00000286102295, "completions/min_length": 35.25, "epoch": 0.14494911888806156, "grad_norm": 0.0077692777675985635, "kl": 0.0548095703125, "learning_rate": 9.999518280874326e-07, "loss": 5.491119736689143e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 73, "train_speed(iter/s)": 0.023346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 75.75000286102295, "completions/min_length": 35.5, "epoch": 0.14693472325639118, "grad_norm": 0.9484565619998777, "kl": 0.1073455810546875, "learning_rate": 9.999473493725142e-07, "loss": 0.01214763056486845, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 74, "train_speed(iter/s)": 0.023349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 69.5729193687439, "completions/min_length": 25.125, "epoch": 0.14892032762472077, "grad_norm": 0.009753262896379912, "kl": 0.0610809326171875, "learning_rate": 9.999426716178771e-07, "loss": 6.102630868554115e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 75, "train_speed(iter/s)": 0.023325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 71.53125190734863, "completions/min_length": 31.5, "epoch": 0.15090593199305039, "grad_norm": 2.0107202959631496, "kl": 0.058013916015625, "learning_rate": 9.999377948253843e-07, "loss": 0.0037022705655544996, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 76, "train_speed(iter/s)": 0.023327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 75.51041889190674, "completions/min_length": 34.625, "epoch": 0.15289153636138, "grad_norm": 0.008656955877994034, "kl": 0.04874420166015625, "learning_rate": 9.999327189969767e-07, "loss": 4.8715279262978584e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 77, "train_speed(iter/s)": 0.023346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 74.12500333786011, "completions/min_length": 34.875, "epoch": 0.1548771407297096, "grad_norm": 0.007265926721153918, "kl": 0.044189453125, "learning_rate": 9.999274441346755e-07, "loss": 4.4221189455129206e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 78, "train_speed(iter/s)": 0.023349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 65.87500190734863, "completions/min_length": 29.875, "epoch": 0.1568627450980392, "grad_norm": 0.010017192688670995, "kl": 0.044769287109375, "learning_rate": 9.999219702405802e-07, "loss": 4.470112617127597e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 79, "train_speed(iter/s)": 0.023366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.625, "completions/mean_length": 67.26041793823242, "completions/min_length": 30.375, "epoch": 0.15884834946636883, "grad_norm": 0.006441999280046141, "kl": 0.039794921875, "learning_rate": 9.999162973168709e-07, "loss": 3.9837083022575825e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 80, "train_speed(iter/s)": 0.023393 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 73.85416841506958, "completions/min_length": 28.375, "epoch": 0.16083395383469842, "grad_norm": 0.00781500278880752, "kl": 0.042877197265625, "learning_rate": 9.999104253658055e-07, "loss": 4.2875220970017835e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 81, "train_speed(iter/s)": 0.023389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 82.1041693687439, "completions/min_length": 33.0, "epoch": 0.16281955820302804, "grad_norm": 0.005676629435187418, "kl": 0.05523681640625, "learning_rate": 9.99904354389722e-07, "loss": 5.515553857549094e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 82, "train_speed(iter/s)": 0.023334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 73.83333444595337, "completions/min_length": 37.25, "epoch": 0.16480516257135766, "grad_norm": 0.004333843743146698, "kl": 0.0418243408203125, "learning_rate": 9.998980843910374e-07, "loss": 4.1765895730350167e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 83, "train_speed(iter/s)": 0.023327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 81.47916889190674, "completions/min_length": 34.5, "epoch": 0.16679076693968728, "grad_norm": 0.005410702417640253, "kl": 0.027149200439453125, "learning_rate": 9.998916153722476e-07, "loss": 2.7147478249389678e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 84, "train_speed(iter/s)": 0.023309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 73.30208396911621, "completions/min_length": 33.75, "epoch": 0.16877637130801687, "grad_norm": 0.002806379142786781, "kl": 0.0284881591796875, "learning_rate": 9.998849473359283e-07, "loss": 2.8479480533860624e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 85, "train_speed(iter/s)": 0.023309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 79.60416889190674, "completions/min_length": 38.625, "epoch": 0.1707619756763465, "grad_norm": 0.002732902384908916, "kl": 0.016204833984375, "learning_rate": 9.998780802847344e-07, "loss": 1.6214426068472676e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 86, "train_speed(iter/s)": 0.023312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 77.81250238418579, "completions/min_length": 38.0, "epoch": 0.1727475800446761, "grad_norm": 0.016243090387746868, "kl": 0.033344268798828125, "learning_rate": 9.998710142213994e-07, "loss": 3.334351640660316e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 87, "train_speed(iter/s)": 0.023321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 66.70833492279053, "completions/min_length": 31.0, "epoch": 0.1747331844130057, "grad_norm": 1.9790585771315492, "kl": 0.0699310302734375, "learning_rate": 9.998637491487367e-07, "loss": 0.003212481737136841, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.17046868056058884, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 88, "train_speed(iter/s)": 0.023363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 73.56250238418579, "completions/min_length": 30.125, "epoch": 0.17671878878133532, "grad_norm": 0.016190785316558057, "kl": 0.0405426025390625, "learning_rate": 9.998562850696387e-07, "loss": 4.052785880048759e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 89, "train_speed(iter/s)": 0.023362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 74.72916984558105, "completions/min_length": 34.125, "epoch": 0.17870439314966494, "grad_norm": 0.08120243183969493, "kl": 0.11138916015625, "learning_rate": 9.998486219870767e-07, "loss": 0.0001112430909415707, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 90, "train_speed(iter/s)": 0.023352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 72.13541793823242, "completions/min_length": 30.75, "epoch": 0.18068999751799453, "grad_norm": 1.848356040125397, "kl": 0.031524658203125, "learning_rate": 9.998407599041019e-07, "loss": 0.009597557596862316, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 91, "train_speed(iter/s)": 0.023357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 76.03125095367432, "completions/min_length": 32.125, "epoch": 0.18267560188632415, "grad_norm": 0.0054572906759311635, "kl": 0.0155792236328125, "learning_rate": 9.998326988238443e-07, "loss": 1.5576528312521987e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 92, "train_speed(iter/s)": 0.023342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 74.31250238418579, "completions/min_length": 37.125, "epoch": 0.18466120625465376, "grad_norm": 0.007428558860000597, "kl": 0.03792572021484375, "learning_rate": 9.998244387495128e-07, "loss": 3.800111880991608e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 93, "train_speed(iter/s)": 0.023366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 79.26041889190674, "completions/min_length": 35.125, "epoch": 0.18664681062298338, "grad_norm": 0.009430591551939916, "kl": 0.0423126220703125, "learning_rate": 9.998159796843967e-07, "loss": 4.230852209730074e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 94, "train_speed(iter/s)": 0.023352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 84.44791984558105, "completions/min_length": 32.25, "epoch": 0.18863241499131297, "grad_norm": 0.0031344675307674466, "kl": 0.02513885498046875, "learning_rate": 9.99807321631863e-07, "loss": 2.5113808078458533e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 95, "train_speed(iter/s)": 0.023319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 69.864586353302, "completions/min_length": 32.25, "epoch": 0.1906180193596426, "grad_norm": 0.9794287990894858, "kl": 0.022918701171875, "learning_rate": 9.99798464595359e-07, "loss": 0.007392986677587032, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 96, "train_speed(iter/s)": 0.023334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 81.91666746139526, "completions/min_length": 38.0, "epoch": 0.1926036237279722, "grad_norm": 0.003960916632761671, "kl": 0.04085540771484375, "learning_rate": 9.997894085784107e-07, "loss": 4.087508932570927e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 97, "train_speed(iter/s)": 0.023347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 75.95833587646484, "completions/min_length": 36.0, "epoch": 0.1945892280963018, "grad_norm": 1.3083302896777618, "kl": 0.018798828125, "learning_rate": 9.997801535846237e-07, "loss": 0.005059582181274891, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 98, "train_speed(iter/s)": 0.023299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 84.53125476837158, "completions/min_length": 42.25, "epoch": 0.19657483246463142, "grad_norm": 0.0030215346529137454, "kl": 0.0220489501953125, "learning_rate": 9.997706996176825e-07, "loss": 2.202572431997396e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 99, "train_speed(iter/s)": 0.02326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 86.72916793823242, "completions/min_length": 35.75, "epoch": 0.19856043683296104, "grad_norm": 0.008493777978797476, "kl": 0.039031982421875, "learning_rate": 9.997610466813509e-07, "loss": 3.904445475200191e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 100, "train_speed(iter/s)": 0.023264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.125, "completions/mean_length": 86.77083587646484, "completions/min_length": 36.75, "epoch": 0.20054604120129063, "grad_norm": 0.0033261305896777147, "kl": 0.02559661865234375, "learning_rate": 9.997511947794718e-07, "loss": 2.5546782126184553e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 101, "train_speed(iter/s)": 0.02324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 80.07291793823242, "completions/min_length": 33.75, "epoch": 0.20253164556962025, "grad_norm": 0.0050221345405609625, "kl": 0.031707763671875, "learning_rate": 9.99741143915968e-07, "loss": 3.17314152198378e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 102, "train_speed(iter/s)": 0.023258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 69.17708539962769, "completions/min_length": 31.25, "epoch": 0.20451724993794987, "grad_norm": 0.005426878797230674, "kl": 0.018100738525390625, "learning_rate": 9.997308940948404e-07, "loss": 1.811725815059617e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 103, "train_speed(iter/s)": 0.023243 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 72.34375286102295, "completions/min_length": 26.0, "epoch": 0.2065028543062795, "grad_norm": 0.002885475700919475, "kl": 0.02909088134765625, "learning_rate": 9.997204453201696e-07, "loss": 2.9084389097988605e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 104, "train_speed(iter/s)": 0.023234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 77.37500238418579, "completions/min_length": 35.0, "epoch": 0.20848845867460908, "grad_norm": 0.784014990022842, "kl": 0.8624954223632812, "learning_rate": 9.997097975961162e-07, "loss": 0.0008623730391263962, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 105, "train_speed(iter/s)": 0.023243 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 73.20833539962769, "completions/min_length": 34.75, "epoch": 0.2104740630429387, "grad_norm": 0.004153006792887623, "kl": 0.040924072265625, "learning_rate": 9.996989509269185e-07, "loss": 4.089593858225271e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 106, "train_speed(iter/s)": 0.023249 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 82.07291889190674, "completions/min_length": 35.125, "epoch": 0.21245966741126832, "grad_norm": 1.2569571450902168, "kl": 0.0576629638671875, "learning_rate": 9.996879053168951e-07, "loss": -0.012867176905274391, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 107, "train_speed(iter/s)": 0.023245 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 83.13541984558105, "completions/min_length": 35.75, "epoch": 0.2144452717795979, "grad_norm": 0.0021027888710954037, "kl": 0.0270843505859375, "learning_rate": 9.996766607704436e-07, "loss": 2.7072226657764986e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 108, "train_speed(iter/s)": 0.02325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 77.18750238418579, "completions/min_length": 31.25, "epoch": 0.21643087614792753, "grad_norm": 0.7112216195025038, "kl": 0.05426025390625, "learning_rate": 9.996652172920405e-07, "loss": -4.5799341023666784e-05, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 109, "train_speed(iter/s)": 0.023234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.625, "completions/mean_length": 86.58333683013916, "completions/min_length": 33.0, "epoch": 0.21841648051625714, "grad_norm": 0.0018519723391530849, "kl": 0.02147674560546875, "learning_rate": 9.99653574886242e-07, "loss": 2.149765714420937e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 110, "train_speed(iter/s)": 0.023224 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 83.78125286102295, "completions/min_length": 33.75, "epoch": 0.22040208488458674, "grad_norm": 0.004536140829723313, "kl": 0.02535247802734375, "learning_rate": 9.99641733557683e-07, "loss": 2.539155502745416e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 111, "train_speed(iter/s)": 0.023221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 89.65625286102295, "completions/min_length": 37.25, "epoch": 0.22238768925291635, "grad_norm": 0.0018853550139549076, "kl": 0.02152252197265625, "learning_rate": 9.996296933110775e-07, "loss": 2.150795262423344e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 112, "train_speed(iter/s)": 0.023179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 76.06250095367432, "completions/min_length": 36.125, "epoch": 0.22437329362124597, "grad_norm": 0.004650605904984792, "kl": 0.02893829345703125, "learning_rate": 9.996174541512194e-07, "loss": 2.893549026339315e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 113, "train_speed(iter/s)": 0.023167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 74.51041889190674, "completions/min_length": 34.25, "epoch": 0.22635889798957556, "grad_norm": 0.004502458143847963, "kl": 0.03432464599609375, "learning_rate": 9.996050160829812e-07, "loss": 3.434516111155972e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 114, "train_speed(iter/s)": 0.023162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 85.05208587646484, "completions/min_length": 38.25, "epoch": 0.22834450235790518, "grad_norm": 0.003187846579359576, "kl": 0.05316925048828125, "learning_rate": 9.995923791113149e-07, "loss": 5.3155214118305594e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 115, "train_speed(iter/s)": 0.023159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.25, "completions/mean_length": 78.47916984558105, "completions/min_length": 32.25, "epoch": 0.2303301067262348, "grad_norm": 0.005922228518706139, "kl": 0.03337860107421875, "learning_rate": 9.995795432412512e-07, "loss": 3.339227623655461e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 116, "train_speed(iter/s)": 0.023122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 74.11458587646484, "completions/min_length": 32.875, "epoch": 0.23231571109456442, "grad_norm": 0.006251164047718008, "kl": 0.02973175048828125, "learning_rate": 9.995665084779008e-07, "loss": 2.9748269298579544e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 117, "train_speed(iter/s)": 0.023124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 85.07291793823242, "completions/min_length": 35.875, "epoch": 0.234301315462894, "grad_norm": 1.3624861529703818, "kl": 0.030242919921875, "learning_rate": 9.995532748264528e-07, "loss": -0.002606650348752737, "memory(GiB)": 94.21, "reward": 1.802083358168602, "reward_std": 0.0765465535223484, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.26709309965372086, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 118, "train_speed(iter/s)": 0.023096 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 78.11458539962769, "completions/min_length": 29.0, "epoch": 0.23628691983122363, "grad_norm": 0.8400324532484467, "kl": 0.0632781982421875, "learning_rate": 9.995398422921758e-07, "loss": 0.006895631551742554, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 119, "train_speed(iter/s)": 0.023069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 81.72916793823242, "completions/min_length": 36.875, "epoch": 0.23827252419955325, "grad_norm": 0.004239886164230574, "kl": 0.02639007568359375, "learning_rate": 9.995262108804176e-07, "loss": 2.6379761038697325e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 120, "train_speed(iter/s)": 0.023076 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.625, "completions/mean_length": 84.35416889190674, "completions/min_length": 31.75, "epoch": 0.24025812856788284, "grad_norm": 0.001949576903914849, "kl": 0.016693115234375, "learning_rate": 9.995123805966055e-07, "loss": 1.6695452359272167e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 121, "train_speed(iter/s)": 0.023062 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 69.76041793823242, "completions/min_length": 39.125, "epoch": 0.24224373293621246, "grad_norm": 0.006273383546754521, "kl": 0.02678680419921875, "learning_rate": 9.99498351446245e-07, "loss": 2.679082354006823e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 122, "train_speed(iter/s)": 0.023075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.875, "completions/mean_length": 83.07291841506958, "completions/min_length": 31.5, "epoch": 0.24422933730454208, "grad_norm": 0.002229431612515994, "kl": 0.03147125244140625, "learning_rate": 9.99484123434922e-07, "loss": 3.1477738957619295e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 123, "train_speed(iter/s)": 0.023072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 83.75000190734863, "completions/min_length": 28.125, "epoch": 0.24621494167287167, "grad_norm": 0.0023781479884869595, "kl": 0.023468017578125, "learning_rate": 9.994696965683008e-07, "loss": 2.3481863536289893e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 124, "train_speed(iter/s)": 0.023043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 82.833336353302, "completions/min_length": 36.875, "epoch": 0.2482005460412013, "grad_norm": 0.0033789079677525925, "kl": 0.0220489501953125, "learning_rate": 9.994550708521249e-07, "loss": 2.2042582713766024e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 125, "train_speed(iter/s)": 0.023048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 81.13541889190674, "completions/min_length": 34.875, "epoch": 0.2501861504095309, "grad_norm": 0.01273019257020008, "kl": 0.03211212158203125, "learning_rate": 9.994402462922168e-07, "loss": 3.207269764970988e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 126, "train_speed(iter/s)": 0.023021 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 79.23958683013916, "completions/min_length": 30.5, "epoch": 0.2521717547778605, "grad_norm": 0.003132205556229125, "kl": 0.03112030029296875, "learning_rate": 9.994252228944792e-07, "loss": 3.1085153750609607e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 127, "train_speed(iter/s)": 0.023006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 74.01041841506958, "completions/min_length": 26.875, "epoch": 0.25415735914619014, "grad_norm": 0.0052449185528267266, "kl": 0.043609619140625, "learning_rate": 9.994100006648929e-07, "loss": 4.3588974222075194e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 128, "train_speed(iter/s)": 0.022997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 79.82291889190674, "completions/min_length": 38.5, "epoch": 0.2561429635145197, "grad_norm": 1.4595321664608227, "kl": 0.0400543212890625, "learning_rate": 9.993945796095182e-07, "loss": -0.009771152399480343, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 129, "train_speed(iter/s)": 0.023005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 81.37500286102295, "completions/min_length": 34.625, "epoch": 0.2581285678828493, "grad_norm": 0.0028472569708675247, "kl": 0.0347442626953125, "learning_rate": 9.993789597344946e-07, "loss": 3.474685217952356e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 130, "train_speed(iter/s)": 0.023014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 87.67708587646484, "completions/min_length": 34.5, "epoch": 0.26011417225117894, "grad_norm": 1.1348476239911196, "kl": 0.0557098388671875, "learning_rate": 9.993631410460404e-07, "loss": 0.013155095279216766, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 131, "train_speed(iter/s)": 0.022986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 80.54167032241821, "completions/min_length": 34.625, "epoch": 0.26209977661950856, "grad_norm": 0.002179782815721632, "kl": 0.04027557373046875, "learning_rate": 9.993471235504537e-07, "loss": 4.025837915833108e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 132, "train_speed(iter/s)": 0.022971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 72.16666841506958, "completions/min_length": 32.25, "epoch": 0.2640853809878382, "grad_norm": 0.005984471816592713, "kl": 0.0272216796875, "learning_rate": 9.993309072541115e-07, "loss": 2.720650809351355e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 133, "train_speed(iter/s)": 0.022957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.625, "completions/mean_length": 73.77083587646484, "completions/min_length": 26.0, "epoch": 0.2660709853561678, "grad_norm": 0.007462330711181815, "kl": 0.0359954833984375, "learning_rate": 9.993144921634693e-07, "loss": 3.602053766371682e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 134, "train_speed(iter/s)": 0.022952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 76.86458444595337, "completions/min_length": 32.5, "epoch": 0.2680565897244974, "grad_norm": 0.010850988612707155, "kl": 0.03931427001953125, "learning_rate": 9.992978782850628e-07, "loss": 3.928934529540129e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 135, "train_speed(iter/s)": 0.022964 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.125, "completions/mean_length": 79.54167079925537, "completions/min_length": 32.125, "epoch": 0.270042194092827, "grad_norm": 0.003653760470591168, "kl": 0.022106170654296875, "learning_rate": 9.992810656255062e-07, "loss": 2.2124680981505662e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 136, "train_speed(iter/s)": 0.022952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 77.42708683013916, "completions/min_length": 42.5, "epoch": 0.2720277984611566, "grad_norm": 0.005826066885775745, "kl": 0.029163360595703125, "learning_rate": 9.992640541914931e-07, "loss": 2.9201857614680193e-05, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 137, "train_speed(iter/s)": 0.022963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 78.97916889190674, "completions/min_length": 35.25, "epoch": 0.2740134028294862, "grad_norm": 2.440091205265988, "kl": 0.02922821044921875, "learning_rate": 9.992468439897958e-07, "loss": -0.001077904598787427, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 138, "train_speed(iter/s)": 0.022968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 93.23958587646484, "completions/min_length": 43.375, "epoch": 0.27599900719781584, "grad_norm": 0.0029068940782511665, "kl": 0.01699066162109375, "learning_rate": 9.992294350272665e-07, "loss": 1.6966134353424422e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 139, "train_speed(iter/s)": 0.02297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 81.77083587646484, "completions/min_length": 34.0, "epoch": 0.27798461156614546, "grad_norm": 1.5684642870902674, "kl": 0.02487945556640625, "learning_rate": 9.992118273108356e-07, "loss": 0.009041143581271172, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 140, "train_speed(iter/s)": 0.022948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 81.79166841506958, "completions/min_length": 35.125, "epoch": 0.2799702159344751, "grad_norm": 0.002408231301548354, "kl": 0.021148681640625, "learning_rate": 9.991940208475134e-07, "loss": 2.1152703993720934e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 141, "train_speed(iter/s)": 0.022936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 74.14583587646484, "completions/min_length": 34.875, "epoch": 0.2819558203028047, "grad_norm": 0.001945442237554167, "kl": 0.02030181884765625, "learning_rate": 9.991760156443892e-07, "loss": 2.029309507634025e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 142, "train_speed(iter/s)": 0.022942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 84.09375190734863, "completions/min_length": 36.75, "epoch": 0.28394142467113426, "grad_norm": 0.0021499238761391714, "kl": 0.0267333984375, "learning_rate": 9.991578117086306e-07, "loss": 2.6741268811747432e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 143, "train_speed(iter/s)": 0.022935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 74.41666793823242, "completions/min_length": 33.625, "epoch": 0.2859270290394639, "grad_norm": 0.005360105407463123, "kl": 0.03845977783203125, "learning_rate": 9.991394090474855e-07, "loss": 3.8434896850958467e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 144, "train_speed(iter/s)": 0.022924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 80.79166889190674, "completions/min_length": 37.0, "epoch": 0.2879126334077935, "grad_norm": 0.00730347960850382, "kl": 0.039794921875, "learning_rate": 9.991208076682805e-07, "loss": 3.9786933484720066e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 145, "train_speed(iter/s)": 0.022921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 79.53125286102295, "completions/min_length": 36.125, "epoch": 0.2898982377761231, "grad_norm": 0.6769544801856519, "kl": 0.03891754150390625, "learning_rate": 9.991020075784209e-07, "loss": 0.00011350711429258808, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 146, "train_speed(iter/s)": 0.022918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.875, "completions/mean_length": 84.75000238418579, "completions/min_length": 37.75, "epoch": 0.29188384214445273, "grad_norm": 0.003463262564598902, "kl": 0.0317230224609375, "learning_rate": 9.990830087853915e-07, "loss": 3.175391975673847e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 147, "train_speed(iter/s)": 0.0229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 66.58333492279053, "completions/min_length": 31.5, "epoch": 0.29386944651278235, "grad_norm": 0.006143614130758875, "kl": 0.0285797119140625, "learning_rate": 9.99063811296756e-07, "loss": 2.8581631340784952e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 148, "train_speed(iter/s)": 0.022904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 86.20833587646484, "completions/min_length": 39.625, "epoch": 0.2958550508811119, "grad_norm": 0.0028823911907623215, "kl": 0.0189056396484375, "learning_rate": 9.990444151201577e-07, "loss": 1.8928169083665125e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 149, "train_speed(iter/s)": 0.022882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 71.09375381469727, "completions/min_length": 32.125, "epoch": 0.29784065524944153, "grad_norm": 0.006682389422446879, "kl": 0.0361175537109375, "learning_rate": 9.990248202633183e-07, "loss": 3.612569344113581e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 150, "train_speed(iter/s)": 0.022889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 76.31250095367432, "completions/min_length": 35.75, "epoch": 0.29982625961777115, "grad_norm": 0.003806402044783724, "kl": 0.0270233154296875, "learning_rate": 9.990050267340389e-07, "loss": 2.702613710425794e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 151, "train_speed(iter/s)": 0.022902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 69.41666889190674, "completions/min_length": 31.0, "epoch": 0.30181186398610077, "grad_norm": 0.7748031523798731, "kl": 0.0301666259765625, "learning_rate": 9.989850345402e-07, "loss": -0.006249020807445049, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 152, "train_speed(iter/s)": 0.022891 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 81.18750143051147, "completions/min_length": 38.875, "epoch": 0.3037974683544304, "grad_norm": 0.0020422359628149258, "kl": 0.0211029052734375, "learning_rate": 9.989648436897607e-07, "loss": 2.112751462846063e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 153, "train_speed(iter/s)": 0.022881 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 81.15625238418579, "completions/min_length": 34.875, "epoch": 0.30578307272276, "grad_norm": 1.1158877039086597, "kl": 0.024139404296875, "learning_rate": 9.989444541907596e-07, "loss": 0.0025634984485805035, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 154, "train_speed(iter/s)": 0.022875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 79.55208683013916, "completions/min_length": 31.625, "epoch": 0.3077686770910896, "grad_norm": 0.027379737913495964, "kl": 0.04638671875, "learning_rate": 9.98923866051314e-07, "loss": 4.642191925086081e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 155, "train_speed(iter/s)": 0.022866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 78.86458587646484, "completions/min_length": 39.25, "epoch": 0.3097542814594192, "grad_norm": 0.004745359305371207, "kl": 0.024932861328125, "learning_rate": 9.989030792796205e-07, "loss": 2.4921868316596374e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 156, "train_speed(iter/s)": 0.022869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 78.40625238418579, "completions/min_length": 33.5, "epoch": 0.3117398858277488, "grad_norm": 0.004681803725018852, "kl": 0.026885986328125, "learning_rate": 9.98882093883955e-07, "loss": 2.6866193366004154e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 157, "train_speed(iter/s)": 0.022864 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.125, "completions/mean_length": 84.48958683013916, "completions/min_length": 33.375, "epoch": 0.3137254901960784, "grad_norm": 0.016892752767291387, "kl": 0.033935546875, "learning_rate": 9.988609098726718e-07, "loss": 3.390968049643561e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 158, "train_speed(iter/s)": 0.022857 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 78.75000190734863, "completions/min_length": 34.25, "epoch": 0.31571109456440805, "grad_norm": 0.007675206194843289, "kl": 0.0335235595703125, "learning_rate": 9.988395272542052e-07, "loss": 3.3529042411828414e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 159, "train_speed(iter/s)": 0.022844 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 73.94791889190674, "completions/min_length": 31.625, "epoch": 0.31769669893273766, "grad_norm": 0.0026379205790726377, "kl": 0.02301025390625, "learning_rate": 9.988179460370678e-07, "loss": 2.3003169189905748e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 160, "train_speed(iter/s)": 0.022817 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 82.56250286102295, "completions/min_length": 36.625, "epoch": 0.3196823033010673, "grad_norm": 0.019981606658156535, "kl": 0.05838775634765625, "learning_rate": 9.987961662298514e-07, "loss": 5.835080082761124e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 161, "train_speed(iter/s)": 0.02282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 69.94791984558105, "completions/min_length": 33.625, "epoch": 0.32166790766939685, "grad_norm": 1.1121464524504103, "kl": 0.0399017333984375, "learning_rate": 9.987741878412273e-07, "loss": 3.9871782064437866e-05, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 162, "train_speed(iter/s)": 0.022825 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.625, "completions/mean_length": 82.65625286102295, "completions/min_length": 31.75, "epoch": 0.32365351203772647, "grad_norm": 0.9818224490673001, "kl": 0.0430145263671875, "learning_rate": 9.987520108799455e-07, "loss": -0.0016035562148317695, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 163, "train_speed(iter/s)": 0.022819 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 81.39583492279053, "completions/min_length": 35.5, "epoch": 0.3256391164060561, "grad_norm": 0.005152185096823894, "kl": 0.02822113037109375, "learning_rate": 9.98729635354835e-07, "loss": 2.8228670998942107e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 164, "train_speed(iter/s)": 0.022809 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 75.14583492279053, "completions/min_length": 37.125, "epoch": 0.3276247207743857, "grad_norm": 0.007955405284278825, "kl": 0.0336761474609375, "learning_rate": 9.987070612748041e-07, "loss": 3.369571641087532e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 165, "train_speed(iter/s)": 0.02281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 84.60416793823242, "completions/min_length": 37.5, "epoch": 0.3296103251427153, "grad_norm": 0.0037385246624281293, "kl": 0.03680419921875, "learning_rate": 9.986842886488398e-07, "loss": 3.675417974591255e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 166, "train_speed(iter/s)": 0.022795 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 84.79166841506958, "completions/min_length": 42.625, "epoch": 0.33159592951104494, "grad_norm": 1.0329286841533998, "kl": 0.0334014892578125, "learning_rate": 9.986613174860087e-07, "loss": 0.005444470327347517, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 167, "train_speed(iter/s)": 0.022797 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 85.83333587646484, "completions/min_length": 36.25, "epoch": 0.33358153387937456, "grad_norm": 0.0030034448511075465, "kl": 0.0290985107421875, "learning_rate": 9.98638147795456e-07, "loss": 2.9090309908497147e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 168, "train_speed(iter/s)": 0.022794 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.875, "completions/mean_length": 89.98958587646484, "completions/min_length": 36.25, "epoch": 0.3355671382477041, "grad_norm": 0.6460215211895096, "kl": 0.0342559814453125, "learning_rate": 9.98614779586406e-07, "loss": -0.01349202822893858, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 169, "train_speed(iter/s)": 0.022787 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 79.25000190734863, "completions/min_length": 36.25, "epoch": 0.33755274261603374, "grad_norm": 0.014864367188056237, "kl": 0.048095703125, "learning_rate": 9.98591212868162e-07, "loss": 4.805076605407521e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 170, "train_speed(iter/s)": 0.022773 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 81.78125143051147, "completions/min_length": 37.25, "epoch": 0.33953834698436336, "grad_norm": 0.9752023643543171, "kl": 0.041717529296875, "learning_rate": 9.985674476501063e-07, "loss": 0.009409889578819275, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 171, "train_speed(iter/s)": 0.022784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 85.98958778381348, "completions/min_length": 38.875, "epoch": 0.341523951352693, "grad_norm": 0.0023246536830248844, "kl": 0.0350799560546875, "learning_rate": 9.985434839417009e-07, "loss": 3.503591869957745e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 172, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 90.75000286102295, "completions/min_length": 43.125, "epoch": 0.3435095557210226, "grad_norm": 1.1776113696584138, "kl": 0.029327392578125, "learning_rate": 9.985193217524856e-07, "loss": -0.0044493707828223705, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 173, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 90.52083492279053, "completions/min_length": 43.125, "epoch": 0.3454951600893522, "grad_norm": 0.0060009006219862385, "kl": 0.0308990478515625, "learning_rate": 9.984949610920804e-07, "loss": 3.088471567025408e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 174, "train_speed(iter/s)": 0.022732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 74.0104193687439, "completions/min_length": 32.875, "epoch": 0.34748076445768183, "grad_norm": 1.7715474696283, "kl": 0.045623779296875, "learning_rate": 9.984704019701834e-07, "loss": 0.005407353863120079, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 175, "train_speed(iter/s)": 0.022734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 85.82291889190674, "completions/min_length": 33.375, "epoch": 0.3494663688260114, "grad_norm": 1.217663861653863, "kl": 0.0449981689453125, "learning_rate": 9.984456443965726e-07, "loss": 0.009984655305743217, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 176, "train_speed(iter/s)": 0.022732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 75.98958587646484, "completions/min_length": 40.0, "epoch": 0.351451973194341, "grad_norm": 1.7135717160363662, "kl": 0.0357666015625, "learning_rate": 9.98420688381104e-07, "loss": 0.007558799814432859, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 177, "train_speed(iter/s)": 0.022733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 86.53125476837158, "completions/min_length": 38.125, "epoch": 0.35343757756267064, "grad_norm": 1.062893936555404, "kl": 0.0452880859375, "learning_rate": 9.983955339337133e-07, "loss": 0.006683503743261099, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 178, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 73.00000190734863, "completions/min_length": 30.875, "epoch": 0.35542318193100025, "grad_norm": 1.3567944264860505, "kl": 0.049652099609375, "learning_rate": 9.98370181064415e-07, "loss": -0.0048940302804112434, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 179, "train_speed(iter/s)": 0.022732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 71.33333539962769, "completions/min_length": 32.75, "epoch": 0.3574087862993299, "grad_norm": 0.005652753110890744, "kl": 0.0494537353515625, "learning_rate": 9.983446297833029e-07, "loss": 4.945131513522938e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 180, "train_speed(iter/s)": 0.022732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 77.45833587646484, "completions/min_length": 35.25, "epoch": 0.3593943906676595, "grad_norm": 0.007234938322409181, "kl": 0.0520782470703125, "learning_rate": 9.98318880100549e-07, "loss": 5.203445834922604e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 181, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 75.55208587646484, "completions/min_length": 34.0, "epoch": 0.36137999503598905, "grad_norm": 1.2782972997384654, "kl": 0.049957275390625, "learning_rate": 9.982929320264052e-07, "loss": 0.007232224568724632, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 182, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 81.03125286102295, "completions/min_length": 35.5, "epoch": 0.3633655994043187, "grad_norm": 0.007560394321297047, "kl": 0.0479888916015625, "learning_rate": 9.982667855712021e-07, "loss": 4.798533336725086e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 183, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 73.23958587646484, "completions/min_length": 39.75, "epoch": 0.3653512037726483, "grad_norm": 0.0057649652997279685, "kl": 0.0381317138671875, "learning_rate": 9.982404407453487e-07, "loss": 3.8141461118357256e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 184, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 77.84375286102295, "completions/min_length": 31.75, "epoch": 0.3673368081409779, "grad_norm": 1.6356979932321425, "kl": 0.04962158203125, "learning_rate": 9.982138975593337e-07, "loss": 0.0018555410206317902, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 185, "train_speed(iter/s)": 0.022733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 81.2291693687439, "completions/min_length": 32.875, "epoch": 0.36932241250930753, "grad_norm": 0.006116373265448865, "kl": 0.0457763671875, "learning_rate": 9.981871560237246e-07, "loss": 4.576150968205184e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 186, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 81.37500190734863, "completions/min_length": 36.375, "epoch": 0.37130801687763715, "grad_norm": 0.007859130592004748, "kl": 0.0453643798828125, "learning_rate": 9.981602161491675e-07, "loss": 4.534694744506851e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 187, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 82.83333683013916, "completions/min_length": 38.125, "epoch": 0.37329362124596677, "grad_norm": 0.0029779687640615524, "kl": 0.04638671875, "learning_rate": 9.98133077946388e-07, "loss": 4.642216663341969e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 188, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 79.69791984558105, "completions/min_length": 37.125, "epoch": 0.37527922561429633, "grad_norm": 0.0054615328069089015, "kl": 0.03363037109375, "learning_rate": 9.981057414261901e-07, "loss": 3.364525764482096e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 189, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 74.46875190734863, "completions/min_length": 34.375, "epoch": 0.37726482998262595, "grad_norm": 0.007080508958201603, "kl": 0.042327880859375, "learning_rate": 9.980782065994575e-07, "loss": 4.234170773997903e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 190, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 74.18750286102295, "completions/min_length": 32.0, "epoch": 0.37925043435095557, "grad_norm": 0.007638784127471412, "kl": 0.0382843017578125, "learning_rate": 9.980504734771521e-07, "loss": 3.823750012088567e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 191, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 74.79167032241821, "completions/min_length": 34.625, "epoch": 0.3812360387192852, "grad_norm": 0.004682504778406129, "kl": 0.042877197265625, "learning_rate": 9.98022542070315e-07, "loss": 4.284096939954907e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 192, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 75.27083587646484, "completions/min_length": 36.375, "epoch": 0.3832216430876148, "grad_norm": 0.006744413200935781, "kl": 0.034637451171875, "learning_rate": 9.979944123900666e-07, "loss": 3.460952575551346e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 193, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 71.61458444595337, "completions/min_length": 33.375, "epoch": 0.3852072474559444, "grad_norm": 0.005485243666423708, "kl": 0.0358428955078125, "learning_rate": 9.979660844476055e-07, "loss": 3.581827331800014e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 194, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 75.27083587646484, "completions/min_length": 32.125, "epoch": 0.387192851824274, "grad_norm": 0.0026650032870345276, "kl": 0.0361480712890625, "learning_rate": 9.9793755825421e-07, "loss": 3.617025868152268e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 195, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 71.14583683013916, "completions/min_length": 32.25, "epoch": 0.3891784561926036, "grad_norm": 0.011339254737858831, "kl": 0.04736328125, "learning_rate": 9.979088338212367e-07, "loss": 4.738846837426536e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 196, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 74.39583492279053, "completions/min_length": 34.625, "epoch": 0.3911640605609332, "grad_norm": 0.00682870203745647, "kl": 0.0364227294921875, "learning_rate": 9.978799111601215e-07, "loss": 3.645420292741619e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 197, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 86.50000190734863, "completions/min_length": 38.125, "epoch": 0.39314966492926284, "grad_norm": 0.004287982782498444, "kl": 0.0297088623046875, "learning_rate": 9.978507902823794e-07, "loss": 2.9695647754124366e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 198, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 80.72916841506958, "completions/min_length": 30.5, "epoch": 0.39513526929759246, "grad_norm": 0.004199276731034225, "kl": 0.0319061279296875, "learning_rate": 9.978214711996038e-07, "loss": 3.188504706486128e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 199, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 74.95833587646484, "completions/min_length": 28.125, "epoch": 0.3971208736659221, "grad_norm": 0.006726660768530172, "kl": 0.0353240966796875, "learning_rate": 9.977919539234674e-07, "loss": 3.5278513678349555e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 200, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 71.03125381469727, "completions/min_length": 28.625, "epoch": 0.3991064780342517, "grad_norm": 0.002983145134616605, "kl": 0.043304443359375, "learning_rate": 9.977622384657214e-07, "loss": 4.3290659959893674e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 201, "train_speed(iter/s)": 0.022729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.625, "completions/mean_length": 85.18750238418579, "completions/min_length": 37.125, "epoch": 0.40109208240258126, "grad_norm": 0.035229834694054274, "kl": 0.078033447265625, "learning_rate": 9.977323248381964e-07, "loss": 7.797306898282841e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 202, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 76.95833492279053, "completions/min_length": 28.25, "epoch": 0.4030776867709109, "grad_norm": 1.4431260101444856, "kl": 0.0474700927734375, "learning_rate": 9.977022130528014e-07, "loss": 0.010393962264060974, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 203, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 73.32291889190674, "completions/min_length": 29.375, "epoch": 0.4050632911392405, "grad_norm": 0.024717577152067193, "kl": 0.03564453125, "learning_rate": 9.97671903121525e-07, "loss": 3.5631743230624124e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 204, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 87.88541889190674, "completions/min_length": 33.5, "epoch": 0.4070488955075701, "grad_norm": 0.002780148259801971, "kl": 0.03948974609375, "learning_rate": 9.976413950564337e-07, "loss": 3.9515100070275366e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 205, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 77.92708539962769, "completions/min_length": 33.75, "epoch": 0.40903449987589974, "grad_norm": 0.0022159754933545627, "kl": 0.02740478515625, "learning_rate": 9.976106888696735e-07, "loss": 2.7362289984012023e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 206, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 80.77083587646484, "completions/min_length": 31.25, "epoch": 0.41102010424422936, "grad_norm": 0.004332621026768188, "kl": 0.0364837646484375, "learning_rate": 9.975797845734696e-07, "loss": 3.6460402043303475e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 207, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 83.84375190734863, "completions/min_length": 34.0, "epoch": 0.413005708612559, "grad_norm": 0.7001292843181908, "kl": 0.0475311279296875, "learning_rate": 9.975486821801255e-07, "loss": 0.00872232485562563, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 208, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 70.13541984558105, "completions/min_length": 30.875, "epoch": 0.41499131298088854, "grad_norm": 0.01058725693311942, "kl": 0.0386199951171875, "learning_rate": 9.975173817020235e-07, "loss": 3.862637822749093e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 209, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 82.63541793823242, "completions/min_length": 34.25, "epoch": 0.41697691734921816, "grad_norm": 0.03540330059763418, "kl": 0.080810546875, "learning_rate": 9.974858831516252e-07, "loss": 8.076893573161215e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 210, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 77.93750238418579, "completions/min_length": 30.625, "epoch": 0.4189625217175478, "grad_norm": 0.004490513100645009, "kl": 0.0385284423828125, "learning_rate": 9.974541865414707e-07, "loss": 3.852363079204224e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 211, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 80.7291693687439, "completions/min_length": 29.5, "epoch": 0.4209481260858774, "grad_norm": 0.006926164779839543, "kl": 0.0422821044921875, "learning_rate": 9.97422291884179e-07, "loss": 4.230643389746547e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 212, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 78.01041984558105, "completions/min_length": 29.5, "epoch": 0.422933730454207, "grad_norm": 0.9432920181158546, "kl": 0.045013427734375, "learning_rate": 9.973901991924485e-07, "loss": -0.006227703299373388, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 213, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 83.18750286102295, "completions/min_length": 35.5, "epoch": 0.42491933482253663, "grad_norm": 1.245023826855593, "kl": 0.05291748046875, "learning_rate": 9.973579084790555e-07, "loss": -0.0023613572120666504, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 214, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 79.98958444595337, "completions/min_length": 35.125, "epoch": 0.4269049391908662, "grad_norm": 0.0041365932117952385, "kl": 0.0484619140625, "learning_rate": 9.973254197568559e-07, "loss": 4.843662463827059e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 215, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 77.25000238418579, "completions/min_length": 35.0, "epoch": 0.4288905435591958, "grad_norm": 0.0075376148887607965, "kl": 0.050933837890625, "learning_rate": 9.97292733038784e-07, "loss": 5.091197817819193e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 216, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 90.13541984558105, "completions/min_length": 40.25, "epoch": 0.43087614792752543, "grad_norm": 1.3811214492343569, "kl": 0.0423736572265625, "learning_rate": 9.97259848337853e-07, "loss": 0.009429289028048515, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 217, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.375, "completions/mean_length": 89.16666984558105, "completions/min_length": 35.625, "epoch": 0.43286175229585505, "grad_norm": 0.007093413604702633, "kl": 0.0416107177734375, "learning_rate": 9.972267656671555e-07, "loss": 4.167377483099699e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 218, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 82.25000286102295, "completions/min_length": 37.875, "epoch": 0.43484735666418467, "grad_norm": 0.0032133978419041143, "kl": 0.0321197509765625, "learning_rate": 9.97193485039862e-07, "loss": 3.212739829905331e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 219, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 82.51041984558105, "completions/min_length": 32.75, "epoch": 0.4368329610325143, "grad_norm": 0.006870201965163573, "kl": 0.0323028564453125, "learning_rate": 9.97160006469222e-07, "loss": 3.233300958527252e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 220, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 76.56250143051147, "completions/min_length": 33.25, "epoch": 0.4388185654008439, "grad_norm": 0.008288409891884372, "kl": 0.0484466552734375, "learning_rate": 9.971263299685647e-07, "loss": 4.844851719099097e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 221, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 87.30208778381348, "completions/min_length": 28.875, "epoch": 0.44080416976917347, "grad_norm": 1.5929481681531068, "kl": 0.044677734375, "learning_rate": 9.97092455551297e-07, "loss": -0.0032086584251374006, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 222, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 73.53125286102295, "completions/min_length": 32.125, "epoch": 0.4427897741375031, "grad_norm": 1.4608313242259412, "kl": 0.071014404296875, "learning_rate": 9.970583832309049e-07, "loss": 0.00710804108530283, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 223, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 96.67708730697632, "completions/min_length": 47.5, "epoch": 0.4447753785058327, "grad_norm": 1.1323641519408885, "kl": 0.2937774658203125, "learning_rate": 9.970241130209535e-07, "loss": 0.006472825538367033, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 224, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 84.79166889190674, "completions/min_length": 37.5, "epoch": 0.4467609828741623, "grad_norm": 0.025415104142644453, "kl": 0.069915771484375, "learning_rate": 9.969896449350867e-07, "loss": 6.981095066294074e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 225, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 86.21875381469727, "completions/min_length": 36.125, "epoch": 0.44874658724249195, "grad_norm": 0.022870315593115254, "kl": 0.0628509521484375, "learning_rate": 9.969549789870268e-07, "loss": 6.29330679657869e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 226, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 97.69791889190674, "completions/min_length": 44.125, "epoch": 0.45073219161082156, "grad_norm": 0.0033120817024783213, "kl": 0.03057861328125, "learning_rate": 9.96920115190575e-07, "loss": 3.062791802221909e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 227, "train_speed(iter/s)": 0.022621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 80.2291693687439, "completions/min_length": 37.125, "epoch": 0.4527177959791511, "grad_norm": 0.003413224744477318, "kl": 0.03436279296875, "learning_rate": 9.968850535596112e-07, "loss": 3.437111445236951e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 228, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.125, "completions/mean_length": 107.64583683013916, "completions/min_length": 43.75, "epoch": 0.45470340034748075, "grad_norm": 0.01834847724387913, "kl": 0.0690460205078125, "learning_rate": 9.968497941080947e-07, "loss": 6.910585943842307e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 229, "train_speed(iter/s)": 0.022582 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 85.92708587646484, "completions/min_length": 38.875, "epoch": 0.45668900471581036, "grad_norm": 0.005574792809061802, "kl": 0.0443878173828125, "learning_rate": 9.968143368500624e-07, "loss": 4.4396467274054885e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 230, "train_speed(iter/s)": 0.022577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 92.47916984558105, "completions/min_length": 39.125, "epoch": 0.45867460908414, "grad_norm": 1.6943395973826418, "kl": 0.04364013671875, "learning_rate": 9.96778681799631e-07, "loss": -0.0025493118446320295, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 231, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.875, "completions/mean_length": 103.64583587646484, "completions/min_length": 39.125, "epoch": 0.4606602134524696, "grad_norm": 0.004216408435550681, "kl": 0.03826904296875, "learning_rate": 9.967428289709954e-07, "loss": 3.823783481493592e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 232, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 92.43750381469727, "completions/min_length": 35.625, "epoch": 0.4626458178207992, "grad_norm": 0.006381344909782896, "kl": 0.049468994140625, "learning_rate": 9.967067783784295e-07, "loss": 4.9477264838060364e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 233, "train_speed(iter/s)": 0.022538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 79.51041984558105, "completions/min_length": 32.5, "epoch": 0.46463142218912884, "grad_norm": 0.007741405754680641, "kl": 0.0480499267578125, "learning_rate": 9.966705300362856e-07, "loss": 4.800094393431209e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 234, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 90.66666889190674, "completions/min_length": 42.875, "epoch": 0.4666170265574584, "grad_norm": 0.007435477817668643, "kl": 0.051727294921875, "learning_rate": 9.966340839589952e-07, "loss": 5.173621320864186e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 235, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.875, "completions/mean_length": 98.91666984558105, "completions/min_length": 38.625, "epoch": 0.468602630925788, "grad_norm": 0.0067140910993177885, "kl": 0.043060302734375, "learning_rate": 9.965974401610681e-07, "loss": 4.3081170588266104e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 236, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 83.93750238418579, "completions/min_length": 35.25, "epoch": 0.47058823529411764, "grad_norm": 0.9093190382569424, "kl": 0.0510711669921875, "learning_rate": 9.96560598657093e-07, "loss": 5.110601705382578e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 237, "train_speed(iter/s)": 0.022534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.625, "completions/mean_length": 85.70833587646484, "completions/min_length": 32.25, "epoch": 0.47257383966244726, "grad_norm": 0.005275237660228004, "kl": 0.046417236328125, "learning_rate": 9.96523559461737e-07, "loss": 4.6435285184998065e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 238, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 90.32291889190674, "completions/min_length": 38.75, "epoch": 0.4745594440307769, "grad_norm": 0.00663160482510452, "kl": 0.0475921630859375, "learning_rate": 9.96486322589747e-07, "loss": 4.759243893204257e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 239, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 77.40625286102295, "completions/min_length": 35.875, "epoch": 0.4765450483991065, "grad_norm": 0.004519614019553688, "kl": 0.043975830078125, "learning_rate": 9.964488880559467e-07, "loss": 4.393017297843471e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 240, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 87.23958587646484, "completions/min_length": 38.625, "epoch": 0.4785306527674361, "grad_norm": 0.006456902365410465, "kl": 0.038970947265625, "learning_rate": 9.964112558752404e-07, "loss": 3.894519613822922e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 241, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.375, "completions/mean_length": 86.46875143051147, "completions/min_length": 34.5, "epoch": 0.4805162571357657, "grad_norm": 0.009139479028185519, "kl": 0.0570831298828125, "learning_rate": 9.963734260626102e-07, "loss": 5.712979327654466e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 242, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.375, "completions/mean_length": 94.75000286102295, "completions/min_length": 39.875, "epoch": 0.4825018615040953, "grad_norm": 0.00588933968219357, "kl": 0.040985107421875, "learning_rate": 9.963353986331167e-07, "loss": 4.097309647477232e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 243, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.375, "completions/mean_length": 99.65625190734863, "completions/min_length": 47.5, "epoch": 0.4844874658724249, "grad_norm": 1.7037963995996528, "kl": 0.0664520263671875, "learning_rate": 9.962971736018994e-07, "loss": 0.005754552781581879, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 244, "train_speed(iter/s)": 0.022478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 87.07292032241821, "completions/min_length": 40.25, "epoch": 0.48647307024075453, "grad_norm": 0.004045860851494451, "kl": 0.0439453125, "learning_rate": 9.962587509841769e-07, "loss": 4.3991487473249435e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 245, "train_speed(iter/s)": 0.022484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 87.14583587646484, "completions/min_length": 36.125, "epoch": 0.48845867460908415, "grad_norm": 1.0926547240421847, "kl": 0.0585784912109375, "learning_rate": 9.962201307952454e-07, "loss": 0.013563526794314384, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 246, "train_speed(iter/s)": 0.02248 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 83.97917032241821, "completions/min_length": 30.75, "epoch": 0.4904442789774138, "grad_norm": 0.007633299726407986, "kl": 0.0464019775390625, "learning_rate": 9.961813130504812e-07, "loss": 4.640250699594617e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 247, "train_speed(iter/s)": 0.022458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 84.64583587646484, "completions/min_length": 37.5, "epoch": 0.49242988334574334, "grad_norm": 0.004855671388923836, "kl": 0.031890869140625, "learning_rate": 9.961422977653378e-07, "loss": 3.1894323910819367e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 248, "train_speed(iter/s)": 0.02246 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 83.68750190734863, "completions/min_length": 36.0, "epoch": 0.49441548771407295, "grad_norm": 0.006811685669870012, "kl": 0.0357666015625, "learning_rate": 9.961030849553484e-07, "loss": 3.576920062187128e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 249, "train_speed(iter/s)": 0.022453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 93.46875190734863, "completions/min_length": 32.625, "epoch": 0.4964010920824026, "grad_norm": 0.0036080834204791417, "kl": 0.033050537109375, "learning_rate": 9.960636746361243e-07, "loss": 3.304368146928027e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 250, "train_speed(iter/s)": 0.022458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 84.34375333786011, "completions/min_length": 33.0, "epoch": 0.4983866964507322, "grad_norm": 1.1380731620526459, "kl": 0.0817108154296875, "learning_rate": 9.96024066823356e-07, "loss": -0.004473407752811909, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 251, "train_speed(iter/s)": 0.022464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 78.51041889190674, "completions/min_length": 32.375, "epoch": 0.5003723008190618, "grad_norm": 1.5622229972707942, "kl": 0.064605712890625, "learning_rate": 9.959842615328115e-07, "loss": -0.008338342420756817, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.19401127845048904, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 252, "train_speed(iter/s)": 0.022464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.875, "completions/mean_length": 93.94791841506958, "completions/min_length": 37.0, "epoch": 0.5023579051873914, "grad_norm": 0.006438376083683289, "kl": 0.040191650390625, "learning_rate": 9.959442587803385e-07, "loss": 4.020285268779844e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 253, "train_speed(iter/s)": 0.022464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 79.15625095367432, "completions/min_length": 36.875, "epoch": 0.504343509555721, "grad_norm": 0.0051954476787273145, "kl": 0.037506103515625, "learning_rate": 9.959040585818633e-07, "loss": 3.753899363800883e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 254, "train_speed(iter/s)": 0.022464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 100.38541889190674, "completions/min_length": 39.375, "epoch": 0.5063291139240507, "grad_norm": 1.545961572006339, "kl": 0.0454254150390625, "learning_rate": 9.958636609533898e-07, "loss": 0.00044527961290441453, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 255, "train_speed(iter/s)": 0.022445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 82.21875286102295, "completions/min_length": 40.5, "epoch": 0.5083147182923803, "grad_norm": 0.03625501034741339, "kl": 0.0703582763671875, "learning_rate": 9.958230659110015e-07, "loss": 7.038464536890388e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 256, "train_speed(iter/s)": 0.022447 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 66.52083444595337, "completions/min_length": 28.875, "epoch": 0.5103003226607099, "grad_norm": 0.0043879151172943115, "kl": 0.0428009033203125, "learning_rate": 9.957822734708601e-07, "loss": 4.278571213944815e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 257, "train_speed(iter/s)": 0.022464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 80.46875190734863, "completions/min_length": 34.75, "epoch": 0.5122859270290394, "grad_norm": 0.020347219478938732, "kl": 0.064788818359375, "learning_rate": 9.95741283649206e-07, "loss": 6.475487316492945e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 258, "train_speed(iter/s)": 0.022458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 90.50000238418579, "completions/min_length": 30.5, "epoch": 0.514271531397369, "grad_norm": 0.011466843767705974, "kl": 0.0531005859375, "learning_rate": 9.957000964623582e-07, "loss": 5.304034857545048e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 259, "train_speed(iter/s)": 0.022444 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 81.04166889190674, "completions/min_length": 33.125, "epoch": 0.5162571357656986, "grad_norm": 0.00401580149659702, "kl": 0.0311279296875, "learning_rate": 9.95658711926714e-07, "loss": 3.109029057668522e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 260, "train_speed(iter/s)": 0.022447 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 87.50000095367432, "completions/min_length": 39.25, "epoch": 0.5182427401340283, "grad_norm": 0.7951947229030756, "kl": 0.0403289794921875, "learning_rate": 9.956171300587497e-07, "loss": 0.0014353692531585693, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 261, "train_speed(iter/s)": 0.022442 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.125, "completions/mean_length": 89.78125095367432, "completions/min_length": 28.75, "epoch": 0.5202283445023579, "grad_norm": 0.08799506980790449, "kl": 0.0831756591796875, "learning_rate": 9.955753508750195e-07, "loss": 8.309617987833917e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 262, "train_speed(iter/s)": 0.022438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 78.44791889190674, "completions/min_length": 29.75, "epoch": 0.5222139488706875, "grad_norm": 0.9698159168886337, "kl": 0.047393798828125, "learning_rate": 9.955333743921572e-07, "loss": 0.0029926998540759087, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.895833333954215, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 263, "train_speed(iter/s)": 0.022436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 87.67708492279053, "completions/min_length": 41.125, "epoch": 0.5241995532390171, "grad_norm": 0.005996355455010234, "kl": 0.0398712158203125, "learning_rate": 9.954912006268741e-07, "loss": 3.985785224358551e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 264, "train_speed(iter/s)": 0.022425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.625, "completions/mean_length": 96.97916793823242, "completions/min_length": 37.375, "epoch": 0.5261851576073467, "grad_norm": 0.00471993857290991, "kl": 0.04254150390625, "learning_rate": 9.954488295959603e-07, "loss": 4.256993270246312e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 265, "train_speed(iter/s)": 0.02241 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 97.50000190734863, "completions/min_length": 45.125, "epoch": 0.5281707619756764, "grad_norm": 0.5242150185143424, "kl": 0.040802001953125, "learning_rate": 9.954062613162853e-07, "loss": 0.0003016740083694458, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 266, "train_speed(iter/s)": 0.0224 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.375, "completions/mean_length": 85.45833683013916, "completions/min_length": 32.25, "epoch": 0.530156366344006, "grad_norm": 1.2094964227030165, "kl": 0.0511932373046875, "learning_rate": 9.95363495804796e-07, "loss": -0.0017611955991014838, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 267, "train_speed(iter/s)": 0.022394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 88.79166889190674, "completions/min_length": 28.125, "epoch": 0.5321419707123356, "grad_norm": 0.005372675718279741, "kl": 0.0513763427734375, "learning_rate": 9.953205330785181e-07, "loss": 5.144669194123708e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 268, "train_speed(iter/s)": 0.022395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 100.26041889190674, "completions/min_length": 42.75, "epoch": 0.5341275750806652, "grad_norm": 1.217340586434367, "kl": 0.046112060546875, "learning_rate": 9.952773731545562e-07, "loss": -0.006409394554793835, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 269, "train_speed(iter/s)": 0.022389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 92.29166984558105, "completions/min_length": 37.875, "epoch": 0.5361131794489948, "grad_norm": 0.0069470145760686, "kl": 0.046142578125, "learning_rate": 9.95234016050093e-07, "loss": 4.617391823558137e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 270, "train_speed(iter/s)": 0.022381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 85.10416889190674, "completions/min_length": 33.75, "epoch": 0.5380987838173243, "grad_norm": 0.005386574412981814, "kl": 0.039306640625, "learning_rate": 9.951904617823906e-07, "loss": 3.9301499782595783e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 271, "train_speed(iter/s)": 0.022376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 79.56250238418579, "completions/min_length": 34.75, "epoch": 0.540084388185654, "grad_norm": 1.3403204334209802, "kl": 0.061492919921875, "learning_rate": 9.951467103687878e-07, "loss": 0.00987269263714552, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 272, "train_speed(iter/s)": 0.022379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 91.26041984558105, "completions/min_length": 40.125, "epoch": 0.5420699925539836, "grad_norm": 0.00409245928499806, "kl": 0.03070068359375, "learning_rate": 9.95102761826704e-07, "loss": 3.071514947805554e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 273, "train_speed(iter/s)": 0.022384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 86.89583539962769, "completions/min_length": 35.125, "epoch": 0.5440555969223132, "grad_norm": 0.0071617563473429634, "kl": 0.045074462890625, "learning_rate": 9.950586161736352e-07, "loss": 4.5137589040677994e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 274, "train_speed(iter/s)": 0.022379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 78.19791984558105, "completions/min_length": 33.875, "epoch": 0.5460412012906428, "grad_norm": 0.007087138473852444, "kl": 0.0514373779296875, "learning_rate": 9.950142734271572e-07, "loss": 5.141062865732238e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 275, "train_speed(iter/s)": 0.022388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 87.02083587646484, "completions/min_length": 33.5, "epoch": 0.5480268056589724, "grad_norm": 0.005770063883750235, "kl": 0.04537200927734375, "learning_rate": 9.949697336049236e-07, "loss": 4.5340821088757366e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 276, "train_speed(iter/s)": 0.022391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 83.56250143051147, "completions/min_length": 38.875, "epoch": 0.5500124100273021, "grad_norm": 0.005353004081434314, "kl": 0.03570556640625, "learning_rate": 9.949249967246668e-07, "loss": 3.572989226086065e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 277, "train_speed(iter/s)": 0.022384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 89.14583587646484, "completions/min_length": 34.25, "epoch": 0.5519980143956317, "grad_norm": 0.004317811649377504, "kl": 0.038909912109375, "learning_rate": 9.948800628041975e-07, "loss": 3.8890226278454065e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 278, "train_speed(iter/s)": 0.022378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 87.21875333786011, "completions/min_length": 44.75, "epoch": 0.5539836187639613, "grad_norm": 1.1137985471826737, "kl": 0.03668212890625, "learning_rate": 9.948349318614047e-07, "loss": -0.008970300666987896, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 279, "train_speed(iter/s)": 0.022367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 82.96875286102295, "completions/min_length": 34.125, "epoch": 0.5559692231322909, "grad_norm": 0.007099269441260585, "kl": 0.0446624755859375, "learning_rate": 9.947896039142563e-07, "loss": 4.4696083932649344e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 280, "train_speed(iter/s)": 0.022354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 76.63541793823242, "completions/min_length": 30.875, "epoch": 0.5579548275006205, "grad_norm": 0.007768146728698171, "kl": 0.0522308349609375, "learning_rate": 9.947440789807979e-07, "loss": 5.225494896876626e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 281, "train_speed(iter/s)": 0.022359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 73.66667032241821, "completions/min_length": 29.0, "epoch": 0.5599404318689502, "grad_norm": 0.008280904620608561, "kl": 0.044586181640625, "learning_rate": 9.946983570791542e-07, "loss": 4.455495945876464e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 282, "train_speed(iter/s)": 0.022368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.625, "completions/mean_length": 89.302086353302, "completions/min_length": 34.625, "epoch": 0.5619260362372798, "grad_norm": 0.022693547898120697, "kl": 0.0592498779296875, "learning_rate": 9.946524382275281e-07, "loss": 5.9227146266493946e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 283, "train_speed(iter/s)": 0.022361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 70.1354193687439, "completions/min_length": 26.75, "epoch": 0.5639116406056094, "grad_norm": 0.03910581010816483, "kl": 0.0723419189453125, "learning_rate": 9.94606322444201e-07, "loss": 7.234037184389308e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 284, "train_speed(iter/s)": 0.022362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 73.25000190734863, "completions/min_length": 34.75, "epoch": 0.5658972449739389, "grad_norm": 0.0072473280602554, "kl": 0.043609619140625, "learning_rate": 9.94560009747532e-07, "loss": 4.363508924143389e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 285, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 78.62500286102295, "completions/min_length": 36.75, "epoch": 0.5678828493422685, "grad_norm": 0.00513229061344777, "kl": 0.052734375, "learning_rate": 9.9451350015596e-07, "loss": 5.2742478146683425e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 286, "train_speed(iter/s)": 0.02234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 72.06250190734863, "completions/min_length": 34.75, "epoch": 0.5698684537105981, "grad_norm": 0.007204794620578479, "kl": 0.051910400390625, "learning_rate": 9.944667936880007e-07, "loss": 5.192386743146926e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 287, "train_speed(iter/s)": 0.022347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 78.52083539962769, "completions/min_length": 30.125, "epoch": 0.5718540580789278, "grad_norm": 0.0080653686717861, "kl": 0.056793212890625, "learning_rate": 9.944198903622492e-07, "loss": 5.678350498783402e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 288, "train_speed(iter/s)": 0.022338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 82.47917079925537, "completions/min_length": 34.5, "epoch": 0.5738396624472574, "grad_norm": 0.004289253503086958, "kl": 0.0355377197265625, "learning_rate": 9.943727901973792e-07, "loss": 3.553136775735766e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 289, "train_speed(iter/s)": 0.022345 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 263.375, "completions/mean_length": 102.79166889190674, "completions/min_length": 31.25, "epoch": 0.575825266815587, "grad_norm": 0.5694186165643396, "kl": 0.051727294921875, "learning_rate": 9.943254932121415e-07, "loss": 5.182623863220215e-05, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 0.9791666716337204, "rewards/Format/std": 0.04865618050098419, "step": 290, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.625, "completions/mean_length": 82.02083683013916, "completions/min_length": 34.75, "epoch": 0.5778108711839166, "grad_norm": 0.004835814356733436, "kl": 0.05499267578125, "learning_rate": 9.942779994253665e-07, "loss": 5.494131983141415e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 291, "train_speed(iter/s)": 0.022317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 80.95833444595337, "completions/min_length": 32.75, "epoch": 0.5797964755522462, "grad_norm": 0.00600677045636444, "kl": 0.06103515625, "learning_rate": 9.942303088559624e-07, "loss": 6.105084321461618e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 292, "train_speed(iter/s)": 0.02231 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 82.58333587646484, "completions/min_length": 39.75, "epoch": 0.5817820799205758, "grad_norm": 0.005354442662333966, "kl": 0.038970947265625, "learning_rate": 9.941824215229158e-07, "loss": 3.8933507312322035e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 293, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 77.895836353302, "completions/min_length": 35.75, "epoch": 0.5837676842889055, "grad_norm": 0.0034861829787340166, "kl": 0.051513671875, "learning_rate": 9.941343374452917e-07, "loss": 5.145318209542893e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 294, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 70.67708587646484, "completions/min_length": 31.5, "epoch": 0.5857532886572351, "grad_norm": 0.0075820542365631315, "kl": 0.0500640869140625, "learning_rate": 9.940860566422333e-07, "loss": 5.0081373046850786e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 295, "train_speed(iter/s)": 0.022325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 84.75000190734863, "completions/min_length": 41.5, "epoch": 0.5877388930255647, "grad_norm": 0.006990368984765174, "kl": 0.05078125, "learning_rate": 9.940375791329626e-07, "loss": 5.07462173118256e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 296, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 80.16666889190674, "completions/min_length": 34.375, "epoch": 0.5897244973938943, "grad_norm": 0.007612508013274423, "kl": 0.0609130859375, "learning_rate": 9.93988904936779e-07, "loss": 6.082579056965187e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 297, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 70.20833492279053, "completions/min_length": 33.25, "epoch": 0.5917101017622238, "grad_norm": 3.3256630180378113, "kl": 0.053009033203125, "learning_rate": 9.939400340730611e-07, "loss": 5.300156772136688e-05, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.39076167345046997, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 298, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 80.70833587646484, "completions/min_length": 33.25, "epoch": 0.5936957061305534, "grad_norm": 0.5084239726350965, "kl": 0.063568115234375, "learning_rate": 9.938909665612654e-07, "loss": 0.00017539411783218384, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 299, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 67.59375190734863, "completions/min_length": 30.375, "epoch": 0.5956813104988831, "grad_norm": 0.007156312969301177, "kl": 0.048553466796875, "learning_rate": 9.938417024209264e-07, "loss": 4.8531859647482634e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 300, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 70.8854193687439, "completions/min_length": 33.125, "epoch": 0.5976669148672127, "grad_norm": 1.6348234739854737, "kl": 0.059356689453125, "learning_rate": 9.937922416716576e-07, "loss": -0.0008529163897037506, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 301, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 66.22916746139526, "completions/min_length": 32.875, "epoch": 0.5996525192355423, "grad_norm": 0.004159999935517174, "kl": 0.071044921875, "learning_rate": 9.937425843331503e-07, "loss": 7.094735337886959e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 302, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 67.69791793823242, "completions/min_length": 31.125, "epoch": 0.6016381236038719, "grad_norm": 0.007738811762971821, "kl": 0.0450286865234375, "learning_rate": 9.93692730425174e-07, "loss": 4.5024469727650285e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 303, "train_speed(iter/s)": 0.022315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 87.93750286102295, "completions/min_length": 35.0, "epoch": 0.6036237279722015, "grad_norm": 0.007015388847999951, "kl": 0.047637939453125, "learning_rate": 9.936426799675768e-07, "loss": 4.764476034324616e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 304, "train_speed(iter/s)": 0.022304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 80.97916841506958, "completions/min_length": 31.75, "epoch": 0.6056093323405312, "grad_norm": 0.82371722631021, "kl": 0.044219970703125, "learning_rate": 9.935924329802845e-07, "loss": 0.006322646047919989, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 305, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 73.82291889190674, "completions/min_length": 35.625, "epoch": 0.6075949367088608, "grad_norm": 0.005484129378873674, "kl": 0.047882080078125, "learning_rate": 9.93541989483302e-07, "loss": 4.78505899081938e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 306, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 78.56250286102295, "completions/min_length": 34.5, "epoch": 0.6095805410771904, "grad_norm": 0.0074837079932473325, "kl": 0.06488037109375, "learning_rate": 9.934913494967115e-07, "loss": 6.480350566562265e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 307, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 73.63541841506958, "completions/min_length": 36.25, "epoch": 0.61156614544552, "grad_norm": 0.005640734271409294, "kl": 0.0375518798828125, "learning_rate": 9.93440513040674e-07, "loss": 3.755502984859049e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 308, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 68.36458539962769, "completions/min_length": 32.375, "epoch": 0.6135517498138496, "grad_norm": 0.008726351361175394, "kl": 0.0644073486328125, "learning_rate": 9.933894801354288e-07, "loss": 6.441806181101128e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 309, "train_speed(iter/s)": 0.022304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 79.23958587646484, "completions/min_length": 33.75, "epoch": 0.6155373541821793, "grad_norm": 0.005139653467304836, "kl": 0.0477142333984375, "learning_rate": 9.933382508012929e-07, "loss": 4.774502303916961e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 310, "train_speed(iter/s)": 0.022311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 72.31250238418579, "completions/min_length": 34.5, "epoch": 0.6175229585505088, "grad_norm": 0.007771748144733654, "kl": 0.065460205078125, "learning_rate": 9.932868250586617e-07, "loss": 6.555522850248963e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 311, "train_speed(iter/s)": 0.022318 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 83.28125238418579, "completions/min_length": 30.375, "epoch": 0.6195085629188384, "grad_norm": 1.1986690163625677, "kl": 0.0679168701171875, "learning_rate": 9.932352029280094e-07, "loss": -0.00581248989328742, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12873215973377228, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 312, "train_speed(iter/s)": 0.022311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 76.63541984558105, "completions/min_length": 32.875, "epoch": 0.621494167287168, "grad_norm": 1.219891757000396, "kl": 0.16864013671875, "learning_rate": 9.931833844298874e-07, "loss": 0.009329535998404026, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 313, "train_speed(iter/s)": 0.022307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 78.72916984558105, "completions/min_length": 35.75, "epoch": 0.6234797716554976, "grad_norm": 0.0049773928757574975, "kl": 0.0591888427734375, "learning_rate": 9.931313695849258e-07, "loss": 5.92159922234714e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 314, "train_speed(iter/s)": 0.022312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 73.833336353302, "completions/min_length": 31.0, "epoch": 0.6254653760238272, "grad_norm": 0.021152166395529724, "kl": 0.0635833740234375, "learning_rate": 9.930791584138333e-07, "loss": 6.348952592816204e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 315, "train_speed(iter/s)": 0.022306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 73.62500143051147, "completions/min_length": 33.75, "epoch": 0.6274509803921569, "grad_norm": 0.004783430890467514, "kl": 0.05029296875, "learning_rate": 9.930267509373956e-07, "loss": 5.024982237955555e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 316, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 72.82291841506958, "completions/min_length": 33.125, "epoch": 0.6294365847604865, "grad_norm": 0.993476054674328, "kl": 0.04193115234375, "learning_rate": 9.929741471764776e-07, "loss": -0.0001899873313959688, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 317, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 81.79166984558105, "completions/min_length": 40.0, "epoch": 0.6314221891288161, "grad_norm": 1.4042089329030751, "kl": 0.035736083984375, "learning_rate": 9.92921347152022e-07, "loss": -0.014559095725417137, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 318, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 71.739586353302, "completions/min_length": 30.75, "epoch": 0.6334077934971457, "grad_norm": 1.5579331441674198, "kl": 0.051544189453125, "learning_rate": 9.928683508850495e-07, "loss": -0.015120545402169228, "memory(GiB)": 94.21, "reward": 1.7604166865348816, "reward_std": 0.0900652389973402, "rewards/CineAccuracyORM/mean": 0.7604166753590107, "rewards/CineAccuracyORM/std": 0.2768445573747158, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 319, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 74.13541793823242, "completions/min_length": 30.125, "epoch": 0.6353933978654753, "grad_norm": 0.004824523037702253, "kl": 0.0405731201171875, "learning_rate": 9.928151583966592e-07, "loss": 4.057247861055657e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 320, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 76.145836353302, "completions/min_length": 38.125, "epoch": 0.637379002233805, "grad_norm": 0.02241491387971438, "kl": 0.0762939453125, "learning_rate": 9.927617697080278e-07, "loss": 7.626811566296965e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 321, "train_speed(iter/s)": 0.022292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 68.46875190734863, "completions/min_length": 31.875, "epoch": 0.6393646066021346, "grad_norm": 0.0034482191002354365, "kl": 0.04315185546875, "learning_rate": 9.92708184840411e-07, "loss": 4.309406358515844e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 322, "train_speed(iter/s)": 0.022292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.125, "completions/mean_length": 79.66666984558105, "completions/min_length": 33.75, "epoch": 0.6413502109704642, "grad_norm": 0.014205677935915088, "kl": 0.04925537109375, "learning_rate": 9.926544038151414e-07, "loss": 4.916825128020719e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 323, "train_speed(iter/s)": 0.02229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 68.30208587646484, "completions/min_length": 26.75, "epoch": 0.6433358153387937, "grad_norm": 5.454400755145376, "kl": 1.1224365234375, "learning_rate": 9.926004266536313e-07, "loss": -0.0042072865180671215, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 324, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 77.56250190734863, "completions/min_length": 34.875, "epoch": 0.6453214197071233, "grad_norm": 0.03913550833558443, "kl": 0.0589141845703125, "learning_rate": 9.925462533773693e-07, "loss": 5.893352135899477e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 325, "train_speed(iter/s)": 0.022278 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 79.98958539962769, "completions/min_length": 36.5, "epoch": 0.6473070240754529, "grad_norm": 0.1877933333268866, "kl": 0.1809844970703125, "learning_rate": 9.924918840079234e-07, "loss": 0.00018087081843987107, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 326, "train_speed(iter/s)": 0.022279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 72.81250238418579, "completions/min_length": 35.75, "epoch": 0.6492926284437825, "grad_norm": 0.01665023136607427, "kl": 0.060211181640625, "learning_rate": 9.92437318566939e-07, "loss": 6.011496589053422e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 327, "train_speed(iter/s)": 0.02228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 87.16666889190674, "completions/min_length": 45.0, "epoch": 0.6512782328121122, "grad_norm": 0.003712898810028288, "kl": 0.038299560546875, "learning_rate": 9.9238255707614e-07, "loss": 3.829874185612425e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 328, "train_speed(iter/s)": 0.022279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 74.73958587646484, "completions/min_length": 33.375, "epoch": 0.6532638371804418, "grad_norm": 0.004942025964931426, "kl": 0.050689697265625, "learning_rate": 9.923275995573278e-07, "loss": 5.067026359029114e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 329, "train_speed(iter/s)": 0.022272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 66.21875238418579, "completions/min_length": 26.75, "epoch": 0.6552494415487714, "grad_norm": 0.7874083390702336, "kl": 0.055450439453125, "learning_rate": 9.922724460323825e-07, "loss": -0.014580942690372467, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 330, "train_speed(iter/s)": 0.022272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 76.00000238418579, "completions/min_length": 34.5, "epoch": 0.657235045917101, "grad_norm": 0.004149881115869045, "kl": 0.051239013671875, "learning_rate": 9.922170965232618e-07, "loss": 5.1151342631783336e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 331, "train_speed(iter/s)": 0.022261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 62.47916841506958, "completions/min_length": 29.625, "epoch": 0.6592206502854306, "grad_norm": 0.018607020206996103, "kl": 0.076171875, "learning_rate": 9.921615510520014e-07, "loss": 7.611622277181596e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 332, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.125, "completions/mean_length": 87.60416841506958, "completions/min_length": 37.375, "epoch": 0.6612062546537603, "grad_norm": 1.4880830188793217, "kl": 0.0885009765625, "learning_rate": 9.921058096407152e-07, "loss": 0.0032539120875298977, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 333, "train_speed(iter/s)": 0.022277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 71.89583539962769, "completions/min_length": 34.125, "epoch": 0.6631918590220899, "grad_norm": 0.027870046553761317, "kl": 0.08740234375, "learning_rate": 9.920498723115949e-07, "loss": 8.746829553274438e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 334, "train_speed(iter/s)": 0.022268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 71.81250190734863, "completions/min_length": 33.0, "epoch": 0.6651774633904195, "grad_norm": 1.5966502848162438, "kl": 0.072967529296875, "learning_rate": 9.919937390869107e-07, "loss": 0.00811631977558136, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 335, "train_speed(iter/s)": 0.022276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 75.73958539962769, "completions/min_length": 34.5, "epoch": 0.6671630677587491, "grad_norm": 1.840870931410954, "kl": 0.0665283203125, "learning_rate": 9.919374099890101e-07, "loss": 0.0009361244738101959, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 336, "train_speed(iter/s)": 0.022278 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 77.89583492279053, "completions/min_length": 37.5, "epoch": 0.6691486721270786, "grad_norm": 0.9857391057574876, "kl": 0.087371826171875, "learning_rate": 9.918808850403192e-07, "loss": -0.000843668996822089, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 337, "train_speed(iter/s)": 0.022267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 82.63541889190674, "completions/min_length": 35.25, "epoch": 0.6711342764954082, "grad_norm": 0.03017565115051977, "kl": 0.075103759765625, "learning_rate": 9.918241642633414e-07, "loss": 7.500908395741135e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 338, "train_speed(iter/s)": 0.02227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 68.82291889190674, "completions/min_length": 31.625, "epoch": 0.6731198808637379, "grad_norm": 0.006981340970946005, "kl": 0.0548095703125, "learning_rate": 9.917672476806588e-07, "loss": 5.483850691234693e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 339, "train_speed(iter/s)": 0.022271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 84.85416841506958, "completions/min_length": 38.75, "epoch": 0.6751054852320675, "grad_norm": 0.010032278040829628, "kl": 0.054901123046875, "learning_rate": 9.91710135314931e-07, "loss": 5.494210563483648e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 340, "train_speed(iter/s)": 0.022263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 68.82291841506958, "completions/min_length": 32.125, "epoch": 0.6770910896003971, "grad_norm": 0.009341870336307356, "kl": 0.057037353515625, "learning_rate": 9.916528271888956e-07, "loss": 5.7096789532806724e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 341, "train_speed(iter/s)": 0.022271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 66.62500143051147, "completions/min_length": 33.125, "epoch": 0.6790766939687267, "grad_norm": 2.2099864740654147, "kl": 0.1019744873046875, "learning_rate": 9.915953233253683e-07, "loss": 0.0027551865205168724, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 342, "train_speed(iter/s)": 0.022277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 80.71875286102295, "completions/min_length": 37.0, "epoch": 0.6810622983370563, "grad_norm": 0.005790785987682834, "kl": 0.05206298828125, "learning_rate": 9.915376237472425e-07, "loss": 5.207936555962078e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 343, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 69.65625190734863, "completions/min_length": 29.375, "epoch": 0.683047902705386, "grad_norm": 0.5256727787266359, "kl": 0.076141357421875, "learning_rate": 9.914797284774895e-07, "loss": -0.007159882690757513, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 344, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 75.06250190734863, "completions/min_length": 32.75, "epoch": 0.6850335070737156, "grad_norm": 0.00690008198512845, "kl": 0.05670166015625, "learning_rate": 9.914216375391593e-07, "loss": 5.6735367252258584e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 345, "train_speed(iter/s)": 0.022271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 75.21875286102295, "completions/min_length": 37.125, "epoch": 0.6870191114420452, "grad_norm": 0.007116456286213994, "kl": 0.06036376953125, "learning_rate": 9.913633509553784e-07, "loss": 6.0376849432941526e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 346, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 78.59375143051147, "completions/min_length": 41.875, "epoch": 0.6890047158103748, "grad_norm": 0.007416111518728378, "kl": 0.058441162109375, "learning_rate": 9.91304868749352e-07, "loss": 5.842831888003275e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 347, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 80.333336353302, "completions/min_length": 31.125, "epoch": 0.6909903201787044, "grad_norm": 1.0017270820050066, "kl": 0.0704345703125, "learning_rate": 9.912461909443636e-07, "loss": 0.01074531301856041, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 348, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 67.13541841506958, "completions/min_length": 34.25, "epoch": 0.692975924547034, "grad_norm": 1.05958784983628, "kl": 0.0814208984375, "learning_rate": 9.911873175637739e-07, "loss": 0.007089934777468443, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 349, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 65.64583444595337, "completions/min_length": 35.0, "epoch": 0.6949615289153637, "grad_norm": 0.009623425537380588, "kl": 0.067596435546875, "learning_rate": 9.911282486310212e-07, "loss": 6.757803203072399e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 350, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 83.1041693687439, "completions/min_length": 41.625, "epoch": 0.6969471332836932, "grad_norm": 0.008448838470143364, "kl": 0.076141357421875, "learning_rate": 9.910689841696229e-07, "loss": 7.613049092469737e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 351, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 75.73958539962769, "completions/min_length": 35.5, "epoch": 0.6989327376520228, "grad_norm": 0.008973097999604834, "kl": 0.073699951171875, "learning_rate": 9.910095242031727e-07, "loss": 7.361932512139902e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 352, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 55.42708444595337, "completions/min_length": 29.5, "epoch": 0.7009183420203524, "grad_norm": 0.011896671231470929, "kl": 0.09442138671875, "learning_rate": 9.909498687553433e-07, "loss": 9.437694097869098e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 353, "train_speed(iter/s)": 0.022301 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.625, "completions/mean_length": 77.80208587646484, "completions/min_length": 34.625, "epoch": 0.702903946388682, "grad_norm": 0.009088107575926282, "kl": 0.09478759765625, "learning_rate": 9.908900178498847e-07, "loss": 9.483918984187767e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 354, "train_speed(iter/s)": 0.022292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 63.406251430511475, "completions/min_length": 32.0, "epoch": 0.7048895507570117, "grad_norm": 0.010886413379501973, "kl": 0.097900390625, "learning_rate": 9.908299715106248e-07, "loss": 9.774637874215841e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 355, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 77.03125190734863, "completions/min_length": 38.5, "epoch": 0.7068751551253413, "grad_norm": 0.009118278406867757, "kl": 0.094329833984375, "learning_rate": 9.907697297614694e-07, "loss": 9.433356899535283e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 356, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 63.65625238418579, "completions/min_length": 31.625, "epoch": 0.7088607594936709, "grad_norm": 0.010805662813000999, "kl": 0.1063232421875, "learning_rate": 9.90709292626402e-07, "loss": 0.00010625859431456774, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 357, "train_speed(iter/s)": 0.022306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 71.85416841506958, "completions/min_length": 32.375, "epoch": 0.7108463638620005, "grad_norm": 0.8144932183814274, "kl": 0.1082763671875, "learning_rate": 9.906486601294836e-07, "loss": -0.010265604592859745, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 358, "train_speed(iter/s)": 0.022307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 61.42708492279053, "completions/min_length": 29.875, "epoch": 0.7128319682303301, "grad_norm": 1.239045959422636, "kl": 0.11566162109375, "learning_rate": 9.90587832294854e-07, "loss": -0.006729485467076302, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 359, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 77.1354193687439, "completions/min_length": 31.0, "epoch": 0.7148175725986597, "grad_norm": 1.9365133335728133, "kl": 0.10089111328125, "learning_rate": 9.905268091467294e-07, "loss": 0.00010097896301886067, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 360, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 70.05208492279053, "completions/min_length": 36.375, "epoch": 0.7168031769669894, "grad_norm": 0.00962282633834019, "kl": 0.10614013671875, "learning_rate": 9.90465590709405e-07, "loss": 0.00010618605301715434, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 361, "train_speed(iter/s)": 0.022316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 61.03125190734863, "completions/min_length": 29.75, "epoch": 0.718788781335319, "grad_norm": 0.014007692663057595, "kl": 0.111572265625, "learning_rate": 9.904041770072524e-07, "loss": 0.00011155110405525193, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 362, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 67.13541841506958, "completions/min_length": 32.875, "epoch": 0.7207743857036486, "grad_norm": 1.2162865170250325, "kl": 0.09619140625, "learning_rate": 9.903425680647223e-07, "loss": 0.0010505297686904669, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 363, "train_speed(iter/s)": 0.022324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 60.98958492279053, "completions/min_length": 31.125, "epoch": 0.7227599900719781, "grad_norm": 0.011429530437017058, "kl": 0.10235595703125, "learning_rate": 9.902807639063425e-07, "loss": 0.0001023800577968359, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 364, "train_speed(iter/s)": 0.022339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 61.833335876464844, "completions/min_length": 30.25, "epoch": 0.7247455944403077, "grad_norm": 0.016087478102308123, "kl": 0.10528564453125, "learning_rate": 9.902187645567183e-07, "loss": 0.00010526964615564793, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 365, "train_speed(iter/s)": 0.022353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 75.30208539962769, "completions/min_length": 34.875, "epoch": 0.7267311988086373, "grad_norm": 0.009143578785985302, "kl": 0.0863037109375, "learning_rate": 9.901565700405331e-07, "loss": 8.640994201414287e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 366, "train_speed(iter/s)": 0.02236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 71.03125143051147, "completions/min_length": 35.5, "epoch": 0.728716803176967, "grad_norm": 0.009792348440264387, "kl": 0.103759765625, "learning_rate": 9.90094180382548e-07, "loss": 0.00010376061254646629, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 367, "train_speed(iter/s)": 0.022364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 75.04166889190674, "completions/min_length": 34.0, "epoch": 0.7307024075452966, "grad_norm": 0.008107420608290445, "kl": 0.068450927734375, "learning_rate": 9.900315956076015e-07, "loss": 6.850845966255292e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 368, "train_speed(iter/s)": 0.022366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 70.91666841506958, "completions/min_length": 31.875, "epoch": 0.7326880119136262, "grad_norm": 0.009108907130136363, "kl": 0.075958251953125, "learning_rate": 9.8996881574061e-07, "loss": 7.596638170070946e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 369, "train_speed(iter/s)": 0.022376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 68.79166984558105, "completions/min_length": 32.75, "epoch": 0.7346736162819558, "grad_norm": 1.6168107617006753, "kl": 0.139007568359375, "learning_rate": 9.89905840806567e-07, "loss": 0.0032623931765556335, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6354166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 370, "train_speed(iter/s)": 0.022386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 76.3854193687439, "completions/min_length": 29.75, "epoch": 0.7366592206502854, "grad_norm": 0.7460126406043012, "kl": 0.08642578125, "learning_rate": 9.898426708305453e-07, "loss": -0.0033700112253427505, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 371, "train_speed(iter/s)": 0.022386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 74.28125095367432, "completions/min_length": 34.0, "epoch": 0.7386448250186151, "grad_norm": 0.9955465777870846, "kl": 0.1749267578125, "learning_rate": 9.897793058376932e-07, "loss": 0.0034031940158456564, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 372, "train_speed(iter/s)": 0.022388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 76.22917032241821, "completions/min_length": 34.75, "epoch": 0.7406304293869447, "grad_norm": 0.008875594900954038, "kl": 0.06402587890625, "learning_rate": 9.89715745853238e-07, "loss": 6.403854786185548e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 373, "train_speed(iter/s)": 0.022381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 72.46875143051147, "completions/min_length": 33.875, "epoch": 0.7426160337552743, "grad_norm": 0.009116732402555323, "kl": 0.072784423828125, "learning_rate": 9.896519909024841e-07, "loss": 7.276926044141874e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 374, "train_speed(iter/s)": 0.022378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.125, "completions/mean_length": 65.22916841506958, "completions/min_length": 33.125, "epoch": 0.7446016381236039, "grad_norm": 0.009145128063679963, "kl": 0.07708740234375, "learning_rate": 9.895880410108142e-07, "loss": 7.714629464317113e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 375, "train_speed(iter/s)": 0.022391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 75.62500190734863, "completions/min_length": 36.875, "epoch": 0.7465872424919335, "grad_norm": 0.007210039722318257, "kl": 0.06585693359375, "learning_rate": 9.895238962036878e-07, "loss": 6.576798477908596e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 376, "train_speed(iter/s)": 0.022394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 73.77083587646484, "completions/min_length": 35.625, "epoch": 0.748572846860263, "grad_norm": 0.010693999667093904, "kl": 0.079681396484375, "learning_rate": 9.894595565066422e-07, "loss": 7.966612611198798e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 377, "train_speed(iter/s)": 0.022392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 76.03125286102295, "completions/min_length": 28.875, "epoch": 0.7505584512285927, "grad_norm": 0.006922013911813544, "kl": 0.0616455078125, "learning_rate": 9.893950219452926e-07, "loss": 6.165902595967054e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 378, "train_speed(iter/s)": 0.022397 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 70.03125190734863, "completions/min_length": 34.25, "epoch": 0.7525440555969223, "grad_norm": 0.005846201649571549, "kl": 0.064605712890625, "learning_rate": 9.893302925453314e-07, "loss": 6.460178701672703e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 379, "train_speed(iter/s)": 0.022401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 75.08333492279053, "completions/min_length": 36.875, "epoch": 0.7545296599652519, "grad_norm": 0.008013367074850918, "kl": 0.063568115234375, "learning_rate": 9.892653683325292e-07, "loss": 6.364649016177282e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 380, "train_speed(iter/s)": 0.022402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 73.63541889190674, "completions/min_length": 32.625, "epoch": 0.7565152643335815, "grad_norm": 0.005479395627387796, "kl": 0.06341552734375, "learning_rate": 9.892002493327331e-07, "loss": 6.33429444860667e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 381, "train_speed(iter/s)": 0.02241 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 73.01041841506958, "completions/min_length": 32.625, "epoch": 0.7585008687019111, "grad_norm": 0.034901433273961564, "kl": 0.066864013671875, "learning_rate": 9.891349355718688e-07, "loss": 6.686744745820761e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 382, "train_speed(iter/s)": 0.022411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 74.30208539962769, "completions/min_length": 35.375, "epoch": 0.7604864730702408, "grad_norm": 0.005921809752816739, "kl": 0.048095703125, "learning_rate": 9.89069427075939e-07, "loss": 4.815867214347236e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 383, "train_speed(iter/s)": 0.022409 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 73.18750286102295, "completions/min_length": 32.375, "epoch": 0.7624720774385704, "grad_norm": 0.04355059164827771, "kl": 0.11383056640625, "learning_rate": 9.89003723871024e-07, "loss": 0.0001137763902079314, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 384, "train_speed(iter/s)": 0.022408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 71.1354193687439, "completions/min_length": 35.375, "epoch": 0.7644576818069, "grad_norm": 0.015617950505835214, "kl": 0.065216064453125, "learning_rate": 9.889378259832816e-07, "loss": 6.520246097352356e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 385, "train_speed(iter/s)": 0.022413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 83.02083492279053, "completions/min_length": 36.25, "epoch": 0.7664432861752296, "grad_norm": 0.0064691606105345845, "kl": 0.07122802734375, "learning_rate": 9.888717334389471e-07, "loss": 7.12551482138224e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 386, "train_speed(iter/s)": 0.022403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 74.10416984558105, "completions/min_length": 33.0, "epoch": 0.7684288905435592, "grad_norm": 0.005423592725224395, "kl": 0.07147216796875, "learning_rate": 9.888054462643336e-07, "loss": 7.151537283789366e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 387, "train_speed(iter/s)": 0.022401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 73.09375286102295, "completions/min_length": 34.5, "epoch": 0.7704144949118888, "grad_norm": 0.004434764158909642, "kl": 0.071502685546875, "learning_rate": 9.887389644858313e-07, "loss": 7.159661618061364e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 388, "train_speed(iter/s)": 0.022405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 83.32292032241821, "completions/min_length": 35.875, "epoch": 0.7724000992802185, "grad_norm": 0.0043503700290423025, "kl": 0.050018310546875, "learning_rate": 9.88672288129908e-07, "loss": 5.001589306630194e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 389, "train_speed(iter/s)": 0.022396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 84.36458587646484, "completions/min_length": 41.375, "epoch": 0.774385703648548, "grad_norm": 0.004625194476448961, "kl": 0.050048828125, "learning_rate": 9.88605417223109e-07, "loss": 5.003442493034527e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 390, "train_speed(iter/s)": 0.022395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 73.95833539962769, "completions/min_length": 34.0, "epoch": 0.7763713080168776, "grad_norm": 0.006495049310969569, "kl": 0.05206298828125, "learning_rate": 9.88538351792057e-07, "loss": 5.2074785344302654e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 391, "train_speed(iter/s)": 0.022387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 69.59375190734863, "completions/min_length": 31.625, "epoch": 0.7783569123852072, "grad_norm": 0.005235384921360963, "kl": 0.05841064453125, "learning_rate": 9.884710918634523e-07, "loss": 5.844328552484512e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 392, "train_speed(iter/s)": 0.022387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 83.56250286102295, "completions/min_length": 36.375, "epoch": 0.7803425167535368, "grad_norm": 1.0439111989860015, "kl": 0.050567626953125, "learning_rate": 9.884036374640723e-07, "loss": 0.00759144825860858, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 393, "train_speed(iter/s)": 0.022389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 92.51041984558105, "completions/min_length": 34.125, "epoch": 0.7823281211218664, "grad_norm": 0.005357530698709353, "kl": 0.05413818359375, "learning_rate": 9.883359886207723e-07, "loss": 5.421918467618525e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 394, "train_speed(iter/s)": 0.022383 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 68.98958587646484, "completions/min_length": 34.375, "epoch": 0.7843137254901961, "grad_norm": 1.9208803677712984, "kl": 0.067413330078125, "learning_rate": 9.882681453604844e-07, "loss": 0.010571276769042015, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 395, "train_speed(iter/s)": 0.02239 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 69.54166841506958, "completions/min_length": 31.875, "epoch": 0.7862993298585257, "grad_norm": 0.011602241931888259, "kl": 0.063201904296875, "learning_rate": 9.88200107710219e-07, "loss": 6.322594708763063e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 396, "train_speed(iter/s)": 0.022395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 70.70833539962769, "completions/min_length": 36.625, "epoch": 0.7882849342268553, "grad_norm": 0.8977360123288795, "kl": 0.055145263671875, "learning_rate": 9.881318756970626e-07, "loss": 0.005529012531042099, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 397, "train_speed(iter/s)": 0.022405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 86.03125238418579, "completions/min_length": 37.0, "epoch": 0.7902705385951849, "grad_norm": 0.004238869827726038, "kl": 0.05126953125, "learning_rate": 9.880634493481805e-07, "loss": 5.128474367666058e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 398, "train_speed(iter/s)": 0.022402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 76.614586353302, "completions/min_length": 34.375, "epoch": 0.7922561429635145, "grad_norm": 0.005037145006621605, "kl": 0.061126708984375, "learning_rate": 9.879948286908144e-07, "loss": 6.111576658440754e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 399, "train_speed(iter/s)": 0.022402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 76.79166889190674, "completions/min_length": 35.125, "epoch": 0.7942417473318442, "grad_norm": 0.004274011937062756, "kl": 0.0633544921875, "learning_rate": 9.879260137522835e-07, "loss": 6.329896859824657e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 400, "train_speed(iter/s)": 0.022403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 70.63541889190674, "completions/min_length": 33.875, "epoch": 0.7962273517001738, "grad_norm": 0.2554109089907897, "kl": 0.20697021484375, "learning_rate": 9.87857004559985e-07, "loss": 0.00020729737298097461, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 401, "train_speed(iter/s)": 0.022408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 75.8229193687439, "completions/min_length": 31.625, "epoch": 0.7982129560685034, "grad_norm": 0.011398283755190015, "kl": 0.055694580078125, "learning_rate": 9.877878011413922e-07, "loss": 5.56960585527122e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 402, "train_speed(iter/s)": 0.022403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 82.51041984558105, "completions/min_length": 34.625, "epoch": 0.8001985604368329, "grad_norm": 1.8505796388499405, "kl": 0.0501708984375, "learning_rate": 9.877184035240572e-07, "loss": 5.0192080379929394e-05, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 403, "train_speed(iter/s)": 0.022396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 80.73958444595337, "completions/min_length": 41.125, "epoch": 0.8021841648051625, "grad_norm": 0.012820796671756323, "kl": 0.057403564453125, "learning_rate": 9.87648811735608e-07, "loss": 5.742545909015462e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 404, "train_speed(iter/s)": 0.022397 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 79.27083587646484, "completions/min_length": 38.625, "epoch": 0.8041697691734921, "grad_norm": 0.014906282746045442, "kl": 0.058197021484375, "learning_rate": 9.875790258037514e-07, "loss": 5.812738891108893e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 405, "train_speed(iter/s)": 0.022401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 77.302086353302, "completions/min_length": 36.625, "epoch": 0.8061553735418218, "grad_norm": 0.011130849927422536, "kl": 0.059295654296875, "learning_rate": 9.875090457562697e-07, "loss": 5.927743040956557e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 406, "train_speed(iter/s)": 0.022406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 93.34375286102295, "completions/min_length": 40.875, "epoch": 0.8081409779101514, "grad_norm": 0.11481029559933198, "kl": 0.13702392578125, "learning_rate": 9.874388716210242e-07, "loss": 0.0001369929377688095, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 407, "train_speed(iter/s)": 0.022406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 88.364586353302, "completions/min_length": 39.0, "epoch": 0.810126582278481, "grad_norm": 0.003428084619563002, "kl": 0.047027587890625, "learning_rate": 9.873685034259524e-07, "loss": 4.6960762119852006e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 408, "train_speed(iter/s)": 0.022411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 86.16666793823242, "completions/min_length": 34.375, "epoch": 0.8121121866468106, "grad_norm": 0.00384555028749029, "kl": 0.06048583984375, "learning_rate": 9.872979411990694e-07, "loss": 6.048551222193055e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 409, "train_speed(iter/s)": 0.02241 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.875, "completions/mean_length": 83.8229193687439, "completions/min_length": 35.875, "epoch": 0.8140977910151402, "grad_norm": 0.003670618302961186, "kl": 0.067596435546875, "learning_rate": 9.872271849684674e-07, "loss": 6.753490742994472e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 410, "train_speed(iter/s)": 0.022412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 85.27083492279053, "completions/min_length": 36.125, "epoch": 0.8160833953834699, "grad_norm": 0.004061576867033558, "kl": 0.056427001953125, "learning_rate": 9.871562347623164e-07, "loss": 5.638205766445026e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 411, "train_speed(iter/s)": 0.022405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 77.59375381469727, "completions/min_length": 30.0, "epoch": 0.8180689997517995, "grad_norm": 0.00660518149892489, "kl": 0.06072998046875, "learning_rate": 9.870850906088627e-07, "loss": 6.069945084163919e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 412, "train_speed(iter/s)": 0.022407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 83.90625190734863, "completions/min_length": 40.0, "epoch": 0.8200546041201291, "grad_norm": 0.005380659804986027, "kl": 0.063812255859375, "learning_rate": 9.870137525364308e-07, "loss": 6.376288365572691e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 413, "train_speed(iter/s)": 0.022405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 76.20833683013916, "completions/min_length": 33.0, "epoch": 0.8220402084884587, "grad_norm": 0.004214593524566473, "kl": 0.047576904296875, "learning_rate": 9.869422205734215e-07, "loss": 4.756057387567125e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 414, "train_speed(iter/s)": 0.022402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.625, "completions/mean_length": 86.87500286102295, "completions/min_length": 34.125, "epoch": 0.8240258128567883, "grad_norm": 0.004190381797998117, "kl": 0.07012939453125, "learning_rate": 9.868704947483133e-07, "loss": 7.006079249549657e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 415, "train_speed(iter/s)": 0.022405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 79.07291841506958, "completions/min_length": 33.375, "epoch": 0.826011417225118, "grad_norm": 0.003436649069030626, "kl": 0.07763671875, "learning_rate": 9.867985750896619e-07, "loss": 7.758998981444165e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 416, "train_speed(iter/s)": 0.022403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 83.48958492279053, "completions/min_length": 37.125, "epoch": 0.8279970215934475, "grad_norm": 1.2588152892744315, "kl": 0.240631103515625, "learning_rate": 9.867264616261e-07, "loss": 0.003334498032927513, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 417, "train_speed(iter/s)": 0.022391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 81.20833492279053, "completions/min_length": 32.125, "epoch": 0.8299826259617771, "grad_norm": 0.106336738579708, "kl": 0.198455810546875, "learning_rate": 9.866541543863374e-07, "loss": 0.00019865986541844904, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 418, "train_speed(iter/s)": 0.022386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 88.31250238418579, "completions/min_length": 36.375, "epoch": 0.8319682303301067, "grad_norm": 0.0046822794961672055, "kl": 0.05023193359375, "learning_rate": 9.865816533991614e-07, "loss": 5.0257076509296894e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 419, "train_speed(iter/s)": 0.022374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 76.0416693687439, "completions/min_length": 34.25, "epoch": 0.8339538346984363, "grad_norm": 0.00622958021240744, "kl": 0.066162109375, "learning_rate": 9.86508958693436e-07, "loss": 6.617772305617109e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 420, "train_speed(iter/s)": 0.022378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 86.37500190734863, "completions/min_length": 43.875, "epoch": 0.8359394390667659, "grad_norm": 0.005137313718892348, "kl": 0.052154541015625, "learning_rate": 9.864360702981024e-07, "loss": 5.2151775889797136e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 421, "train_speed(iter/s)": 0.022382 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 85.05208492279053, "completions/min_length": 39.0, "epoch": 0.8379250434350956, "grad_norm": 0.015260451196580099, "kl": 0.054931640625, "learning_rate": 9.863629882421792e-07, "loss": 5.488552778842859e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 422, "train_speed(iter/s)": 0.022376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 95.82291984558105, "completions/min_length": 39.25, "epoch": 0.8399106478034252, "grad_norm": 0.0039593592890841064, "kl": 0.048614501953125, "learning_rate": 9.86289712554762e-07, "loss": 4.861816341872327e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 423, "train_speed(iter/s)": 0.022376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 79.23958587646484, "completions/min_length": 34.0, "epoch": 0.8418962521717548, "grad_norm": 0.9652327185822706, "kl": 0.058807373046875, "learning_rate": 9.862162432650234e-07, "loss": 0.005081596784293652, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 424, "train_speed(iter/s)": 0.022369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 82.72916889190674, "completions/min_length": 32.375, "epoch": 0.8438818565400844, "grad_norm": 1.0403684241123083, "kl": 0.057403564453125, "learning_rate": 9.861425804022128e-07, "loss": -0.004854725208133459, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 425, "train_speed(iter/s)": 0.022362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 83.16666889190674, "completions/min_length": 37.75, "epoch": 0.845867460908414, "grad_norm": 0.005497342568892283, "kl": 0.051544189453125, "learning_rate": 9.860687239956573e-07, "loss": 5.15462743351236e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 426, "train_speed(iter/s)": 0.022357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 91.81250286102295, "completions/min_length": 35.375, "epoch": 0.8478530652767436, "grad_norm": 0.005472516329216914, "kl": 0.04522705078125, "learning_rate": 9.859946740747607e-07, "loss": 4.515825639828108e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 427, "train_speed(iter/s)": 0.022352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 91.36458683013916, "completions/min_length": 38.125, "epoch": 0.8498386696450733, "grad_norm": 0.004497448013878643, "kl": 0.0579833984375, "learning_rate": 9.859204306690037e-07, "loss": 5.7988228945760056e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 428, "train_speed(iter/s)": 0.02234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.375, "completions/mean_length": 91.21875190734863, "completions/min_length": 41.5, "epoch": 0.8518242740134029, "grad_norm": 0.004461792169384546, "kl": 0.0748291015625, "learning_rate": 9.858459938079439e-07, "loss": 7.484655361622572e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 429, "train_speed(iter/s)": 0.022337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 82.20833587646484, "completions/min_length": 34.0, "epoch": 0.8538098783817324, "grad_norm": 0.006069683115572057, "kl": 0.065399169921875, "learning_rate": 9.85771363521217e-07, "loss": 6.542236224049702e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 430, "train_speed(iter/s)": 0.022337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 84.65625286102295, "completions/min_length": 39.125, "epoch": 0.855795482750062, "grad_norm": 0.007138089594645187, "kl": 0.063385009765625, "learning_rate": 9.856965398385342e-07, "loss": 6.338374805636704e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 431, "train_speed(iter/s)": 0.022337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.875, "completions/mean_length": 85.1354193687439, "completions/min_length": 41.125, "epoch": 0.8577810871183916, "grad_norm": 0.003952132814534776, "kl": 0.0693359375, "learning_rate": 9.856215227896847e-07, "loss": 6.939301238162443e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 432, "train_speed(iter/s)": 0.022336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 79.28125286102295, "completions/min_length": 34.25, "epoch": 0.8597666914867212, "grad_norm": 0.9416845816813911, "kl": 0.087799072265625, "learning_rate": 9.855463124045342e-07, "loss": 0.003828667104244232, "memory(GiB)": 94.21, "reward": 1.4583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.45833333395421505, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 433, "train_speed(iter/s)": 0.022339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.375, "completions/mean_length": 87.33333778381348, "completions/min_length": 33.125, "epoch": 0.8617522958550509, "grad_norm": 0.006785789142508644, "kl": 0.0637054443359375, "learning_rate": 9.85470908713026e-07, "loss": 6.379057595040649e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 434, "train_speed(iter/s)": 0.022339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 79.11458492279053, "completions/min_length": 34.375, "epoch": 0.8637379002233805, "grad_norm": 0.0059735336974445635, "kl": 0.059967041015625, "learning_rate": 9.853953117451795e-07, "loss": 5.999209679430351e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 435, "train_speed(iter/s)": 0.022343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 71.84375286102295, "completions/min_length": 31.375, "epoch": 0.8657235045917101, "grad_norm": 0.005514369774256764, "kl": 0.06683349609375, "learning_rate": 9.853195215310915e-07, "loss": 6.678507634205744e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 436, "train_speed(iter/s)": 0.022343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 74.83333587646484, "completions/min_length": 32.5, "epoch": 0.8677091089600397, "grad_norm": 0.0035355475794871067, "kl": 0.042266845703125, "learning_rate": 9.85243538100936e-07, "loss": 4.223368159728125e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 437, "train_speed(iter/s)": 0.02234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 84.27083587646484, "completions/min_length": 29.25, "epoch": 0.8696947133283693, "grad_norm": 0.004333809030999474, "kl": 0.05462646484375, "learning_rate": 9.851673614849632e-07, "loss": 5.459976091515273e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 438, "train_speed(iter/s)": 0.022332 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 79.50000238418579, "completions/min_length": 29.375, "epoch": 0.871680317696699, "grad_norm": 0.0051928642497738615, "kl": 0.052764892578125, "learning_rate": 9.850909917135009e-07, "loss": 5.270736801321618e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 439, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 73.25000095367432, "completions/min_length": 35.125, "epoch": 0.8736659220650286, "grad_norm": 1.7041063182273106, "kl": 0.066802978515625, "learning_rate": 9.850144288169535e-07, "loss": -0.007907523773610592, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 440, "train_speed(iter/s)": 0.022327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 82.40625286102295, "completions/min_length": 36.5, "epoch": 0.8756515264333582, "grad_norm": 0.006393325256835077, "kl": 0.060028076171875, "learning_rate": 9.849376728258022e-07, "loss": 6.0007772844983265e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 441, "train_speed(iter/s)": 0.022328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 81.51041984558105, "completions/min_length": 36.125, "epoch": 0.8776371308016878, "grad_norm": 0.7432697453317926, "kl": 0.056976318359375, "learning_rate": 9.848607237706056e-07, "loss": 0.007492425851523876, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 442, "train_speed(iter/s)": 0.022329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 84.82291984558105, "completions/min_length": 33.625, "epoch": 0.8796227351700173, "grad_norm": 0.9463608415933458, "kl": 0.04425048828125, "learning_rate": 9.847835816819982e-07, "loss": -0.007214564364403486, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 443, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.125, "completions/mean_length": 82.13541889190674, "completions/min_length": 38.25, "epoch": 0.8816083395383469, "grad_norm": 0.6500412661226062, "kl": 0.055908203125, "learning_rate": 9.847062465906925e-07, "loss": -0.00016089281416498125, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 444, "train_speed(iter/s)": 0.022318 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 78.54166793823242, "completions/min_length": 32.25, "epoch": 0.8835939439066766, "grad_norm": 0.006739439862654462, "kl": 0.05487060546875, "learning_rate": 9.846287185274767e-07, "loss": 5.484447319759056e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 445, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 69.00000238418579, "completions/min_length": 32.75, "epoch": 0.8855795482750062, "grad_norm": 0.004097462078476503, "kl": 0.06689453125, "learning_rate": 9.845509975232168e-07, "loss": 6.683926039841026e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 446, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 80.86458683013916, "completions/min_length": 35.0, "epoch": 0.8875651526433358, "grad_norm": 0.9098200202030318, "kl": 0.057952880859375, "learning_rate": 9.844730836088548e-07, "loss": 5.786865949630737e-05, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393530294299126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 447, "train_speed(iter/s)": 0.022325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.125, "completions/mean_length": 93.62500476837158, "completions/min_length": 37.875, "epoch": 0.8895507570116654, "grad_norm": 0.012737751849066711, "kl": 0.0550994873046875, "learning_rate": 9.843949768154103e-07, "loss": 5.510202026925981e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 448, "train_speed(iter/s)": 0.022316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 86.38541793823242, "completions/min_length": 34.125, "epoch": 0.891536361379995, "grad_norm": 1.1000038347729983, "kl": 0.085540771484375, "learning_rate": 9.84316677173979e-07, "loss": -0.013191180303692818, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 449, "train_speed(iter/s)": 0.022316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 80.08333587646484, "completions/min_length": 38.75, "epoch": 0.8935219657483247, "grad_norm": 0.9812317954361416, "kl": 0.05841064453125, "learning_rate": 9.842381847157338e-07, "loss": 0.0026867142878472805, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 450, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 84.42708396911621, "completions/min_length": 35.375, "epoch": 0.8955075701166543, "grad_norm": 1.0822573801673039, "kl": 0.083709716796875, "learning_rate": 9.841594994719242e-07, "loss": -0.0019134258618578315, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8229166679084301, "rewards/CineAccuracyORM/std": 0.18335824459791183, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 451, "train_speed(iter/s)": 0.022323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 82.78125286102295, "completions/min_length": 34.875, "epoch": 0.8974931744849839, "grad_norm": 0.010675053619692178, "kl": 0.054962158203125, "learning_rate": 9.840806214738763e-07, "loss": 5.505572698893957e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 452, "train_speed(iter/s)": 0.022329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 90.69792032241821, "completions/min_length": 38.125, "epoch": 0.8994787788533135, "grad_norm": 0.004581280845179157, "kl": 0.04107666015625, "learning_rate": 9.840015507529936e-07, "loss": 4.1029979911400005e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 453, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.625, "completions/mean_length": 99.42708587646484, "completions/min_length": 44.125, "epoch": 0.9014643832216431, "grad_norm": 0.685749072217824, "kl": 0.065582275390625, "learning_rate": 9.839222873407553e-07, "loss": 0.009730573743581772, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 454, "train_speed(iter/s)": 0.02232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.875, "completions/mean_length": 90.40625381469727, "completions/min_length": 37.5, "epoch": 0.9034499875899727, "grad_norm": 0.003985228831653713, "kl": 0.061492919921875, "learning_rate": 9.838428312687179e-07, "loss": 6.147942622192204e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 455, "train_speed(iter/s)": 0.022312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 88.73958492279053, "completions/min_length": 34.5, "epoch": 0.9054355919583023, "grad_norm": 1.5560878386072767, "kl": 0.05950927734375, "learning_rate": 9.83763182568515e-07, "loss": -0.011519490741193295, "memory(GiB)": 94.21, "reward": 1.7916666865348816, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.12309149652719498, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 456, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 93.26041889190674, "completions/min_length": 43.125, "epoch": 0.9074211963266319, "grad_norm": 0.006232388259372655, "kl": 0.0594482421875, "learning_rate": 9.83683341271856e-07, "loss": 5.939140828559175e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 457, "train_speed(iter/s)": 0.022303 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 99.54167079925537, "completions/min_length": 42.0, "epoch": 0.9094068006949615, "grad_norm": 0.005745981046939235, "kl": 0.06011962890625, "learning_rate": 9.836033074105277e-07, "loss": 6.0100726841483265e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 458, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 95.10416793823242, "completions/min_length": 30.25, "epoch": 0.9113924050632911, "grad_norm": 0.5701364923659495, "kl": 0.057586669921875, "learning_rate": 9.835230810163932e-07, "loss": -0.004036252852529287, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 459, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.375, "completions/mean_length": 88.8854193687439, "completions/min_length": 34.875, "epoch": 0.9133780094316207, "grad_norm": 0.004460898395404908, "kl": 0.04986572265625, "learning_rate": 9.834426621213923e-07, "loss": 4.981301026418805e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 460, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 97.92708683013916, "completions/min_length": 39.5, "epoch": 0.9153636137999503, "grad_norm": 0.9662989797226541, "kl": 0.065399169921875, "learning_rate": 9.833620507575415e-07, "loss": -0.0023719172459095716, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 461, "train_speed(iter/s)": 0.022274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 85.98958492279053, "completions/min_length": 34.875, "epoch": 0.91734921816828, "grad_norm": 0.005387381371872264, "kl": 0.064361572265625, "learning_rate": 9.83281246956934e-07, "loss": 6.429507629945874e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 462, "train_speed(iter/s)": 0.02227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 81.69791841506958, "completions/min_length": 39.375, "epoch": 0.9193348225366096, "grad_norm": 0.8071229644781568, "kl": 0.06201171875, "learning_rate": 9.832002507517392e-07, "loss": 6.19615166215226e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 463, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.125, "completions/mean_length": 90.82291984558105, "completions/min_length": 35.5, "epoch": 0.9213204269049392, "grad_norm": 0.0058570233010228345, "kl": 0.0528564453125, "learning_rate": 9.831190621742038e-07, "loss": 5.286728992359713e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 464, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 81.32291841506958, "completions/min_length": 29.875, "epoch": 0.9233060312732688, "grad_norm": 0.8001698029224038, "kl": 0.077239990234375, "learning_rate": 9.830376812566507e-07, "loss": -0.0023334722500294447, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 465, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 78.90625190734863, "completions/min_length": 39.625, "epoch": 0.9252916356415984, "grad_norm": 0.9830800175828178, "kl": 0.076934814453125, "learning_rate": 9.829561080314793e-07, "loss": -0.002045848174020648, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 466, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 91.01041793823242, "completions/min_length": 32.25, "epoch": 0.9272772400099281, "grad_norm": 0.6224140283352512, "kl": 0.064666748046875, "learning_rate": 9.828743425311653e-07, "loss": 0.0011969867628067732, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 467, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 88.89583683013916, "completions/min_length": 36.625, "epoch": 0.9292628443782577, "grad_norm": 0.007733428656259304, "kl": 0.068450927734375, "learning_rate": 9.827923847882616e-07, "loss": 6.8443245254457e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 468, "train_speed(iter/s)": 0.022272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 68.41666841506958, "completions/min_length": 30.375, "epoch": 0.9312484487465872, "grad_norm": 0.009634204070546371, "kl": 0.09173583984375, "learning_rate": 9.827102348353975e-07, "loss": 9.169169061351568e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 469, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 79.47916889190674, "completions/min_length": 32.125, "epoch": 0.9332340531149168, "grad_norm": 0.009121017923592918, "kl": 0.0889892578125, "learning_rate": 9.826278927052783e-07, "loss": 8.899247040972114e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 470, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 70.67708539962769, "completions/min_length": 30.875, "epoch": 0.9352196574832464, "grad_norm": 0.8894852280259735, "kl": 0.10858154296875, "learning_rate": 9.825453584306865e-07, "loss": 0.0014512266498059034, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 471, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 74.69791889190674, "completions/min_length": 34.875, "epoch": 0.937205261851576, "grad_norm": 0.7357905779175437, "kl": 0.11627197265625, "learning_rate": 9.824626320444803e-07, "loss": 0.0033022775314748287, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 472, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 76.45833587646484, "completions/min_length": 37.25, "epoch": 0.9391908662199057, "grad_norm": 0.009486380069485866, "kl": 0.104736328125, "learning_rate": 9.823797135795955e-07, "loss": 0.00010473228758201003, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 473, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 80.36458492279053, "completions/min_length": 32.375, "epoch": 0.9411764705882353, "grad_norm": 0.009200236837578818, "kl": 0.11175537109375, "learning_rate": 9.82296603069043e-07, "loss": 0.00011178151180502027, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 474, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 75.43750190734863, "completions/min_length": 30.375, "epoch": 0.9431620749565649, "grad_norm": 0.009447692408176854, "kl": 0.1064453125, "learning_rate": 9.822133005459115e-07, "loss": 0.0001064460666384548, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 475, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 72.09375238418579, "completions/min_length": 31.125, "epoch": 0.9451476793248945, "grad_norm": 0.9053407087114659, "kl": 0.11505126953125, "learning_rate": 9.82129806043365e-07, "loss": 0.00011500219989102334, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.04865618422627449, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 476, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 71.66666746139526, "completions/min_length": 34.625, "epoch": 0.9471332836932241, "grad_norm": 0.009712097149882143, "kl": 0.15216064453125, "learning_rate": 9.820461195946446e-07, "loss": 0.00015226402319967747, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 477, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 67.01041793823242, "completions/min_length": 29.875, "epoch": 0.9491188880615538, "grad_norm": 0.9097396548529905, "kl": 0.12908935546875, "learning_rate": 9.81962241233068e-07, "loss": 0.017307542264461517, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 478, "train_speed(iter/s)": 0.022293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 65.73958492279053, "completions/min_length": 31.875, "epoch": 0.9511044924298834, "grad_norm": 0.010650313531835813, "kl": 0.14459228515625, "learning_rate": 9.818781709920283e-07, "loss": 0.00014469510642811656, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 479, "train_speed(iter/s)": 0.022299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 73.03125190734863, "completions/min_length": 35.625, "epoch": 0.953090096798213, "grad_norm": 0.009855928168650403, "kl": 0.12811279296875, "learning_rate": 9.817939089049965e-07, "loss": 0.0001281301665585488, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 480, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 73.45833492279053, "completions/min_length": 29.375, "epoch": 0.9550757011665426, "grad_norm": 0.010171105548975897, "kl": 0.12945556640625, "learning_rate": 9.817094550055184e-07, "loss": 0.00012945401249453425, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 481, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.875, "completions/mean_length": 80.75000190734863, "completions/min_length": 33.125, "epoch": 0.9570613055348722, "grad_norm": 0.009809432741606488, "kl": 0.14013671875, "learning_rate": 9.816248093272172e-07, "loss": 0.00014013744657859206, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 482, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 74.56250047683716, "completions/min_length": 32.75, "epoch": 0.9590469099032017, "grad_norm": 1.1564833909047878, "kl": 0.1185302734375, "learning_rate": 9.815399719037923e-07, "loss": -0.003725347574800253, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 483, "train_speed(iter/s)": 0.022303 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 75.45833396911621, "completions/min_length": 30.25, "epoch": 0.9610325142715314, "grad_norm": 0.0097016888727827, "kl": 0.1087646484375, "learning_rate": 9.81454942769019e-07, "loss": 0.00010866991215152666, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 484, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 68.06250190734863, "completions/min_length": 33.625, "epoch": 0.963018118639861, "grad_norm": 0.8021611569373516, "kl": 0.12957763671875, "learning_rate": 9.813697219567491e-07, "loss": -0.007320082746446133, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 485, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 77.09375143051147, "completions/min_length": 28.125, "epoch": 0.9650037230081906, "grad_norm": 0.6919317084759832, "kl": 0.12451171875, "learning_rate": 9.812843095009115e-07, "loss": 0.007042422890663147, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 486, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.875, "completions/mean_length": 64.88541889190674, "completions/min_length": 31.625, "epoch": 0.9669893273765202, "grad_norm": 0.01100579170144626, "kl": 0.13653564453125, "learning_rate": 9.811987054355101e-07, "loss": 0.00013647452578879893, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 487, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 64.76041793823242, "completions/min_length": 29.25, "epoch": 0.9689749317448498, "grad_norm": 1.2087116831368303, "kl": 0.19085693359375, "learning_rate": 9.81112909794626e-07, "loss": -0.011763500049710274, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 488, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 72.67708539962769, "completions/min_length": 34.5, "epoch": 0.9709605361131795, "grad_norm": 1.6311525741553783, "kl": 0.1282958984375, "learning_rate": 9.810269226124159e-07, "loss": 0.005242710467427969, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 489, "train_speed(iter/s)": 0.022314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 68.51041841506958, "completions/min_length": 33.625, "epoch": 0.9729461404815091, "grad_norm": 0.010023110660537278, "kl": 0.107421875, "learning_rate": 9.809407439231138e-07, "loss": 0.00010748908971436322, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 490, "train_speed(iter/s)": 0.022312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 70.56250190734863, "completions/min_length": 29.375, "epoch": 0.9749317448498387, "grad_norm": 0.009828444264579165, "kl": 0.10894775390625, "learning_rate": 9.808543737610286e-07, "loss": 0.00010895646846620366, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 491, "train_speed(iter/s)": 0.022317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.875, "completions/mean_length": 81.20833683013916, "completions/min_length": 30.5, "epoch": 0.9769173492181683, "grad_norm": 0.8947125321222433, "kl": 0.09814453125, "learning_rate": 9.807678121605465e-07, "loss": -0.0024527187924832106, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 492, "train_speed(iter/s)": 0.022317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 65.69791889190674, "completions/min_length": 29.5, "epoch": 0.9789029535864979, "grad_norm": 0.010135246690629863, "kl": 0.103271484375, "learning_rate": 9.806810591561293e-07, "loss": 0.00010322139132767916, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 493, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 69.72916841506958, "completions/min_length": 30.25, "epoch": 0.9808885579548275, "grad_norm": 1.1786096067559, "kl": 0.101806640625, "learning_rate": 9.805941147823156e-07, "loss": 0.00010183329868596047, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 494, "train_speed(iter/s)": 0.022317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 75.989586353302, "completions/min_length": 38.0, "epoch": 0.9828741623231572, "grad_norm": 1.3332248847327404, "kl": 0.066925048828125, "learning_rate": 9.805069790737192e-07, "loss": -0.014144557528197765, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.15789688751101494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 495, "train_speed(iter/s)": 0.022314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 79.25000190734863, "completions/min_length": 32.5, "epoch": 0.9848597666914867, "grad_norm": 0.008184887848023242, "kl": 0.079193115234375, "learning_rate": 9.804196520650315e-07, "loss": 7.917750917840749e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 496, "train_speed(iter/s)": 0.02231 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 79.40625381469727, "completions/min_length": 37.25, "epoch": 0.9868453710598163, "grad_norm": 0.008136075588755821, "kl": 0.08233642578125, "learning_rate": 9.803321337910184e-07, "loss": 8.249451639130712e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 497, "train_speed(iter/s)": 0.022309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 81.05208587646484, "completions/min_length": 41.125, "epoch": 0.9888309754281459, "grad_norm": 0.914418444581033, "kl": 0.084442138671875, "learning_rate": 9.802444242865232e-07, "loss": -0.004461756441742182, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 498, "train_speed(iter/s)": 0.022306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 71.48958492279053, "completions/min_length": 26.5, "epoch": 0.9908165797964755, "grad_norm": 0.033248927385579524, "kl": 0.119659423828125, "learning_rate": 9.80156523586465e-07, "loss": 0.00011982783325947821, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 499, "train_speed(iter/s)": 0.02231 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 68.51041841506958, "completions/min_length": 32.25, "epoch": 0.9928021841648051, "grad_norm": 0.013145718844948953, "kl": 0.103851318359375, "learning_rate": 9.80068431725839e-07, "loss": 0.00010379446030128747, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 500, "train_speed(iter/s)": 0.022314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 70.00000238418579, "completions/min_length": 27.875, "epoch": 0.9947877885331348, "grad_norm": 0.008375740313006693, "kl": 0.078643798828125, "learning_rate": 9.799801487397161e-07, "loss": 7.863771315896884e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 501, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 74.84375333786011, "completions/min_length": 32.875, "epoch": 0.9967733929014644, "grad_norm": 0.007199773473163106, "kl": 0.0740966796875, "learning_rate": 9.79891674663244e-07, "loss": 7.400034519378096e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 502, "train_speed(iter/s)": 0.02229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 75.79166793823242, "completions/min_length": 31.25, "epoch": 0.998758997269794, "grad_norm": 0.006870565108412514, "kl": 0.063812255859375, "learning_rate": 9.798030095316459e-07, "loss": 6.379494880093262e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 503, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 70.03125286102295, "completions/min_length": 28.75, "epoch": 1.0019856043683295, "grad_norm": 0.005756176540661593, "kl": 0.07269287109375, "learning_rate": 9.797141533802213e-07, "loss": 7.26907528587617e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 504, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 73.33333444595337, "completions/min_length": 32.125, "epoch": 1.0039712087366592, "grad_norm": 0.007158366064493304, "kl": 0.074371337890625, "learning_rate": 9.796251062443458e-07, "loss": 7.43550481274724e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 505, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 73.13541889190674, "completions/min_length": 30.125, "epoch": 1.0059568131049887, "grad_norm": 0.018076724632159164, "kl": 0.09466552734375, "learning_rate": 9.795358681594709e-07, "loss": 9.470425720792264e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 506, "train_speed(iter/s)": 0.02228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 74.0416693687439, "completions/min_length": 28.875, "epoch": 1.0079424174733185, "grad_norm": 0.00504712541718879, "kl": 0.071502685546875, "learning_rate": 9.794464391611244e-07, "loss": 7.15227797627449e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 507, "train_speed(iter/s)": 0.022282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 67.44791746139526, "completions/min_length": 30.125, "epoch": 1.009928021841648, "grad_norm": 0.01076316741107328, "kl": 0.079925537109375, "learning_rate": 9.793568192849098e-07, "loss": 7.990360609255731e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 508, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 79.14583539962769, "completions/min_length": 31.0, "epoch": 1.0119136262099777, "grad_norm": 0.006282426776595067, "kl": 0.0953369140625, "learning_rate": 9.792670085665063e-07, "loss": 9.538599260849878e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 509, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 75.69791793823242, "completions/min_length": 32.75, "epoch": 1.0138992305783072, "grad_norm": 0.01716490730911805, "kl": 0.094696044921875, "learning_rate": 9.7917700704167e-07, "loss": 9.4724862719886e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 510, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 69.45833492279053, "completions/min_length": 28.0, "epoch": 1.015884834946637, "grad_norm": 0.004842520584587723, "kl": 0.07867431640625, "learning_rate": 9.790868147462321e-07, "loss": 7.858409662730992e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 511, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 72.75000190734863, "completions/min_length": 36.125, "epoch": 1.0178704393149665, "grad_norm": 2.162647037893844, "kl": 0.10076904296875, "learning_rate": 9.789964317161002e-07, "loss": -0.0035934101324528456, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7500000037252903, "rewards/CineAccuracyORM/std": 0.2536497339606285, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 512, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 73.79166984558105, "completions/min_length": 37.25, "epoch": 1.0198560436832962, "grad_norm": 0.004328347207335489, "kl": 0.0701904296875, "learning_rate": 9.789058579872578e-07, "loss": 7.007941894698888e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 513, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 89.18750286102295, "completions/min_length": 35.5, "epoch": 1.0218416480516257, "grad_norm": 1.8372919943631991, "kl": 0.064453125, "learning_rate": 9.788150935957643e-07, "loss": -0.004091219510883093, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 514, "train_speed(iter/s)": 0.022278 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 76.208336353302, "completions/min_length": 34.625, "epoch": 1.0238272524199554, "grad_norm": 0.005050457461279664, "kl": 0.071044921875, "learning_rate": 9.787241385777545e-07, "loss": 7.10004023858346e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 515, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 73.90625333786011, "completions/min_length": 29.625, "epoch": 1.025812856788285, "grad_norm": 0.7704170526906402, "kl": 0.086456298828125, "learning_rate": 9.786329929694399e-07, "loss": 8.654098201077431e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 516, "train_speed(iter/s)": 0.022282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 69.11458492279053, "completions/min_length": 30.75, "epoch": 1.0277984611566144, "grad_norm": 0.005801654499940909, "kl": 0.061981201171875, "learning_rate": 9.785416568071074e-07, "loss": 6.214170571183786e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 517, "train_speed(iter/s)": 0.022281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 79.60416889190674, "completions/min_length": 34.0, "epoch": 1.0297840655249442, "grad_norm": 0.004460711134197175, "kl": 0.07073974609375, "learning_rate": 9.7845013012712e-07, "loss": 7.083349191816524e-05, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.45695383101701736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 518, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 72.54166841506958, "completions/min_length": 34.25, "epoch": 1.0317696698932737, "grad_norm": 1.4908082648025123, "kl": 0.07379150390625, "learning_rate": 9.78358412965916e-07, "loss": 0.004011109005659819, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 519, "train_speed(iter/s)": 0.022282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 76.08333492279053, "completions/min_length": 32.0, "epoch": 1.0337552742616034, "grad_norm": 0.0051476155769187475, "kl": 0.056854248046875, "learning_rate": 9.782665053600108e-07, "loss": 5.6818025768734515e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 520, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 89.37500190734863, "completions/min_length": 45.875, "epoch": 1.035740878629933, "grad_norm": 0.004454868572791521, "kl": 0.068878173828125, "learning_rate": 9.781744073459941e-07, "loss": 6.887844210723415e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 521, "train_speed(iter/s)": 0.022276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 80.09375381469727, "completions/min_length": 36.625, "epoch": 1.0377264829982626, "grad_norm": 0.020376927241310686, "kl": 0.103729248046875, "learning_rate": 9.780821189605321e-07, "loss": 0.00010364950867369771, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 522, "train_speed(iter/s)": 0.022274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 81.53125190734863, "completions/min_length": 41.25, "epoch": 1.0397120873665922, "grad_norm": 0.004244578044599986, "kl": 0.071319580078125, "learning_rate": 9.779896402403673e-07, "loss": 7.135018677217886e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 523, "train_speed(iter/s)": 0.02227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 75.16666841506958, "completions/min_length": 32.0, "epoch": 1.0416976917349219, "grad_norm": 0.9229648514430092, "kl": 0.062896728515625, "learning_rate": 9.778969712223168e-07, "loss": -0.00029640336288139224, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 524, "train_speed(iter/s)": 0.022272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 81.65625333786011, "completions/min_length": 31.75, "epoch": 1.0436832961032514, "grad_norm": 0.005766362042818235, "kl": 0.07427978515625, "learning_rate": 9.778041119432745e-07, "loss": 7.427332457154989e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 525, "train_speed(iter/s)": 0.022269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 70.98958587646484, "completions/min_length": 33.5, "epoch": 1.0456689004715811, "grad_norm": 0.005632745793919945, "kl": 0.08355712890625, "learning_rate": 9.777110624402097e-07, "loss": 8.361268555745482e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 526, "train_speed(iter/s)": 0.022269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 77.302086353302, "completions/min_length": 32.0, "epoch": 1.0476545048399106, "grad_norm": 0.788546399607603, "kl": 0.07257080078125, "learning_rate": 9.77617822750167e-07, "loss": 0.00844595581293106, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 527, "train_speed(iter/s)": 0.022271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 78.57291793823242, "completions/min_length": 33.625, "epoch": 1.0496401092082404, "grad_norm": 0.00458352611145281, "kl": 0.080230712890625, "learning_rate": 9.775243929102675e-07, "loss": 8.02253489382565e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 528, "train_speed(iter/s)": 0.022268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 84.77083492279053, "completions/min_length": 35.375, "epoch": 1.0516257135765699, "grad_norm": 0.006605951247201382, "kl": 0.07244873046875, "learning_rate": 9.774307729577075e-07, "loss": 7.250799535540864e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 529, "train_speed(iter/s)": 0.022264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 77.42708539962769, "completions/min_length": 35.625, "epoch": 1.0536113179448994, "grad_norm": 0.7840239999005865, "kl": 0.07177734375, "learning_rate": 9.773369629297592e-07, "loss": 0.013522615656256676, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 530, "train_speed(iter/s)": 0.022264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 78.37500143051147, "completions/min_length": 31.625, "epoch": 1.055596922313229, "grad_norm": 0.008212685233090624, "kl": 0.074432373046875, "learning_rate": 9.772429628637702e-07, "loss": 7.443128561135381e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 531, "train_speed(iter/s)": 0.022262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.625, "completions/mean_length": 90.52083587646484, "completions/min_length": 39.5, "epoch": 1.0575825266815586, "grad_norm": 0.7164334416660307, "kl": 0.089874267578125, "learning_rate": 9.77148772797164e-07, "loss": -0.00206756847910583, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 532, "train_speed(iter/s)": 0.022256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 76.44791984558105, "completions/min_length": 35.25, "epoch": 1.0595681310498883, "grad_norm": 0.007644393277728821, "kl": 0.068206787109375, "learning_rate": 9.770543927674397e-07, "loss": 6.809654587414116e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 533, "train_speed(iter/s)": 0.022254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 69.43750143051147, "completions/min_length": 33.625, "epoch": 1.0615537354182178, "grad_norm": 0.00816462313993516, "kl": 0.079376220703125, "learning_rate": 9.769598228121721e-07, "loss": 7.938842463772744e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 534, "train_speed(iter/s)": 0.022253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 69.57291793823242, "completions/min_length": 33.0, "epoch": 1.0635393397865476, "grad_norm": 0.8271361955283226, "kl": 0.071990966796875, "learning_rate": 9.768650629690112e-07, "loss": 0.006067268550395966, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 535, "train_speed(iter/s)": 0.022255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 73.57291841506958, "completions/min_length": 34.5, "epoch": 1.065524944154877, "grad_norm": 0.005765702871034344, "kl": 0.091094970703125, "learning_rate": 9.767701132756832e-07, "loss": 9.09459195099771e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 536, "train_speed(iter/s)": 0.022253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 74.59375190734863, "completions/min_length": 34.25, "epoch": 1.0675105485232068, "grad_norm": 0.0075379752580999665, "kl": 0.06256103515625, "learning_rate": 9.766749737699894e-07, "loss": 6.25491957180202e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 537, "train_speed(iter/s)": 0.022256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 71.52083587646484, "completions/min_length": 31.375, "epoch": 1.0694961528915363, "grad_norm": 0.007045632345683777, "kl": 0.076202392578125, "learning_rate": 9.765796444898073e-07, "loss": 7.618629024364054e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 538, "train_speed(iter/s)": 0.022253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 81.64583492279053, "completions/min_length": 38.5, "epoch": 1.071481757259866, "grad_norm": 1.2930726323799988, "kl": 0.0855712890625, "learning_rate": 9.76484125473089e-07, "loss": -0.0059394738636910915, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 539, "train_speed(iter/s)": 0.022253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 60.01041889190674, "completions/min_length": 30.875, "epoch": 1.0734673616281956, "grad_norm": 1.6187815902552098, "kl": 0.07476806640625, "learning_rate": 9.76388416757863e-07, "loss": 0.0015911355149000883, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 540, "train_speed(iter/s)": 0.022259 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 68.67708539962769, "completions/min_length": 32.125, "epoch": 1.0754529659965253, "grad_norm": 0.006886318912753367, "kl": 0.088104248046875, "learning_rate": 9.76292518382233e-07, "loss": 8.805892139207572e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 541, "train_speed(iter/s)": 0.02226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 70.92708396911621, "completions/min_length": 30.75, "epoch": 1.0774385703648548, "grad_norm": 0.007738070130499908, "kl": 0.087615966796875, "learning_rate": 9.761964303843779e-07, "loss": 8.774644084041938e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 542, "train_speed(iter/s)": 0.022264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 79.00000143051147, "completions/min_length": 34.25, "epoch": 1.0794241747331843, "grad_norm": 1.3135111635460766, "kl": 0.08270263671875, "learning_rate": 9.761001528025525e-07, "loss": -0.00990224163979292, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.10518955811858177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 543, "train_speed(iter/s)": 0.022269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 65.88541793823242, "completions/min_length": 31.125, "epoch": 1.081409779101514, "grad_norm": 0.9530289209328595, "kl": 0.087738037109375, "learning_rate": 9.760036856750871e-07, "loss": 0.008948824368417263, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 544, "train_speed(iter/s)": 0.022268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.375, "completions/mean_length": 76.25000286102295, "completions/min_length": 35.75, "epoch": 1.0833953834698435, "grad_norm": 0.8307619189537885, "kl": 0.082550048828125, "learning_rate": 9.759070290403872e-07, "loss": -0.001533027971163392, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 545, "train_speed(iter/s)": 0.022267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 67.15625238418579, "completions/min_length": 28.25, "epoch": 1.0853809878381733, "grad_norm": 0.007410270803046887, "kl": 0.088897705078125, "learning_rate": 9.758101829369338e-07, "loss": 8.878414519131184e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 546, "train_speed(iter/s)": 0.022272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 64.50000047683716, "completions/min_length": 33.25, "epoch": 1.0873665922065028, "grad_norm": 0.006274003877070703, "kl": 0.094268798828125, "learning_rate": 9.757131474032836e-07, "loss": 9.4304108642973e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 547, "train_speed(iter/s)": 0.022269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 70.78125190734863, "completions/min_length": 29.875, "epoch": 1.0893521965748325, "grad_norm": 0.008522866321777733, "kl": 0.08306884765625, "learning_rate": 9.756159224780685e-07, "loss": 8.306295785587281e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 548, "train_speed(iter/s)": 0.022269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 78.2291693687439, "completions/min_length": 33.25, "epoch": 1.091337800943162, "grad_norm": 0.7510567466480369, "kl": 0.09552001953125, "learning_rate": 9.755185081999955e-07, "loss": -0.007611400447785854, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 549, "train_speed(iter/s)": 0.022268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 69.33333492279053, "completions/min_length": 27.125, "epoch": 1.0933234053114917, "grad_norm": 0.7092548104403367, "kl": 0.08740234375, "learning_rate": 9.754209046078478e-07, "loss": 0.011761273257434368, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 550, "train_speed(iter/s)": 0.022267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 72.59375143051147, "completions/min_length": 32.75, "epoch": 1.0953090096798213, "grad_norm": 0.004961644686553474, "kl": 0.07257080078125, "learning_rate": 9.75323111740483e-07, "loss": 7.263156294357032e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 551, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 70.0416693687439, "completions/min_length": 29.625, "epoch": 1.097294614048151, "grad_norm": 0.0045129546953054335, "kl": 0.069488525390625, "learning_rate": 9.75225129636835e-07, "loss": 6.942203617654741e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 552, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 66.03125238418579, "completions/min_length": 33.25, "epoch": 1.0992802184164805, "grad_norm": 0.008681414916424587, "kl": 0.069915771484375, "learning_rate": 9.75126958335912e-07, "loss": 6.986403604969382e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 553, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 66.64583492279053, "completions/min_length": 28.375, "epoch": 1.1012658227848102, "grad_norm": 1.0271986211342294, "kl": 0.093963623046875, "learning_rate": 9.750285978767986e-07, "loss": 9.381522977491841e-05, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666679084301, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 554, "train_speed(iter/s)": 0.022273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 66.58333444595337, "completions/min_length": 29.625, "epoch": 1.1032514271531397, "grad_norm": 0.005193049814199608, "kl": 0.0740966796875, "learning_rate": 9.749300482986542e-07, "loss": 7.403695781249553e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 555, "train_speed(iter/s)": 0.022277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 68.97916841506958, "completions/min_length": 26.0, "epoch": 1.1052370315214692, "grad_norm": 0.006816479913573796, "kl": 0.08026123046875, "learning_rate": 9.74831309640713e-07, "loss": 8.033551421249285e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 556, "train_speed(iter/s)": 0.02228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.375, "completions/mean_length": 71.0104193687439, "completions/min_length": 30.125, "epoch": 1.107222635889799, "grad_norm": 0.007952116847060918, "kl": 0.0810546875, "learning_rate": 9.747323819422854e-07, "loss": 8.10863493825309e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 557, "train_speed(iter/s)": 0.022275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 66.62500190734863, "completions/min_length": 33.125, "epoch": 1.1092082402581285, "grad_norm": 0.009222802985643283, "kl": 0.07696533203125, "learning_rate": 9.746332652427565e-07, "loss": 7.687990000704303e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 558, "train_speed(iter/s)": 0.022279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 69.54166889190674, "completions/min_length": 30.25, "epoch": 1.1111938446264582, "grad_norm": 0.005154444981706478, "kl": 0.07647705078125, "learning_rate": 9.745339595815866e-07, "loss": 7.652018393855542e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 559, "train_speed(iter/s)": 0.022282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 75.03125286102295, "completions/min_length": 27.75, "epoch": 1.1131794489947877, "grad_norm": 0.0054852679381394535, "kl": 0.078277587890625, "learning_rate": 9.744344649983118e-07, "loss": 7.82212518970482e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 560, "train_speed(iter/s)": 0.022282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 74.20833539962769, "completions/min_length": 37.75, "epoch": 1.1151650533631174, "grad_norm": 0.0046925295878660934, "kl": 0.07037353515625, "learning_rate": 9.743347815325427e-07, "loss": 7.043230289127678e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 561, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 68.68750143051147, "completions/min_length": 28.125, "epoch": 1.117150657731447, "grad_norm": 0.0064295972139579525, "kl": 0.082794189453125, "learning_rate": 9.742349092239657e-07, "loss": 8.272120612673461e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 562, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 73.55208587646484, "completions/min_length": 28.625, "epoch": 1.1191362620997767, "grad_norm": 0.006041683775244193, "kl": 0.06353759765625, "learning_rate": 9.741348481123417e-07, "loss": 6.355245568556711e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 563, "train_speed(iter/s)": 0.022288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 67.50000143051147, "completions/min_length": 30.625, "epoch": 1.1211218664681062, "grad_norm": 0.005537210520376235, "kl": 0.06201171875, "learning_rate": 9.740345982375075e-07, "loss": 6.209105777088553e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 564, "train_speed(iter/s)": 0.02229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 87.48958539962769, "completions/min_length": 31.5, "epoch": 1.123107470836436, "grad_norm": 0.004627467234734946, "kl": 0.060821533203125, "learning_rate": 9.739341596393744e-07, "loss": 6.0824411775683984e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 565, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 67.67708683013916, "completions/min_length": 29.375, "epoch": 1.1250930752047654, "grad_norm": 0.005403305140264467, "kl": 0.0694580078125, "learning_rate": 9.738335323579296e-07, "loss": 6.947192014195025e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 566, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 68.79166793823242, "completions/min_length": 28.75, "epoch": 1.1270786795730952, "grad_norm": 0.008100067700864159, "kl": 0.08172607421875, "learning_rate": 9.737327164332345e-07, "loss": 8.171075023710728e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 567, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.125, "completions/mean_length": 83.60417079925537, "completions/min_length": 31.25, "epoch": 1.1290642839414247, "grad_norm": 0.005568777931783807, "kl": 0.084014892578125, "learning_rate": 9.736317119054266e-07, "loss": 8.400918886763975e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 568, "train_speed(iter/s)": 0.022286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 73.78125143051147, "completions/min_length": 29.375, "epoch": 1.1310498883097542, "grad_norm": 1.377599708980024, "kl": 0.08050537109375, "learning_rate": 9.735305188147174e-07, "loss": -0.004226197954267263, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 569, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 74.00000190734863, "completions/min_length": 36.375, "epoch": 1.133035492678084, "grad_norm": 0.9006534554013635, "kl": 0.079132080078125, "learning_rate": 9.734291372013944e-07, "loss": 0.004293154925107956, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 570, "train_speed(iter/s)": 0.022286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.625, "completions/mean_length": 70.63541793823242, "completions/min_length": 31.0, "epoch": 1.1350210970464134, "grad_norm": 0.006454065852785443, "kl": 0.073272705078125, "learning_rate": 9.733275671058194e-07, "loss": 7.332953828154132e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 571, "train_speed(iter/s)": 0.022283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 65.14583539962769, "completions/min_length": 28.375, "epoch": 1.1370067014147431, "grad_norm": 0.005973812324536165, "kl": 0.075653076171875, "learning_rate": 9.732258085684301e-07, "loss": 7.564792758785188e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 572, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 63.47916841506958, "completions/min_length": 27.375, "epoch": 1.1389923057830726, "grad_norm": 0.005360766270262526, "kl": 0.0867919921875, "learning_rate": 9.731238616297386e-07, "loss": 8.681170584168285e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 573, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 62.281250953674316, "completions/min_length": 29.125, "epoch": 1.1409779101514024, "grad_norm": 0.0362138873978799, "kl": 0.1376953125, "learning_rate": 9.73021726330332e-07, "loss": 0.00013740996655542403, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 574, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 67.67708587646484, "completions/min_length": 31.25, "epoch": 1.1429635145197319, "grad_norm": 0.005293666035821966, "kl": 0.063720703125, "learning_rate": 9.729194027108727e-07, "loss": 6.370741175487638e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 575, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 67.43750238418579, "completions/min_length": 29.75, "epoch": 1.1449491188880616, "grad_norm": 1.3119494316607527, "kl": 0.06207275390625, "learning_rate": 9.728168908120978e-07, "loss": 6.201180804055184e-05, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 576, "train_speed(iter/s)": 0.022298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 249.5, "completions/mean_length": 88.40625238418579, "completions/min_length": 33.0, "epoch": 1.1469347232563911, "grad_norm": 0.3271877449936268, "kl": 0.0828857421875, "learning_rate": 9.727141906748195e-07, "loss": 0.022682178765535355, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 577, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 70.32291793823242, "completions/min_length": 27.75, "epoch": 1.1489203276247208, "grad_norm": 0.005576377585265308, "kl": 0.09912109375, "learning_rate": 9.726113023399248e-07, "loss": 9.906635386869311e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 578, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 72.19791793823242, "completions/min_length": 31.25, "epoch": 1.1509059319930504, "grad_norm": 0.009045235726883746, "kl": 0.09552001953125, "learning_rate": 9.725082258483764e-07, "loss": 9.553381096338853e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 579, "train_speed(iter/s)": 0.022285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.875, "completions/mean_length": 79.12500286102295, "completions/min_length": 29.875, "epoch": 1.15289153636138, "grad_norm": 1.0449247123185463, "kl": 0.09832763671875, "learning_rate": 9.724049612412103e-07, "loss": -0.0012995228171348572, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 580, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 71.78125095367432, "completions/min_length": 32.125, "epoch": 1.1548771407297096, "grad_norm": 0.00654337629304603, "kl": 0.076019287109375, "learning_rate": 9.72301508559539e-07, "loss": 7.600930257467553e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 581, "train_speed(iter/s)": 0.022287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 63.70833730697632, "completions/min_length": 28.875, "epoch": 1.156862745098039, "grad_norm": 0.008240480651193175, "kl": 0.073974609375, "learning_rate": 9.72197867844549e-07, "loss": 7.402162736980245e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 582, "train_speed(iter/s)": 0.02229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 61.04166841506958, "completions/min_length": 31.375, "epoch": 1.1588483494663688, "grad_norm": 0.010119890727403267, "kl": 0.1004638671875, "learning_rate": 9.720940391375017e-07, "loss": 0.00010052209108835086, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 583, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.375, "completions/mean_length": 80.20833492279053, "completions/min_length": 29.5, "epoch": 1.1608339538346983, "grad_norm": 0.8486786913735911, "kl": 0.0975341796875, "learning_rate": 9.71990022479734e-07, "loss": -9.814029181143269e-05, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 584, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 77.67708683013916, "completions/min_length": 29.875, "epoch": 1.162819558203028, "grad_norm": 0.008552646877854078, "kl": 0.081268310546875, "learning_rate": 9.718858179126567e-07, "loss": 8.129799243761227e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 585, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 76.46875190734863, "completions/min_length": 27.75, "epoch": 1.1648051625713576, "grad_norm": 0.008366967018786422, "kl": 0.08905029296875, "learning_rate": 9.71781425477756e-07, "loss": 8.896699728211388e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 586, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 81.80208587646484, "completions/min_length": 34.0, "epoch": 1.1667907669396873, "grad_norm": 0.008196082771375898, "kl": 0.085784912109375, "learning_rate": 9.71676845216593e-07, "loss": 8.579804853070527e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 587, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 72.92708539962769, "completions/min_length": 26.75, "epoch": 1.1687763713080168, "grad_norm": 0.005775455703144766, "kl": 0.091156005859375, "learning_rate": 9.715720771708031e-07, "loss": 9.109506936511025e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 588, "train_speed(iter/s)": 0.022292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 74.88541984558105, "completions/min_length": 33.375, "epoch": 1.1707619756763465, "grad_norm": 1.2103084833535742, "kl": 0.09063720703125, "learning_rate": 9.714671213820966e-07, "loss": -0.0009210556745529175, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 589, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 79.07291841506958, "completions/min_length": 32.0, "epoch": 1.172747580044676, "grad_norm": 0.00727854680400535, "kl": 0.095062255859375, "learning_rate": 9.713619778922587e-07, "loss": 9.491811943007633e-05, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 590, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 79.66666984558105, "completions/min_length": 30.375, "epoch": 1.1747331844130058, "grad_norm": 0.005139729517205997, "kl": 0.07672119140625, "learning_rate": 9.712566467431496e-07, "loss": 7.6712341979146e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 591, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 75.00000238418579, "completions/min_length": 28.75, "epoch": 1.1767187887813353, "grad_norm": 0.005262161215713062, "kl": 0.076171875, "learning_rate": 9.711511279767035e-07, "loss": 7.62054551159963e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 592, "train_speed(iter/s)": 0.022301 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 75.43750238418579, "completions/min_length": 35.625, "epoch": 1.178704393149665, "grad_norm": 0.0066197236244243075, "kl": 0.08331298828125, "learning_rate": 9.710454216349298e-07, "loss": 8.333769801538438e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 593, "train_speed(iter/s)": 0.022299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 71.7604193687439, "completions/min_length": 31.875, "epoch": 1.1806899975179945, "grad_norm": 0.9440998961946998, "kl": 0.073974609375, "learning_rate": 9.709395277599124e-07, "loss": -0.002237787004560232, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 594, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 91.32291793823242, "completions/min_length": 36.75, "epoch": 1.182675601886324, "grad_norm": 0.005108966715773797, "kl": 0.06787109375, "learning_rate": 9.7083344639381e-07, "loss": 6.787019810872152e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 595, "train_speed(iter/s)": 0.022293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.375, "completions/mean_length": 80.78125190734863, "completions/min_length": 33.5, "epoch": 1.1846612062546538, "grad_norm": 1.192715802963661, "kl": 0.068511962890625, "learning_rate": 9.707271775788558e-07, "loss": -3.7482786865439266e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 596, "train_speed(iter/s)": 0.022296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 64.06250190734863, "completions/min_length": 24.75, "epoch": 1.1866468106229835, "grad_norm": 0.7680531624455892, "kl": 0.0888671875, "learning_rate": 9.706207213573579e-07, "loss": 0.003137031104415655, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 597, "train_speed(iter/s)": 0.022291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 68.83333587646484, "completions/min_length": 27.75, "epoch": 1.188632414991313, "grad_norm": 0.9941811488501888, "kl": 0.106414794921875, "learning_rate": 9.705140777716985e-07, "loss": -0.003719897475093603, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 598, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 78.93750381469727, "completions/min_length": 35.375, "epoch": 1.1906180193596425, "grad_norm": 0.005860471067882116, "kl": 0.06390380859375, "learning_rate": 9.704072468643347e-07, "loss": 6.389830377884209e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 599, "train_speed(iter/s)": 0.022294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 79.62500238418579, "completions/min_length": 30.875, "epoch": 1.1926036237279722, "grad_norm": 0.00509660365868744, "kl": 0.082275390625, "learning_rate": 9.703002286777983e-07, "loss": 8.220366726163775e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 600, "train_speed(iter/s)": 0.022293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 73.88541841506958, "completions/min_length": 27.375, "epoch": 1.1945892280963017, "grad_norm": 0.04406790502517936, "kl": 0.099945068359375, "learning_rate": 9.701930232546954e-07, "loss": 9.99331459752284e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 601, "train_speed(iter/s)": 0.022293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 73.29166889190674, "completions/min_length": 32.5, "epoch": 1.1965748324646315, "grad_norm": 0.00804967431869538, "kl": 0.07000732421875, "learning_rate": 9.70085630637707e-07, "loss": 6.998522439971566e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 602, "train_speed(iter/s)": 0.022295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 255.25, "completions/mean_length": 81.33333444595337, "completions/min_length": 29.0, "epoch": 1.198560436832961, "grad_norm": 0.532666801069856, "kl": 0.096282958984375, "learning_rate": 9.69978050869588e-07, "loss": 0.02345992624759674, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 603, "train_speed(iter/s)": 0.022288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 70.64583587646484, "completions/min_length": 30.625, "epoch": 1.2005460412012907, "grad_norm": 1.0674061726574062, "kl": 0.0982666015625, "learning_rate": 9.698702839931687e-07, "loss": 0.009132467210292816, "memory(GiB)": 94.21, "reward": 1.6041666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.6041666669771075, "rewards/CineAccuracyORM/std": 0.2231760062277317, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 604, "train_speed(iter/s)": 0.022288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 67.56250190734863, "completions/min_length": 30.0, "epoch": 1.2025316455696202, "grad_norm": 1.097897789618336, "kl": 0.09222412109375, "learning_rate": 9.69762330051353e-07, "loss": 0.01471245288848877, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.22787059843540192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 605, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 71.45833587646484, "completions/min_length": 28.25, "epoch": 1.20451724993795, "grad_norm": 0.009278574148672496, "kl": 0.0850830078125, "learning_rate": 9.696541890871198e-07, "loss": 8.51130680530332e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 606, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 71.94791889190674, "completions/min_length": 29.25, "epoch": 1.2065028543062795, "grad_norm": 0.009320926369159988, "kl": 0.0885009765625, "learning_rate": 9.695458611435228e-07, "loss": 8.853545296005905e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 607, "train_speed(iter/s)": 0.02229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 81.37500095367432, "completions/min_length": 31.5, "epoch": 1.208488458674609, "grad_norm": 1.4641006828415697, "kl": 0.10943603515625, "learning_rate": 9.694373462636887e-07, "loss": 0.006353606935590506, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 608, "train_speed(iter/s)": 0.022284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 64.76041889190674, "completions/min_length": 30.25, "epoch": 1.2104740630429387, "grad_norm": 0.009977849519231516, "kl": 0.097137451171875, "learning_rate": 9.693286444908201e-07, "loss": 9.709088772069663e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 609, "train_speed(iter/s)": 0.022289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 59.96875238418579, "completions/min_length": 32.0, "epoch": 1.2124596674112684, "grad_norm": 0.011079030117479677, "kl": 0.124267578125, "learning_rate": 9.69219755868194e-07, "loss": 0.00012425723252817988, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 610, "train_speed(iter/s)": 0.022297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 71.08333587646484, "completions/min_length": 29.0, "epoch": 1.214445271779598, "grad_norm": 0.009538366947412816, "kl": 0.1038818359375, "learning_rate": 9.691106804391603e-07, "loss": 0.00010387604561401531, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 611, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 72.29166841506958, "completions/min_length": 32.375, "epoch": 1.2164308761479274, "grad_norm": 0.009378721437578636, "kl": 0.10986328125, "learning_rate": 9.690014182471447e-07, "loss": 0.00010981389641528949, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 612, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 83.25000190734863, "completions/min_length": 27.75, "epoch": 1.2184164805162572, "grad_norm": 0.00875745529021343, "kl": 0.0936279296875, "learning_rate": 9.688919693356471e-07, "loss": 9.357710223412141e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 613, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 70.10416889190674, "completions/min_length": 32.875, "epoch": 1.2204020848845867, "grad_norm": 0.02622649092908211, "kl": 0.096343994140625, "learning_rate": 9.68782333748241e-07, "loss": 9.625627717468888e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 614, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 70.08333539962769, "completions/min_length": 31.25, "epoch": 1.2223876892529164, "grad_norm": 0.009778015591590647, "kl": 0.096923828125, "learning_rate": 9.686725115285753e-07, "loss": 9.710974700283259e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 615, "train_speed(iter/s)": 0.022301 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 68.37500286102295, "completions/min_length": 27.625, "epoch": 1.224373293621246, "grad_norm": 0.008639201113997063, "kl": 0.069488525390625, "learning_rate": 9.685625027203717e-07, "loss": 6.949243106646463e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 616, "train_speed(iter/s)": 0.022302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.125, "completions/mean_length": 76.88541889190674, "completions/min_length": 29.5, "epoch": 1.2263588979895756, "grad_norm": 0.7601950132829342, "kl": 0.092315673828125, "learning_rate": 9.684523073674279e-07, "loss": 0.00995372049510479, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 617, "train_speed(iter/s)": 0.0223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 82.02083492279053, "completions/min_length": 27.375, "epoch": 1.2283445023579052, "grad_norm": 0.006405114926944636, "kl": 0.074371337890625, "learning_rate": 9.683419255136144e-07, "loss": 7.436737359967083e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 618, "train_speed(iter/s)": 0.022299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 61.82291793823242, "completions/min_length": 24.75, "epoch": 1.2303301067262349, "grad_norm": 0.008953339752428532, "kl": 0.0848388671875, "learning_rate": 9.682313572028767e-07, "loss": 8.475745562463999e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 619, "train_speed(iter/s)": 0.022304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 71.10416841506958, "completions/min_length": 30.875, "epoch": 1.2323157110945644, "grad_norm": 1.2336909977040258, "kl": 0.095855712890625, "learning_rate": 9.681206024792346e-07, "loss": 0.01599309965968132, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 620, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 66.40625143051147, "completions/min_length": 25.75, "epoch": 1.234301315462894, "grad_norm": 0.0070794522303797664, "kl": 0.08160400390625, "learning_rate": 9.680096613867818e-07, "loss": 8.146717300405726e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 621, "train_speed(iter/s)": 0.022305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.875, "completions/mean_length": 80.18750190734863, "completions/min_length": 27.125, "epoch": 1.2362869198312236, "grad_norm": 0.005021899244058689, "kl": 0.08172607421875, "learning_rate": 9.678985339696864e-07, "loss": 8.161667210515589e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 622, "train_speed(iter/s)": 0.022304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 65.94791841506958, "completions/min_length": 25.625, "epoch": 1.2382725241995534, "grad_norm": 2.2559389874404454, "kl": 0.101318359375, "learning_rate": 9.677872202721903e-07, "loss": 0.015005329623818398, "memory(GiB)": 94.21, "reward": 1.9062500149011612, "reward_std": 0.09878238290548325, "rewards/CineAccuracyORM/mean": 0.9062500074505806, "rewards/CineAccuracyORM/std": 0.16673530638217926, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 623, "train_speed(iter/s)": 0.022306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 67.87500333786011, "completions/min_length": 27.5, "epoch": 1.2402581285678829, "grad_norm": 0.9838700404438614, "kl": 0.10595703125, "learning_rate": 9.676757203386106e-07, "loss": 0.009951414540410042, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 624, "train_speed(iter/s)": 0.022308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 68.48958444595337, "completions/min_length": 30.25, "epoch": 1.2422437329362124, "grad_norm": 0.012438820543306112, "kl": 0.076507568359375, "learning_rate": 9.67564034213337e-07, "loss": 7.641559932380915e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 625, "train_speed(iter/s)": 0.022312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 61.42708492279053, "completions/min_length": 28.75, "epoch": 1.244229337304542, "grad_norm": 0.02635202543200512, "kl": 0.090057373046875, "learning_rate": 9.674521619408345e-07, "loss": 9.003737068269402e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 626, "train_speed(iter/s)": 0.022316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 72.395836353302, "completions/min_length": 24.625, "epoch": 1.2462149416728716, "grad_norm": 0.04149972290410046, "kl": 0.112823486328125, "learning_rate": 9.673401035656418e-07, "loss": 0.00011268883099546656, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 627, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 65.80208492279053, "completions/min_length": 28.5, "epoch": 1.2482005460412013, "grad_norm": 0.8450286055869382, "kl": 0.103271484375, "learning_rate": 9.672278591323715e-07, "loss": -0.00820427667349577, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 628, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 60.72916793823242, "completions/min_length": 28.625, "epoch": 1.2501861504095308, "grad_norm": 0.008435497557858766, "kl": 0.079864501953125, "learning_rate": 9.67115428685711e-07, "loss": 7.984022522578016e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 629, "train_speed(iter/s)": 0.022323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 66.71875143051147, "completions/min_length": 31.125, "epoch": 1.2521717547778606, "grad_norm": 2.9675699842817203, "kl": 0.09912109375, "learning_rate": 9.670028122704208e-07, "loss": 0.001313959015533328, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166669771075, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 630, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 72.27083539962769, "completions/min_length": 29.375, "epoch": 1.25415735914619, "grad_norm": 0.008018207494434805, "kl": 0.07928466796875, "learning_rate": 9.668900099313363e-07, "loss": 7.921218639239669e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 631, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 240.625, "completions/mean_length": 74.53125381469727, "completions/min_length": 26.125, "epoch": 1.2561429635145198, "grad_norm": 0.8151050059746938, "kl": 0.11920166015625, "learning_rate": 9.667770217133662e-07, "loss": 0.027313342317938805, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.07654655165970325, "rewards/CineAccuracyORM/mean": 0.9375000074505806, "rewards/CineAccuracyORM/std": 0.10045047104358673, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 632, "train_speed(iter/s)": 0.022312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 64.94791793823242, "completions/min_length": 31.25, "epoch": 1.2581285678828493, "grad_norm": 0.010397980907964339, "kl": 0.077301025390625, "learning_rate": 9.666638476614936e-07, "loss": 7.72877101553604e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 633, "train_speed(iter/s)": 0.022313 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 70.40625238418579, "completions/min_length": 31.625, "epoch": 1.2601141722511788, "grad_norm": 1.4435889506206374, "kl": 0.09027099609375, "learning_rate": 9.665504878207756e-07, "loss": -0.0059862323105335236, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 634, "train_speed(iter/s)": 0.022315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 70.85416841506958, "completions/min_length": 25.875, "epoch": 1.2620997766195086, "grad_norm": 1.2504565080965673, "kl": 0.083251953125, "learning_rate": 9.664369422363429e-07, "loss": 0.008449604734778404, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 635, "train_speed(iter/s)": 0.022315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 67.39583539962769, "completions/min_length": 29.0, "epoch": 1.2640853809878383, "grad_norm": 0.007656929688250345, "kl": 0.10076904296875, "learning_rate": 9.663232109534009e-07, "loss": 0.00010077543265651911, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 636, "train_speed(iter/s)": 0.022318 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 65.04166841506958, "completions/min_length": 30.0, "epoch": 1.2660709853561678, "grad_norm": 1.46198711903342, "kl": 0.101531982421875, "learning_rate": 9.662092940172282e-07, "loss": 0.0019501249771565199, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 637, "train_speed(iter/s)": 0.022324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 68.06250190734863, "completions/min_length": 30.0, "epoch": 1.2680565897244973, "grad_norm": 0.8390499127228502, "kl": 0.099578857421875, "learning_rate": 9.660951914731774e-07, "loss": -0.005982631351798773, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 638, "train_speed(iter/s)": 0.022323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 68.42708587646484, "completions/min_length": 31.25, "epoch": 1.270042194092827, "grad_norm": 1.6806593559938343, "kl": 0.07940673828125, "learning_rate": 9.659809033666753e-07, "loss": -0.0035557765513658524, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 639, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 76.55208492279053, "completions/min_length": 33.875, "epoch": 1.2720277984611565, "grad_norm": 0.8663757649264218, "kl": 0.078521728515625, "learning_rate": 9.658664297432225e-07, "loss": 7.8544020652771e-05, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 640, "train_speed(iter/s)": 0.022317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 61.333335399627686, "completions/min_length": 27.75, "epoch": 1.2740134028294863, "grad_norm": 0.008434956470041294, "kl": 0.07586669921875, "learning_rate": 9.657517706483934e-07, "loss": 7.581163663417101e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 641, "train_speed(iter/s)": 0.02232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 62.70833396911621, "completions/min_length": 30.0, "epoch": 1.2759990071978158, "grad_norm": 0.010045195652376267, "kl": 0.085357666015625, "learning_rate": 9.656369261278361e-07, "loss": 8.534900553058833e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 642, "train_speed(iter/s)": 0.022321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 68.27083444595337, "completions/min_length": 26.125, "epoch": 1.2779846115661455, "grad_norm": 1.0554497550881037, "kl": 0.11859130859375, "learning_rate": 9.655218962272728e-07, "loss": -0.010795101523399353, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 643, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 75.36458539962769, "completions/min_length": 27.375, "epoch": 1.279970215934475, "grad_norm": 0.009788075197411747, "kl": 0.11468505859375, "learning_rate": 9.654066809924992e-07, "loss": 0.00011463784903753549, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 644, "train_speed(iter/s)": 0.022323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.375, "completions/mean_length": 57.156251430511475, "completions/min_length": 25.375, "epoch": 1.2819558203028047, "grad_norm": 0.010643797160266798, "kl": 0.094451904296875, "learning_rate": 9.65291280469385e-07, "loss": 9.443907765671611e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 645, "train_speed(iter/s)": 0.022326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 55.677085399627686, "completions/min_length": 26.5, "epoch": 1.2839414246711343, "grad_norm": 0.011833210739744362, "kl": 0.13201904296875, "learning_rate": 9.651756947038738e-07, "loss": 0.00013195353676564991, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 646, "train_speed(iter/s)": 0.022328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 60.9791693687439, "completions/min_length": 28.5, "epoch": 1.2859270290394638, "grad_norm": 0.011210033476712178, "kl": 0.114990234375, "learning_rate": 9.650599237419827e-07, "loss": 0.00011511107732076198, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 647, "train_speed(iter/s)": 0.02233 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 72.83333444595337, "completions/min_length": 27.125, "epoch": 1.2879126334077935, "grad_norm": 0.010959313179157357, "kl": 0.12579345703125, "learning_rate": 9.649439676298022e-07, "loss": 0.0001258813717868179, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 648, "train_speed(iter/s)": 0.022331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 62.77083492279053, "completions/min_length": 25.75, "epoch": 1.2898982377761232, "grad_norm": 2.044842511411833, "kl": 0.23211669921875, "learning_rate": 9.648278264134975e-07, "loss": 0.008665397763252258, "memory(GiB)": 94.21, "reward": 1.7916666865348816, "reward_std": 0.12429790198802948, "rewards/CineAccuracyORM/mean": 0.7916666753590107, "rewards/CineAccuracyORM/std": 0.2680988162755966, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 649, "train_speed(iter/s)": 0.022336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.875, "completions/mean_length": 59.32291889190674, "completions/min_length": 26.25, "epoch": 1.2918838421444527, "grad_norm": 0.011490252626536829, "kl": 0.11663818359375, "learning_rate": 9.647115001393065e-07, "loss": 0.00011668381921481341, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 650, "train_speed(iter/s)": 0.022339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 59.2604193687439, "completions/min_length": 24.25, "epoch": 1.2938694465127822, "grad_norm": 0.012767910335676542, "kl": 0.15509033203125, "learning_rate": 9.645949888535412e-07, "loss": 0.00015494822582695633, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 651, "train_speed(iter/s)": 0.022345 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 58.35416841506958, "completions/min_length": 33.0, "epoch": 1.295855050881112, "grad_norm": 1.2624373647543505, "kl": 0.1265869140625, "learning_rate": 9.644782926025876e-07, "loss": -0.0012904548784717917, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 652, "train_speed(iter/s)": 0.022347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.625, "completions/mean_length": 59.937501430511475, "completions/min_length": 22.875, "epoch": 1.2978406552494415, "grad_norm": 0.011365666925268391, "kl": 0.1207275390625, "learning_rate": 9.643614114329045e-07, "loss": 0.00012068400246789679, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 653, "train_speed(iter/s)": 0.022347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 241.875, "completions/mean_length": 68.31250190734863, "completions/min_length": 26.625, "epoch": 1.2998262596177712, "grad_norm": 0.44431536708994884, "kl": 0.26446533203125, "learning_rate": 9.642443453910248e-07, "loss": 0.022468041628599167, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 654, "train_speed(iter/s)": 0.022341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 56.14583492279053, "completions/min_length": 27.25, "epoch": 1.3018118639861007, "grad_norm": 2.4951135790609826, "kl": 0.13372802734375, "learning_rate": 9.641270945235553e-07, "loss": -0.005837704055011272, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.09202304109930992, "rewards/CineAccuracyORM/mean": 0.7708333395421505, "rewards/CineAccuracyORM/std": 0.2407601661980152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 655, "train_speed(iter/s)": 0.022338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.125, "completions/mean_length": 58.32291793823242, "completions/min_length": 29.5, "epoch": 1.3037974683544304, "grad_norm": 0.7090318736946667, "kl": 0.1480712890625, "learning_rate": 9.64009658877176e-07, "loss": -0.018358584493398666, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 656, "train_speed(iter/s)": 0.022341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.875, "completions/mean_length": 56.489585399627686, "completions/min_length": 27.625, "epoch": 1.30578307272276, "grad_norm": 0.012592383066213889, "kl": 0.140380859375, "learning_rate": 9.638920384986406e-07, "loss": 0.00014031633327249438, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 657, "train_speed(iter/s)": 0.022349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 62.98958492279053, "completions/min_length": 28.0, "epoch": 1.3077686770910897, "grad_norm": 0.6508886062228177, "kl": 0.1754150390625, "learning_rate": 9.63774233434776e-07, "loss": -0.000887518166564405, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 658, "train_speed(iter/s)": 0.02235 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.625, "completions/mean_length": 57.90625238418579, "completions/min_length": 25.75, "epoch": 1.3097542814594192, "grad_norm": 1.953887439658314, "kl": 0.2039794921875, "learning_rate": 9.636562437324831e-07, "loss": -0.02238425426185131, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24358049780130386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 659, "train_speed(iter/s)": 0.022354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 61.87500047683716, "completions/min_length": 24.125, "epoch": 1.3117398858277487, "grad_norm": 1.4339302867240842, "kl": 0.1761474609375, "learning_rate": 9.63538069438736e-07, "loss": 0.0018901865696534514, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 660, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 53.97916793823242, "completions/min_length": 22.5, "epoch": 1.3137254901960784, "grad_norm": 0.013851973952639802, "kl": 0.205810546875, "learning_rate": 9.634197106005829e-07, "loss": 0.00020593572116922587, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 661, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 64.26041746139526, "completions/min_length": 29.25, "epoch": 1.3157110945644082, "grad_norm": 0.7603876055571326, "kl": 0.1680908203125, "learning_rate": 9.633011672651442e-07, "loss": -0.015636317431926727, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666679084301, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 662, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 52.60416793823242, "completions/min_length": 27.0, "epoch": 1.3176966989327377, "grad_norm": 0.7957485608138803, "kl": 0.18548583984375, "learning_rate": 9.631824394796151e-07, "loss": 0.009681493043899536, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 663, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 60.11458492279053, "completions/min_length": 27.375, "epoch": 1.3196823033010672, "grad_norm": 0.01204786438804525, "kl": 0.1971435546875, "learning_rate": 9.630635272912634e-07, "loss": 0.00019730752683244646, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 664, "train_speed(iter/s)": 0.022354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.375, "completions/mean_length": 54.739585399627686, "completions/min_length": 28.25, "epoch": 1.321667907669397, "grad_norm": 0.013205136110286331, "kl": 0.2052001953125, "learning_rate": 9.629444307474307e-07, "loss": 0.00020542668062262237, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 665, "train_speed(iter/s)": 0.022356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.375, "completions/mean_length": 60.54166889190674, "completions/min_length": 28.75, "epoch": 1.3236535120377264, "grad_norm": 0.012281741987093421, "kl": 0.1839599609375, "learning_rate": 9.62825149895532e-07, "loss": 0.0001841779303504154, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 666, "train_speed(iter/s)": 0.022357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 55.83333492279053, "completions/min_length": 28.375, "epoch": 1.3256391164060561, "grad_norm": 0.013486519715070807, "kl": 0.23095703125, "learning_rate": 9.627056847830553e-07, "loss": 0.00023079430684447289, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 667, "train_speed(iter/s)": 0.022363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 70.35416793823242, "completions/min_length": 30.75, "epoch": 1.3276247207743856, "grad_norm": 0.011119500547131964, "kl": 0.181396484375, "learning_rate": 9.625860354575623e-07, "loss": 0.00018149535753764212, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 668, "train_speed(iter/s)": 0.022363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.125, "completions/mean_length": 56.64583492279053, "completions/min_length": 32.625, "epoch": 1.3296103251427154, "grad_norm": 1.0749158168837043, "kl": 0.1949462890625, "learning_rate": 9.62466201966688e-07, "loss": 0.011314323171973228, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 669, "train_speed(iter/s)": 0.022367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 54.92708492279053, "completions/min_length": 24.75, "epoch": 1.3315959295110449, "grad_norm": 0.01345581265965735, "kl": 0.1976318359375, "learning_rate": 9.623461843581407e-07, "loss": 0.00019731343491002917, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 670, "train_speed(iter/s)": 0.022371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.625, "completions/mean_length": 58.27083492279053, "completions/min_length": 28.125, "epoch": 1.3335815338793746, "grad_norm": 2.2330408127778196, "kl": 0.207763671875, "learning_rate": 9.622259826797017e-07, "loss": 0.030535360798239708, "memory(GiB)": 94.21, "reward": 1.8541666865348816, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.18837061524391174, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 671, "train_speed(iter/s)": 0.022376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 53.78125190734863, "completions/min_length": 28.25, "epoch": 1.3355671382477041, "grad_norm": 0.8557632294020242, "kl": 0.204345703125, "learning_rate": 9.621055969792262e-07, "loss": -0.0044748904183506966, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 672, "train_speed(iter/s)": 0.022381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 57.927085399627686, "completions/min_length": 25.125, "epoch": 1.3375527426160336, "grad_norm": 1.5076789219125566, "kl": 0.24554443359375, "learning_rate": 9.619850273046425e-07, "loss": -0.007958360016345978, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.645833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 673, "train_speed(iter/s)": 0.022381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 58.677085399627686, "completions/min_length": 24.625, "epoch": 1.3395383469843634, "grad_norm": 0.012931793869248696, "kl": 0.207763671875, "learning_rate": 9.618642737039512e-07, "loss": 0.0002075552474707365, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 674, "train_speed(iter/s)": 0.022386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 51.82291841506958, "completions/min_length": 25.375, "epoch": 1.341523951352693, "grad_norm": 1.452857268047448, "kl": 0.1881103515625, "learning_rate": 9.617433362252277e-07, "loss": 0.0001881470379885286, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 675, "train_speed(iter/s)": 0.022389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.375, "completions/mean_length": 47.687500953674316, "completions/min_length": 25.125, "epoch": 1.3435095557210226, "grad_norm": 0.014470825863822586, "kl": 0.235595703125, "learning_rate": 9.616222149166192e-07, "loss": 0.00023564421280752867, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 676, "train_speed(iter/s)": 0.022395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 53.437501430511475, "completions/min_length": 25.75, "epoch": 1.345495160089352, "grad_norm": 0.013884520631398625, "kl": 0.2369384765625, "learning_rate": 9.61500909826347e-07, "loss": 0.00023682885512243956, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 677, "train_speed(iter/s)": 0.0224 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.625, "completions/mean_length": 48.10416793823242, "completions/min_length": 25.375, "epoch": 1.3474807644576818, "grad_norm": 1.9511758493697813, "kl": 0.2664794921875, "learning_rate": 9.61379421002705e-07, "loss": 0.003057637019082904, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.22787059843540192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 678, "train_speed(iter/s)": 0.022407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 52.458335876464844, "completions/min_length": 24.75, "epoch": 1.3494663688260113, "grad_norm": 1.0348479635751253, "kl": 0.2271728515625, "learning_rate": 9.61257748494061e-07, "loss": 0.009338478557765484, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 679, "train_speed(iter/s)": 0.022412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.625, "completions/mean_length": 48.062500953674316, "completions/min_length": 25.25, "epoch": 1.351451973194341, "grad_norm": 2.2576541116933613, "kl": 0.2552490234375, "learning_rate": 9.61135892348855e-07, "loss": 0.012460976839065552, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.15789688751101494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 680, "train_speed(iter/s)": 0.022392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 55.84375190734863, "completions/min_length": 25.25, "epoch": 1.3534375775626706, "grad_norm": 0.40038093795369606, "kl": 0.5283203125, "learning_rate": 9.610138526156005e-07, "loss": 0.0005270745605230331, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 681, "train_speed(iter/s)": 0.02237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 55.281250953674316, "completions/min_length": 31.75, "epoch": 1.3554231819310003, "grad_norm": 0.01284877358526103, "kl": 0.21484375, "learning_rate": 9.608916293428842e-07, "loss": 0.00021472698426805437, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 682, "train_speed(iter/s)": 0.022347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.5, "completions/mean_length": 54.14583444595337, "completions/min_length": 29.0, "epoch": 1.3574087862993298, "grad_norm": 0.013208244398555247, "kl": 0.218017578125, "learning_rate": 9.60769222579366e-07, "loss": 0.00021804519928991795, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 683, "train_speed(iter/s)": 0.022326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.875, "completions/mean_length": 53.06250190734863, "completions/min_length": 26.375, "epoch": 1.3593943906676595, "grad_norm": 0.0136822725478887, "kl": 0.25439453125, "learning_rate": 9.606466323737784e-07, "loss": 0.00025410568923689425, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 684, "train_speed(iter/s)": 0.022301 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 55.22916841506958, "completions/min_length": 27.0, "epoch": 1.361379995035989, "grad_norm": 1.6210194059118768, "kl": 0.2738037109375, "learning_rate": 9.605238587749275e-07, "loss": -0.02679913304746151, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.306039284914732, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 685, "train_speed(iter/s)": 0.022292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 57.531251430511475, "completions/min_length": 27.875, "epoch": 1.3633655994043186, "grad_norm": 1.1716004831406452, "kl": 0.2353515625, "learning_rate": 9.604009018316914e-07, "loss": -0.0020323614589869976, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 686, "train_speed(iter/s)": 0.022297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.75, "completions/mean_length": 49.916667461395264, "completions/min_length": 26.75, "epoch": 1.3653512037726483, "grad_norm": 1.9434426537127008, "kl": 0.2640380859375, "learning_rate": 9.602777615930226e-07, "loss": 0.000371312111383304, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3820159323513508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 687, "train_speed(iter/s)": 0.022305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.625, "completions/mean_length": 54.08333444595337, "completions/min_length": 26.625, "epoch": 1.367336808140978, "grad_norm": 1.5204574081738305, "kl": 0.1988525390625, "learning_rate": 9.601544381079457e-07, "loss": -0.010278332978487015, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.15001969039440155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 688, "train_speed(iter/s)": 0.022311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.625, "completions/mean_length": 52.10416841506958, "completions/min_length": 25.625, "epoch": 1.3693224125093075, "grad_norm": 1.2317309898053894, "kl": 0.2557373046875, "learning_rate": 9.600309314255582e-07, "loss": -0.004393937066197395, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 689, "train_speed(iter/s)": 0.022316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 56.86458492279053, "completions/min_length": 27.875, "epoch": 1.371308016877637, "grad_norm": 0.012609256713666556, "kl": 0.2109375, "learning_rate": 9.59907241595031e-07, "loss": 0.00021082788589410484, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 690, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 56.91666841506958, "completions/min_length": 27.125, "epoch": 1.3732936212459668, "grad_norm": 1.1874806828738682, "kl": 0.2598876953125, "learning_rate": 9.59783368665607e-07, "loss": -0.007028396241366863, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 691, "train_speed(iter/s)": 0.022319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 56.15625190734863, "completions/min_length": 28.25, "epoch": 1.3752792256142963, "grad_norm": 0.012626782650926477, "kl": 0.197509765625, "learning_rate": 9.596593126866037e-07, "loss": 0.00019758034613914788, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 692, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 56.55208492279053, "completions/min_length": 27.25, "epoch": 1.377264829982626, "grad_norm": 0.01387648969366496, "kl": 0.2310791015625, "learning_rate": 9.595350737074099e-07, "loss": 0.00023103014973457903, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 693, "train_speed(iter/s)": 0.022322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.875, "completions/mean_length": 56.47916793823242, "completions/min_length": 28.75, "epoch": 1.3792504343509555, "grad_norm": 0.01281697974194212, "kl": 0.224853515625, "learning_rate": 9.594106517774878e-07, "loss": 0.00022499411716125906, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 694, "train_speed(iter/s)": 0.022326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 59.8229193687439, "completions/min_length": 30.75, "epoch": 1.3812360387192852, "grad_norm": 0.011790021274502231, "kl": 0.2052001953125, "learning_rate": 9.592860469463724e-07, "loss": 0.000205205287784338, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 695, "train_speed(iter/s)": 0.02233 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 53.59375190734863, "completions/min_length": 25.75, "epoch": 1.3832216430876147, "grad_norm": 1.4718861644210572, "kl": 0.36279296875, "learning_rate": 9.591612592636714e-07, "loss": -0.004776414483785629, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 696, "train_speed(iter/s)": 0.022334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 53.94791793823242, "completions/min_length": 25.25, "epoch": 1.3852072474559445, "grad_norm": 0.013371487625210303, "kl": 0.2022705078125, "learning_rate": 9.59036288779066e-07, "loss": 0.0002022602129727602, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 697, "train_speed(iter/s)": 0.022337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 55.48958492279053, "completions/min_length": 27.5, "epoch": 1.387192851824274, "grad_norm": 0.012872637534962036, "kl": 0.242919921875, "learning_rate": 9.58911135542309e-07, "loss": 0.00024293379101436585, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 698, "train_speed(iter/s)": 0.022343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 53.47916793823242, "completions/min_length": 27.5, "epoch": 1.3891784561926035, "grad_norm": 0.012993284476608873, "kl": 0.174560546875, "learning_rate": 9.587857996032269e-07, "loss": 0.0001747296191751957, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 699, "train_speed(iter/s)": 0.022343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.375, "completions/mean_length": 53.02083444595337, "completions/min_length": 26.0, "epoch": 1.3911640605609332, "grad_norm": 0.013533895523861405, "kl": 0.18487548828125, "learning_rate": 9.586602810117185e-07, "loss": 0.0001847328821895644, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 700, "train_speed(iter/s)": 0.02235 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 59.052085399627686, "completions/min_length": 27.625, "epoch": 1.393149664929263, "grad_norm": 1.1541233708292815, "kl": 0.1884765625, "learning_rate": 9.585345798177554e-07, "loss": -0.014513371512293816, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.15789688751101494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 701, "train_speed(iter/s)": 0.022349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.875, "completions/mean_length": 58.93750286102295, "completions/min_length": 29.125, "epoch": 1.3951352692975925, "grad_norm": 0.012091887388755121, "kl": 0.16741943359375, "learning_rate": 9.584086960713822e-07, "loss": 0.0001672252401476726, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 702, "train_speed(iter/s)": 0.022348 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 61.218751430511475, "completions/min_length": 27.625, "epoch": 1.397120873665922, "grad_norm": 0.01223138437901507, "kl": 0.1796875, "learning_rate": 9.582826298227157e-07, "loss": 0.00017950995243154466, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 703, "train_speed(iter/s)": 0.022352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.625, "completions/mean_length": 54.343751430511475, "completions/min_length": 27.25, "epoch": 1.3991064780342517, "grad_norm": 0.012855877867865953, "kl": 0.1754150390625, "learning_rate": 9.581563811219453e-07, "loss": 0.00017559510888531804, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 704, "train_speed(iter/s)": 0.022355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.125, "completions/mean_length": 51.04166793823242, "completions/min_length": 26.25, "epoch": 1.4010920824025812, "grad_norm": 0.013785684263398509, "kl": 0.14404296875, "learning_rate": 9.58029950019334e-07, "loss": 0.0001441091881133616, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 705, "train_speed(iter/s)": 0.022352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.75, "completions/mean_length": 49.92708444595337, "completions/min_length": 25.625, "epoch": 1.403077686770911, "grad_norm": 0.014177132716538797, "kl": 0.1724853515625, "learning_rate": 9.579033365652158e-07, "loss": 0.00017262960318475962, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 706, "train_speed(iter/s)": 0.022356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 53.687500953674316, "completions/min_length": 31.0, "epoch": 1.4050632911392404, "grad_norm": 0.9219507527625238, "kl": 0.2100830078125, "learning_rate": 9.577765408099992e-07, "loss": -0.010596505366265774, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 707, "train_speed(iter/s)": 0.02236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.875, "completions/mean_length": 56.604166984558105, "completions/min_length": 29.375, "epoch": 1.4070488955075702, "grad_norm": 0.012459113957852184, "kl": 0.1717529296875, "learning_rate": 9.576495628041635e-07, "loss": 0.00017167648184113204, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 708, "train_speed(iter/s)": 0.022365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 53.58333444595337, "completions/min_length": 29.875, "epoch": 1.4090344998758997, "grad_norm": 1.1965680565612369, "kl": 0.14117431640625, "learning_rate": 9.575224025982618e-07, "loss": -0.007983053103089333, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 709, "train_speed(iter/s)": 0.022371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 55.27083492279053, "completions/min_length": 32.0, "epoch": 1.4110201042442294, "grad_norm": 0.01235083862781987, "kl": 0.135986328125, "learning_rate": 9.573950602429191e-07, "loss": 0.00013611519534606487, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 710, "train_speed(iter/s)": 0.022373 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 50.78125190734863, "completions/min_length": 24.375, "epoch": 1.413005708612559, "grad_norm": 0.014431757532512577, "kl": 0.152587890625, "learning_rate": 9.572675357888333e-07, "loss": 0.00015238014748319983, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 711, "train_speed(iter/s)": 0.022379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.875, "completions/mean_length": 50.479167461395264, "completions/min_length": 27.375, "epoch": 1.4149913129808884, "grad_norm": 1.4901548759857677, "kl": 0.14459228515625, "learning_rate": 9.571398292867745e-07, "loss": 0.0016813030233606696, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 712, "train_speed(iter/s)": 0.022387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 56.32291793823242, "completions/min_length": 30.5, "epoch": 1.4169769173492182, "grad_norm": 0.011463893774063328, "kl": 0.1517333984375, "learning_rate": 9.570119407875852e-07, "loss": 0.0001517429482191801, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 713, "train_speed(iter/s)": 0.022389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 54.98958492279053, "completions/min_length": 24.25, "epoch": 1.4189625217175479, "grad_norm": 0.010824340381178518, "kl": 0.12164306640625, "learning_rate": 9.568838703421808e-07, "loss": 0.0001216636155731976, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 714, "train_speed(iter/s)": 0.022393 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 56.72916889190674, "completions/min_length": 29.25, "epoch": 1.4209481260858774, "grad_norm": 1.3064436753768311, "kl": 0.137939453125, "learning_rate": 9.56755618001549e-07, "loss": -0.004561188630759716, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 715, "train_speed(iter/s)": 0.022399 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.125, "completions/mean_length": 50.92708444595337, "completions/min_length": 23.5, "epoch": 1.422933730454207, "grad_norm": 0.01112279629375603, "kl": 0.117431640625, "learning_rate": 9.566271838167495e-07, "loss": 0.00011737256863852963, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 716, "train_speed(iter/s)": 0.022406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 54.177085876464844, "completions/min_length": 27.375, "epoch": 1.4249193348225366, "grad_norm": 1.0983939192808494, "kl": 0.26483154296875, "learning_rate": 9.564985678389146e-07, "loss": 0.0002649997768457979, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 717, "train_speed(iter/s)": 0.022412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.625, "completions/mean_length": 47.11458444595337, "completions/min_length": 24.5, "epoch": 1.4269049391908661, "grad_norm": 0.011622590056280616, "kl": 0.10986328125, "learning_rate": 9.563697701192494e-07, "loss": 0.00010993148316629231, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 718, "train_speed(iter/s)": 0.022419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.625, "completions/mean_length": 54.93750190734863, "completions/min_length": 29.125, "epoch": 1.4288905435591959, "grad_norm": 1.6695279450262681, "kl": 0.10980224609375, "learning_rate": 9.562407907090312e-07, "loss": 0.0017443995457142591, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 719, "train_speed(iter/s)": 0.022418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.875, "completions/mean_length": 51.89583444595337, "completions/min_length": 26.25, "epoch": 1.4308761479275254, "grad_norm": 2.056668722315016, "kl": 0.13079833984375, "learning_rate": 9.561116296596085e-07, "loss": -0.0019818730652332306, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 720, "train_speed(iter/s)": 0.022426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 50.947917461395264, "completions/min_length": 25.875, "epoch": 1.432861752295855, "grad_norm": 0.9152808274658376, "kl": 0.13629150390625, "learning_rate": 9.55982287022404e-07, "loss": -0.0006033803219906986, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 721, "train_speed(iter/s)": 0.022433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.375, "completions/mean_length": 53.72916793823242, "completions/min_length": 29.0, "epoch": 1.4348473566641846, "grad_norm": 0.988844908675142, "kl": 0.11163330078125, "learning_rate": 9.558527628489117e-07, "loss": 0.0013558641076087952, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 722, "train_speed(iter/s)": 0.022439 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.125, "completions/mean_length": 53.95833444595337, "completions/min_length": 30.5, "epoch": 1.4368329610325143, "grad_norm": 0.008405923776666074, "kl": 0.1568603515625, "learning_rate": 9.557230571906975e-07, "loss": 0.00015692139277234674, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 723, "train_speed(iter/s)": 0.022444 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 51.208335399627686, "completions/min_length": 26.625, "epoch": 1.4388185654008439, "grad_norm": 0.0078047463549477045, "kl": 0.1087646484375, "learning_rate": 9.555931700994004e-07, "loss": 0.00010875804582610726, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 724, "train_speed(iter/s)": 0.022444 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 59.73958444595337, "completions/min_length": 28.75, "epoch": 1.4408041697691734, "grad_norm": 0.021594558458315073, "kl": 0.13958740234375, "learning_rate": 9.554631016267308e-07, "loss": 0.00013938584015704691, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 725, "train_speed(iter/s)": 0.022445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.125, "completions/mean_length": 52.312501430511475, "completions/min_length": 21.25, "epoch": 1.442789774137503, "grad_norm": 0.015217344707581466, "kl": 0.16424560546875, "learning_rate": 9.55332851824472e-07, "loss": 0.0001644698641030118, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 726, "train_speed(iter/s)": 0.02245 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 55.10416793823242, "completions/min_length": 30.375, "epoch": 1.4447753785058328, "grad_norm": 0.8643791074440206, "kl": 0.18280029296875, "learning_rate": 9.552024207444794e-07, "loss": -0.005577507428824902, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 727, "train_speed(iter/s)": 0.022455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.25, "completions/mean_length": 50.85416793823242, "completions/min_length": 25.625, "epoch": 1.4467609828741623, "grad_norm": 1.1838750787173362, "kl": 0.11004638671875, "learning_rate": 9.5507180843868e-07, "loss": -0.0038419279735535383, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 728, "train_speed(iter/s)": 0.022458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 56.19791793823242, "completions/min_length": 25.25, "epoch": 1.4487465872424918, "grad_norm": 0.8455386843974868, "kl": 0.13897705078125, "learning_rate": 9.549410149590737e-07, "loss": 0.000139145806315355, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 729, "train_speed(iter/s)": 0.022459 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 61.56250190734863, "completions/min_length": 28.5, "epoch": 1.4507321916108216, "grad_norm": 0.009484025716301098, "kl": 0.09088134765625, "learning_rate": 9.54810040357732e-07, "loss": 9.089943341678008e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 730, "train_speed(iter/s)": 0.022455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 54.593751430511475, "completions/min_length": 26.25, "epoch": 1.452717795979151, "grad_norm": 0.00839230345880413, "kl": 0.145782470703125, "learning_rate": 9.546788846867987e-07, "loss": 0.0001458572514820844, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 731, "train_speed(iter/s)": 0.022457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 55.67708492279053, "completions/min_length": 27.0, "epoch": 1.4547034003474808, "grad_norm": 0.007607238266323172, "kl": 0.12261962890625, "learning_rate": 9.545475479984898e-07, "loss": 0.0001225980813615024, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 732, "train_speed(iter/s)": 0.022461 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 59.11458492279053, "completions/min_length": 31.5, "epoch": 1.4566890047158103, "grad_norm": 0.007020407001812986, "kl": 0.088836669921875, "learning_rate": 9.544160303450927e-07, "loss": 8.882784459274262e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 733, "train_speed(iter/s)": 0.022466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 55.541667461395264, "completions/min_length": 29.5, "epoch": 1.45867460908414, "grad_norm": 0.008425255303198686, "kl": 0.14459228515625, "learning_rate": 9.542843317789683e-07, "loss": 0.00014448503497987986, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 734, "train_speed(iter/s)": 0.022471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 63.09375238418579, "completions/min_length": 31.0, "epoch": 1.4606602134524695, "grad_norm": 0.007488147097384869, "kl": 0.12017822265625, "learning_rate": 9.54152452352548e-07, "loss": 0.00012000129208900034, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 735, "train_speed(iter/s)": 0.022473 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 54.468750953674316, "completions/min_length": 24.0, "epoch": 1.4626458178207993, "grad_norm": 1.0455099198461375, "kl": 0.11578369140625, "learning_rate": 9.540203921183358e-07, "loss": -0.0001914985477924347, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 736, "train_speed(iter/s)": 0.022473 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.125, "completions/mean_length": 57.09375190734863, "completions/min_length": 29.125, "epoch": 1.4646314221891288, "grad_norm": 2.284600818709652, "kl": 0.08966064453125, "learning_rate": 9.538881511289078e-07, "loss": 0.0029439865611493587, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 737, "train_speed(iter/s)": 0.022481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 56.66666793823242, "completions/min_length": 28.375, "epoch": 1.4666170265574583, "grad_norm": 1.5075898859184218, "kl": 0.1265869140625, "learning_rate": 9.537557294369122e-07, "loss": 0.0031701817642897367, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 738, "train_speed(iter/s)": 0.022481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 54.81250190734863, "completions/min_length": 30.625, "epoch": 1.468602630925788, "grad_norm": 0.01026436959963118, "kl": 0.1094970703125, "learning_rate": 9.536231270950688e-07, "loss": 0.00010945153189823031, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 739, "train_speed(iter/s)": 0.022483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 57.13541841506958, "completions/min_length": 26.125, "epoch": 1.4705882352941178, "grad_norm": 0.9306134003713924, "kl": 0.12677001953125, "learning_rate": 9.534903441561692e-07, "loss": -0.0013227922609075904, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 740, "train_speed(iter/s)": 0.022491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 54.27083444595337, "completions/min_length": 30.625, "epoch": 1.4725738396624473, "grad_norm": 0.010444108516729786, "kl": 0.1226806640625, "learning_rate": 9.533573806730773e-07, "loss": 0.00012264520046301186, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 741, "train_speed(iter/s)": 0.022496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 56.531250953674316, "completions/min_length": 28.75, "epoch": 1.4745594440307768, "grad_norm": 0.009595558594323825, "kl": 0.096923828125, "learning_rate": 9.532242366987286e-07, "loss": 9.700939699541777e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 742, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.625, "completions/mean_length": 56.250001430511475, "completions/min_length": 29.5, "epoch": 1.4765450483991065, "grad_norm": 0.007014834369061131, "kl": 0.10565185546875, "learning_rate": 9.530909122861306e-07, "loss": 0.00010564276453806087, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 743, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 60.10416841506958, "completions/min_length": 30.25, "epoch": 1.4785306527674362, "grad_norm": 0.007026090773405848, "kl": 0.0994873046875, "learning_rate": 9.529574074883627e-07, "loss": 9.951293759513646e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 744, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.625, "completions/mean_length": 59.06250047683716, "completions/min_length": 26.875, "epoch": 1.4805162571357657, "grad_norm": 0.009650444704665776, "kl": 0.14251708984375, "learning_rate": 9.528237223585759e-07, "loss": 0.00014239756274037063, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 745, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.875, "completions/mean_length": 55.15625047683716, "completions/min_length": 28.25, "epoch": 1.4825018615040952, "grad_norm": 0.009541456569087016, "kl": 0.12774658203125, "learning_rate": 9.526898569499931e-07, "loss": 0.00012780143879354, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 746, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.875, "completions/mean_length": 58.93750047683716, "completions/min_length": 28.25, "epoch": 1.484487465872425, "grad_norm": 0.93812284308122, "kl": 0.11083984375, "learning_rate": 9.525558113159091e-07, "loss": 0.0067501007579267025, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 747, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.125, "completions/mean_length": 58.510417461395264, "completions/min_length": 28.5, "epoch": 1.4864730702407545, "grad_norm": 0.008629387575274932, "kl": 0.12152099609375, "learning_rate": 9.524215855096903e-07, "loss": 0.00012160909682279453, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 748, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.875, "completions/mean_length": 58.94791889190674, "completions/min_length": 27.75, "epoch": 1.4884586746090842, "grad_norm": 1.5815851591475962, "kl": 0.1556396484375, "learning_rate": 9.522871795847747e-07, "loss": -0.01076371781527996, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.18617857620120049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 749, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.125, "completions/mean_length": 53.20833396911621, "completions/min_length": 28.875, "epoch": 1.4904442789774137, "grad_norm": 0.009283020085570033, "kl": 0.1112060546875, "learning_rate": 9.521525935946722e-07, "loss": 0.00011112240463262424, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 750, "train_speed(iter/s)": 0.022534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 61.812500953674316, "completions/min_length": 27.875, "epoch": 1.4924298833457432, "grad_norm": 0.009667783486176394, "kl": 0.12408447265625, "learning_rate": 9.520178275929647e-07, "loss": 0.00012400734703987837, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 751, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 62.20833492279053, "completions/min_length": 26.0, "epoch": 1.494415487714073, "grad_norm": 0.009630252798554353, "kl": 0.11883544921875, "learning_rate": 9.518828816333049e-07, "loss": 0.00011870354501297697, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 752, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 65.60416889190674, "completions/min_length": 30.625, "epoch": 1.4964010920824027, "grad_norm": 0.007012520130989951, "kl": 0.11419677734375, "learning_rate": 9.51747755769418e-07, "loss": 0.00011400604125810787, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 753, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 60.791667461395264, "completions/min_length": 29.0, "epoch": 1.4983866964507322, "grad_norm": 0.015739227950615216, "kl": 0.113037109375, "learning_rate": 9.516124500551004e-07, "loss": 0.00011308235116302967, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 754, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.875, "completions/mean_length": 61.89583444595337, "completions/min_length": 30.125, "epoch": 1.5003723008190617, "grad_norm": 0.007121488855440908, "kl": 0.11822509765625, "learning_rate": 9.514769645442202e-07, "loss": 0.00011818177154054865, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 755, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 67.68750190734863, "completions/min_length": 33.625, "epoch": 1.5023579051873914, "grad_norm": 0.6177844241306097, "kl": 0.1590576171875, "learning_rate": 9.513412992907173e-07, "loss": 0.011229190975427628, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 756, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 65.46875143051147, "completions/min_length": 31.875, "epoch": 1.5043435095557212, "grad_norm": 0.9274971643782269, "kl": 0.122314453125, "learning_rate": 9.512054543486025e-07, "loss": -0.00040559968329034746, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 757, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 63.697919845581055, "completions/min_length": 31.5, "epoch": 1.5063291139240507, "grad_norm": 0.901867771077204, "kl": 0.11737060546875, "learning_rate": 9.510694297719588e-07, "loss": -0.004113540053367615, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 758, "train_speed(iter/s)": 0.022543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.875, "completions/mean_length": 59.7604193687439, "completions/min_length": 30.375, "epoch": 1.5083147182923802, "grad_norm": 0.009326167634635434, "kl": 0.13714599609375, "learning_rate": 9.509332256149406e-07, "loss": 0.0001371256948914379, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 759, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 63.15625190734863, "completions/min_length": 25.25, "epoch": 1.51030032266071, "grad_norm": 0.9037795662553252, "kl": 0.111083984375, "learning_rate": 9.507968419317736e-07, "loss": -0.004540104418992996, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 760, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 61.89583492279053, "completions/min_length": 29.875, "epoch": 1.5122859270290394, "grad_norm": 1.4719736831373125, "kl": 0.12567138671875, "learning_rate": 9.506602787767549e-07, "loss": -0.008734042756259441, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.6979166679084301, "rewards/CineAccuracyORM/std": 0.31391648203134537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 761, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 63.96875190734863, "completions/min_length": 30.75, "epoch": 1.5142715313973691, "grad_norm": 0.014517003826497215, "kl": 0.12335205078125, "learning_rate": 9.505235362042534e-07, "loss": 0.00012353870260994881, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 762, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.125, "completions/mean_length": 55.04166841506958, "completions/min_length": 29.125, "epoch": 1.5162571357656986, "grad_norm": 1.0381968233899606, "kl": 0.111572265625, "learning_rate": 9.503866142687091e-07, "loss": -0.002629011869430542, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 763, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 64.32291889190674, "completions/min_length": 29.375, "epoch": 1.5182427401340282, "grad_norm": 0.042451780322132066, "kl": 0.1676025390625, "learning_rate": 9.502495130246338e-07, "loss": 0.00016781894373707473, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 764, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 61.552085399627686, "completions/min_length": 29.875, "epoch": 1.5202283445023579, "grad_norm": 0.0132875276350042, "kl": 0.1226806640625, "learning_rate": 9.501122325266103e-07, "loss": 0.00012267596321180463, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 765, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 63.85416889190674, "completions/min_length": 31.25, "epoch": 1.5222139488706876, "grad_norm": 0.017579931638466623, "kl": 0.14801025390625, "learning_rate": 9.499747728292927e-07, "loss": 0.0001481170766055584, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 766, "train_speed(iter/s)": 0.022559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 67.6354193687439, "completions/min_length": 36.25, "epoch": 1.5241995532390171, "grad_norm": 0.017843904723149635, "kl": 0.1181640625, "learning_rate": 9.498371339874068e-07, "loss": 0.0001180471372208558, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 767, "train_speed(iter/s)": 0.022559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.375, "completions/mean_length": 60.78125047683716, "completions/min_length": 30.125, "epoch": 1.5261851576073466, "grad_norm": 1.0855973909512628, "kl": 0.158935546875, "learning_rate": 9.496993160557494e-07, "loss": 0.00015877436089795083, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 768, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 63.177085399627686, "completions/min_length": 29.875, "epoch": 1.5281707619756764, "grad_norm": 0.013352749438859125, "kl": 0.12994384765625, "learning_rate": 9.495613190891891e-07, "loss": 0.00013011903502047062, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 769, "train_speed(iter/s)": 0.022562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 56.41666841506958, "completions/min_length": 26.625, "epoch": 1.530156366344006, "grad_norm": 0.02242754711233612, "kl": 0.12506103515625, "learning_rate": 9.494231431426654e-07, "loss": 0.00012502526806201786, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 770, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 71.01041746139526, "completions/min_length": 34.375, "epoch": 1.5321419707123356, "grad_norm": 0.00971207049700734, "kl": 0.14654541015625, "learning_rate": 9.492847882711888e-07, "loss": 0.00014651428500656039, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 771, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 64.22916793823242, "completions/min_length": 30.875, "epoch": 1.534127575080665, "grad_norm": 0.8385842916742702, "kl": 0.1278076171875, "learning_rate": 9.491462545298415e-07, "loss": 0.0051539079286158085, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.708333333954215, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 772, "train_speed(iter/s)": 0.022567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 69.87500143051147, "completions/min_length": 31.5, "epoch": 1.5361131794489948, "grad_norm": 0.7089185559306522, "kl": 0.092315673828125, "learning_rate": 9.490075419737767e-07, "loss": -0.00862202700227499, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 773, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.625, "completions/mean_length": 71.97916841506958, "completions/min_length": 34.625, "epoch": 1.5380987838173243, "grad_norm": 0.009889296254899278, "kl": 0.13079833984375, "learning_rate": 9.488686506582188e-07, "loss": 0.0001309265790041536, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 774, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 65.6666693687439, "completions/min_length": 33.75, "epoch": 1.540084388185654, "grad_norm": 0.011404412506493187, "kl": 0.138671875, "learning_rate": 9.487295806384636e-07, "loss": 0.00013863175990991294, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 775, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.375, "completions/mean_length": 58.78125190734863, "completions/min_length": 33.75, "epoch": 1.5420699925539836, "grad_norm": 0.011675313289628312, "kl": 0.12164306640625, "learning_rate": 9.485903319698776e-07, "loss": 0.00012157539458712563, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 776, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 70.02083444595337, "completions/min_length": 35.0, "epoch": 1.544055596922313, "grad_norm": 0.009008345408477022, "kl": 0.1031494140625, "learning_rate": 9.484509047078989e-07, "loss": 0.00010315240069758147, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 777, "train_speed(iter/s)": 0.022573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 66.42708444595337, "completions/min_length": 31.875, "epoch": 1.5460412012906428, "grad_norm": 0.037744752841359024, "kl": 0.15655517578125, "learning_rate": 9.483112989080363e-07, "loss": 0.00015669949061702937, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 778, "train_speed(iter/s)": 0.022576 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 66.51041841506958, "completions/min_length": 33.75, "epoch": 1.5480268056589725, "grad_norm": 0.8543397654982704, "kl": 0.110107421875, "learning_rate": 9.481715146258699e-07, "loss": 0.00011021520185749978, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 779, "train_speed(iter/s)": 0.022573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 67.80208492279053, "completions/min_length": 33.75, "epoch": 1.550012410027302, "grad_norm": 1.0463599205333745, "kl": 0.16094970703125, "learning_rate": 9.480315519170508e-07, "loss": -0.01297999732196331, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.39076167345046997, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 780, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 75.21875190734863, "completions/min_length": 35.75, "epoch": 1.5519980143956316, "grad_norm": 0.9815733350719363, "kl": 0.151611328125, "learning_rate": 9.478914108373011e-07, "loss": 0.006083416752517223, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 781, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 66.48958492279053, "completions/min_length": 33.875, "epoch": 1.5539836187639613, "grad_norm": 0.008523084624338484, "kl": 0.107421875, "learning_rate": 9.477510914424141e-07, "loss": 0.00010765058686956763, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 782, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 72.47916793823242, "completions/min_length": 32.5, "epoch": 1.555969223132291, "grad_norm": 0.010365001287214036, "kl": 0.1300048828125, "learning_rate": 9.476105937882537e-07, "loss": 0.00012992092524655163, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 783, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.375, "completions/mean_length": 57.500001430511475, "completions/min_length": 30.625, "epoch": 1.5579548275006205, "grad_norm": 0.010265576048434191, "kl": 0.1270751953125, "learning_rate": 9.474699179307552e-07, "loss": 0.00012720597442239523, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 784, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 74.21875286102295, "completions/min_length": 37.875, "epoch": 1.55994043186895, "grad_norm": 0.009511907231798171, "kl": 0.1357421875, "learning_rate": 9.473290639259248e-07, "loss": 0.00013579762890003622, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 785, "train_speed(iter/s)": 0.022574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 71.56250143051147, "completions/min_length": 28.875, "epoch": 1.5619260362372798, "grad_norm": 1.0826211721848011, "kl": 0.13836669921875, "learning_rate": 9.471880318298393e-07, "loss": -0.026279527693986893, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.2805779278278351, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 786, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 68.86458587646484, "completions/min_length": 33.5, "epoch": 1.5639116406056095, "grad_norm": 0.00893109141753033, "kl": 0.12847900390625, "learning_rate": 9.470468216986464e-07, "loss": 0.00012845388846471906, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 787, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 70.739586353302, "completions/min_length": 34.75, "epoch": 1.565897244973939, "grad_norm": 0.5305438404485602, "kl": 0.13238525390625, "learning_rate": 9.469054335885653e-07, "loss": 0.01100987195968628, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 788, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.125, "completions/mean_length": 77.94791889190674, "completions/min_length": 37.625, "epoch": 1.5678828493422685, "grad_norm": 1.5412602990359585, "kl": 0.1397705078125, "learning_rate": 9.467638675558854e-07, "loss": 0.009466740302741528, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 789, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 66.70833539962769, "completions/min_length": 32.125, "epoch": 1.569868453710598, "grad_norm": 0.008285440752085574, "kl": 0.128692626953125, "learning_rate": 9.466221236569672e-07, "loss": 0.00012855028035119176, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 790, "train_speed(iter/s)": 0.022567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 65.08333539962769, "completions/min_length": 30.75, "epoch": 1.5718540580789278, "grad_norm": 1.1063484598804467, "kl": 0.13690185546875, "learning_rate": 9.464802019482418e-07, "loss": -0.0007010664558038116, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 791, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 67.94791793823242, "completions/min_length": 31.375, "epoch": 1.5738396624472575, "grad_norm": 0.009371043807297407, "kl": 0.11846923828125, "learning_rate": 9.463381024862114e-07, "loss": 0.00011852764873765409, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 792, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 73.87500286102295, "completions/min_length": 37.125, "epoch": 1.575825266815587, "grad_norm": 1.6962383506131065, "kl": 0.12066650390625, "learning_rate": 9.461958253274489e-07, "loss": 0.020231738686561584, "memory(GiB)": 94.21, "reward": 1.7604166865348816, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.2934674955904484, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 793, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 65.73958492279053, "completions/min_length": 33.5, "epoch": 1.5778108711839165, "grad_norm": 0.019906395330733715, "kl": 0.1201171875, "learning_rate": 9.460533705285978e-07, "loss": 0.00012027585034957156, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 794, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 62.114585399627686, "completions/min_length": 32.375, "epoch": 1.5797964755522462, "grad_norm": 0.011294376926733699, "kl": 0.12542724609375, "learning_rate": 9.459107381463725e-07, "loss": 0.0001254882081411779, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 795, "train_speed(iter/s)": 0.022573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 75.10416889190674, "completions/min_length": 33.25, "epoch": 1.581782079920576, "grad_norm": 0.00795684719757277, "kl": 0.12921142578125, "learning_rate": 9.457679282375578e-07, "loss": 0.00012897045235149562, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 796, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.125, "completions/mean_length": 74.08333492279053, "completions/min_length": 30.125, "epoch": 1.5837676842889055, "grad_norm": 0.8817028583267403, "kl": 0.18048095703125, "learning_rate": 9.456249408590096e-07, "loss": -0.006961992010474205, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 797, "train_speed(iter/s)": 0.022566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 68.84375238418579, "completions/min_length": 33.625, "epoch": 1.585753288657235, "grad_norm": 1.0506957066411309, "kl": 0.1197509765625, "learning_rate": 9.45481776067654e-07, "loss": 0.0053444355726242065, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 798, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 73.73958444595337, "completions/min_length": 37.875, "epoch": 1.5877388930255647, "grad_norm": 0.5890124463293395, "kl": 0.1383056640625, "learning_rate": 9.453384339204882e-07, "loss": -0.00964347179979086, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 799, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 68.36458539962769, "completions/min_length": 30.25, "epoch": 1.5897244973938944, "grad_norm": 0.017975294219367728, "kl": 0.11297607421875, "learning_rate": 9.451949144745795e-07, "loss": 0.00011297940363874659, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 800, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 74.3854193687439, "completions/min_length": 37.0, "epoch": 1.591710101762224, "grad_norm": 0.026791448363209736, "kl": 0.1650390625, "learning_rate": 9.450512177870662e-07, "loss": 0.00016505643725395203, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 801, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 77.29166793823242, "completions/min_length": 33.0, "epoch": 1.5936957061305534, "grad_norm": 2.4540021883543295, "kl": 0.134033203125, "learning_rate": 9.449073439151572e-07, "loss": 0.0023204542230814695, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 802, "train_speed(iter/s)": 0.022562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.875, "completions/mean_length": 86.13541984558105, "completions/min_length": 31.25, "epoch": 1.595681310498883, "grad_norm": 0.010410936371844324, "kl": 0.149627685546875, "learning_rate": 9.447632929161314e-07, "loss": 0.0001496023323852569, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 803, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 69.63541889190674, "completions/min_length": 30.5, "epoch": 1.5976669148672127, "grad_norm": 0.008618382565457883, "kl": 0.13995361328125, "learning_rate": 9.446190648473389e-07, "loss": 0.00013988380669616163, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 804, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 345.625, "completions/mean_length": 92.69791889190674, "completions/min_length": 28.0, "epoch": 1.5996525192355424, "grad_norm": 1.3339048678842351, "kl": 1.57891845703125, "learning_rate": 9.444746597661997e-07, "loss": 0.03770984709262848, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.10206206887960434, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.29628782719373703, "rewards/Format/mean": 0.9791666716337204, "rewards/Format/std": 0.07216878235340118, "step": 805, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 68.66666841506958, "completions/min_length": 32.75, "epoch": 1.601638123603872, "grad_norm": 0.008956449233589486, "kl": 0.11895751953125, "learning_rate": 9.443300777302049e-07, "loss": 0.00011887378059327602, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 806, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 75.47916889190674, "completions/min_length": 32.75, "epoch": 1.6036237279722014, "grad_norm": 1.615914651645132, "kl": 0.1416015625, "learning_rate": 9.441853187969153e-07, "loss": -0.00377776101231575, "memory(GiB)": 94.21, "reward": 1.6666666865348816, "reward_std": 0.08330589532852173, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.38611526414752007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 807, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 78.29166889190674, "completions/min_length": 38.75, "epoch": 1.6056093323405312, "grad_norm": 0.00690676209294394, "kl": 0.117431640625, "learning_rate": 9.440403830239628e-07, "loss": 0.00011741237540263683, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 808, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 74.20833587646484, "completions/min_length": 30.0, "epoch": 1.6075949367088609, "grad_norm": 0.008620998577279193, "kl": 0.1343994140625, "learning_rate": 9.438952704690492e-07, "loss": 0.0001343963813269511, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 809, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 69.03125143051147, "completions/min_length": 31.625, "epoch": 1.6095805410771904, "grad_norm": 0.00776802004671236, "kl": 0.12030029296875, "learning_rate": 9.437499811899472e-07, "loss": 0.00012033487291773781, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 810, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 77.46875333786011, "completions/min_length": 35.0, "epoch": 1.61156614544552, "grad_norm": 0.00893684517133934, "kl": 0.12255859375, "learning_rate": 9.436045152444995e-07, "loss": 0.00012254255125299096, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 811, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 72.34375286102295, "completions/min_length": 31.75, "epoch": 1.6135517498138496, "grad_norm": 0.007839196633365264, "kl": 0.09814453125, "learning_rate": 9.434588726906189e-07, "loss": 9.823910659179091e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 812, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 65.08333683013916, "completions/min_length": 28.25, "epoch": 1.6155373541821794, "grad_norm": 0.00821263143516481, "kl": 0.1158447265625, "learning_rate": 9.43313053586289e-07, "loss": 0.00011564564192667603, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 813, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 73.76042032241821, "completions/min_length": 35.375, "epoch": 1.6175229585505089, "grad_norm": 0.01971462604692211, "kl": 0.130859375, "learning_rate": 9.431670579895637e-07, "loss": 0.00013073688023723662, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 814, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 72.55208587646484, "completions/min_length": 35.75, "epoch": 1.6195085629188384, "grad_norm": 0.008274004499679967, "kl": 0.104248046875, "learning_rate": 9.430208859585666e-07, "loss": 0.00010424414358567446, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 815, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.625, "completions/mean_length": 75.75000238418579, "completions/min_length": 37.125, "epoch": 1.6214941672871679, "grad_norm": 1.395976126387643, "kl": 0.11334228515625, "learning_rate": 9.428745375514924e-07, "loss": 0.009927384555339813, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 816, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 71.34375286102295, "completions/min_length": 27.125, "epoch": 1.6234797716554976, "grad_norm": 0.00677818219589261, "kl": 0.11260986328125, "learning_rate": 9.427280128266049e-07, "loss": 0.00011265358625678346, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 817, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 74.48958539962769, "completions/min_length": 34.25, "epoch": 1.6254653760238273, "grad_norm": 0.008810646902532714, "kl": 0.1170654296875, "learning_rate": 9.425813118422392e-07, "loss": 0.00011714122956618667, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 818, "train_speed(iter/s)": 0.022557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 73.87500381469727, "completions/min_length": 33.75, "epoch": 1.6274509803921569, "grad_norm": 0.8717749467815266, "kl": 0.115447998046875, "learning_rate": 9.424344346567999e-07, "loss": 0.004413970746099949, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 819, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 74.708336353302, "completions/min_length": 34.125, "epoch": 1.6294365847604864, "grad_norm": 0.00917564527205472, "kl": 0.1219482421875, "learning_rate": 9.42287381328762e-07, "loss": 0.00012195239833090454, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 820, "train_speed(iter/s)": 0.022563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 72.34375238418579, "completions/min_length": 36.625, "epoch": 1.631422189128816, "grad_norm": 1.1646514068054032, "kl": 0.14031982421875, "learning_rate": 9.421401519166705e-07, "loss": -0.00496150366961956, "memory(GiB)": 94.21, "reward": 1.4479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.4479166669771075, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 821, "train_speed(iter/s)": 0.022561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 77.63541841506958, "completions/min_length": 39.25, "epoch": 1.6334077934971458, "grad_norm": 1.0734429124995073, "kl": 0.13720703125, "learning_rate": 9.419927464791406e-07, "loss": -0.017771240323781967, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 822, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 66.03125238418579, "completions/min_length": 30.125, "epoch": 1.6353933978654753, "grad_norm": 0.009837349457302495, "kl": 0.1162109375, "learning_rate": 9.418451650748576e-07, "loss": 0.0001161246545962058, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 823, "train_speed(iter/s)": 0.022567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 76.177086353302, "completions/min_length": 35.125, "epoch": 1.6373790022338048, "grad_norm": 1.2371917012477311, "kl": 0.12396240234375, "learning_rate": 9.416974077625768e-07, "loss": 0.006578117609024048, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 824, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.625, "completions/mean_length": 70.69791889190674, "completions/min_length": 34.5, "epoch": 1.6393646066021346, "grad_norm": 0.008997371818953376, "kl": 0.13775634765625, "learning_rate": 9.415494746011236e-07, "loss": 0.00013793342805001885, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 825, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 74.77083539962769, "completions/min_length": 33.75, "epoch": 1.6413502109704643, "grad_norm": 0.008095205405073396, "kl": 0.13104248046875, "learning_rate": 9.414013656493933e-07, "loss": 0.00013096435577608645, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 826, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 287.125, "completions/mean_length": 85.65625143051147, "completions/min_length": 36.875, "epoch": 1.6433358153387938, "grad_norm": 0.4205598680692579, "kl": 0.12908935546875, "learning_rate": 9.412530809663511e-07, "loss": 0.02198687568306923, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 827, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 84.09375190734863, "completions/min_length": 36.5, "epoch": 1.6453214197071233, "grad_norm": 0.7356170916547373, "kl": 0.12030029296875, "learning_rate": 9.411046206110324e-07, "loss": -5.795123797724955e-05, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 828, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 74.77083492279053, "completions/min_length": 31.875, "epoch": 1.6473070240754528, "grad_norm": 0.010240680763762378, "kl": 0.15985107421875, "learning_rate": 9.409559846425425e-07, "loss": 0.0001596625952515751, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 829, "train_speed(iter/s)": 0.022557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 77.8854193687439, "completions/min_length": 31.875, "epoch": 1.6492926284437825, "grad_norm": 0.011050179391073294, "kl": 0.16876220703125, "learning_rate": 9.408071731200567e-07, "loss": 0.00016890847473405302, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 830, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 79.614586353302, "completions/min_length": 36.875, "epoch": 1.6512782328121123, "grad_norm": 0.009944493614827271, "kl": 0.13702392578125, "learning_rate": 9.406581861028196e-07, "loss": 0.00013694663357455283, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 831, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 69.364586353302, "completions/min_length": 30.125, "epoch": 1.6532638371804418, "grad_norm": 0.010700740199548475, "kl": 0.13641357421875, "learning_rate": 9.405090236501465e-07, "loss": 0.00013627774023916572, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 832, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 71.77083492279053, "completions/min_length": 35.125, "epoch": 1.6552494415487713, "grad_norm": 0.010773221818202457, "kl": 0.154296875, "learning_rate": 9.40359685821422e-07, "loss": 0.00015416624955832958, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 833, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 75.82291841506958, "completions/min_length": 32.125, "epoch": 1.657235045917101, "grad_norm": 0.8533079746770784, "kl": 0.1595458984375, "learning_rate": 9.402101726761007e-07, "loss": 0.00359630910679698, "memory(GiB)": 94.21, "reward": 1.59375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.59375, "rewards/CineAccuracyORM/std": 0.4482080899178982, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 834, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 70.01041841506958, "completions/min_length": 32.5, "epoch": 1.6592206502854308, "grad_norm": 0.010378373688934767, "kl": 0.13818359375, "learning_rate": 9.400604842737071e-07, "loss": 0.0001382166228722781, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 835, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 66.92708492279053, "completions/min_length": 34.625, "epoch": 1.6612062546537603, "grad_norm": 0.011075477637148815, "kl": 0.15032958984375, "learning_rate": 9.399106206738352e-07, "loss": 0.0001504713000031188, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 836, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 67.66666841506958, "completions/min_length": 35.875, "epoch": 1.6631918590220898, "grad_norm": 0.0470884733661479, "kl": 0.169677734375, "learning_rate": 9.397605819361488e-07, "loss": 0.00016952768783085048, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 837, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 71.20833587646484, "completions/min_length": 31.5, "epoch": 1.6651774633904195, "grad_norm": 0.010485424927340087, "kl": 0.148681640625, "learning_rate": 9.396103681203818e-07, "loss": 0.00014874427870381624, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 838, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 68.83333492279053, "completions/min_length": 34.875, "epoch": 1.6671630677587492, "grad_norm": 1.5172244051541726, "kl": 0.16845703125, "learning_rate": 9.394599792863373e-07, "loss": -0.0060480027459561825, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 839, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 76.43750286102295, "completions/min_length": 37.375, "epoch": 1.6691486721270787, "grad_norm": 0.010490664202563641, "kl": 0.15625, "learning_rate": 9.393094154938884e-07, "loss": 0.0001561249082442373, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 840, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 73.85416841506958, "completions/min_length": 35.5, "epoch": 1.6711342764954082, "grad_norm": 0.010349280649444464, "kl": 0.13507080078125, "learning_rate": 9.391586768029778e-07, "loss": 0.00013509612472262233, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 841, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 68.239586353302, "completions/min_length": 32.125, "epoch": 1.6731198808637378, "grad_norm": 0.010679222826676917, "kl": 0.14532470703125, "learning_rate": 9.390077632736177e-07, "loss": 0.00014528073370456696, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 842, "train_speed(iter/s)": 0.02255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 69.85416841506958, "completions/min_length": 41.5, "epoch": 1.6751054852320675, "grad_norm": 0.010478496725245676, "kl": 0.1351318359375, "learning_rate": 9.388566749658902e-07, "loss": 0.00013509014388546348, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 843, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 69.51041841506958, "completions/min_length": 32.375, "epoch": 1.6770910896003972, "grad_norm": 0.010293932236527227, "kl": 0.12689208984375, "learning_rate": 9.387054119399465e-07, "loss": 0.00012689763389062136, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 844, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.625, "completions/mean_length": 65.39583492279053, "completions/min_length": 37.125, "epoch": 1.6790766939687267, "grad_norm": 0.009183468936833618, "kl": 0.11871337890625, "learning_rate": 9.385539742560078e-07, "loss": 0.00011860028462251648, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 845, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 71.59375095367432, "completions/min_length": 32.625, "epoch": 1.6810622983370562, "grad_norm": 0.012968932210123681, "kl": 0.143798828125, "learning_rate": 9.384023619743646e-07, "loss": 0.00014387266128323972, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 846, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 74.05208539962769, "completions/min_length": 35.25, "epoch": 1.683047902705386, "grad_norm": 1.232171409556621, "kl": 0.1568603515625, "learning_rate": 9.382505751553771e-07, "loss": -0.021087627857923508, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 847, "train_speed(iter/s)": 0.022556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 65.69791889190674, "completions/min_length": 31.375, "epoch": 1.6850335070737157, "grad_norm": 0.9966612978467505, "kl": 0.12884521484375, "learning_rate": 9.380986138594748e-07, "loss": -0.0031176875345408916, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 848, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 73.96875190734863, "completions/min_length": 31.75, "epoch": 1.6870191114420452, "grad_norm": 0.008043221089977237, "kl": 0.11553955078125, "learning_rate": 9.379464781471569e-07, "loss": 0.0001155805221060291, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 849, "train_speed(iter/s)": 0.022563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.875, "completions/mean_length": 63.781251430511475, "completions/min_length": 30.875, "epoch": 1.6890047158103747, "grad_norm": 0.04178812752756811, "kl": 0.13671875, "learning_rate": 9.377941680789915e-07, "loss": 0.00013680793927051127, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 850, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 70.19791841506958, "completions/min_length": 33.5, "epoch": 1.6909903201787044, "grad_norm": 0.010513364300764605, "kl": 0.1287841796875, "learning_rate": 9.376416837156169e-07, "loss": 0.00012870121281594038, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 851, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 82.21875238418579, "completions/min_length": 37.25, "epoch": 1.6929759245470342, "grad_norm": 0.8661182400745436, "kl": 0.134521484375, "learning_rate": 9.3748902511774e-07, "loss": 0.015169864520430565, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 852, "train_speed(iter/s)": 0.022566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 78.62500381469727, "completions/min_length": 37.125, "epoch": 1.6949615289153637, "grad_norm": 1.0640528014409796, "kl": 0.117431640625, "learning_rate": 9.373361923461378e-07, "loss": -0.004249073565006256, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166669771075, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 853, "train_speed(iter/s)": 0.022563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 69.64583539962769, "completions/min_length": 32.625, "epoch": 1.6969471332836932, "grad_norm": 0.04133701901778342, "kl": 0.178466796875, "learning_rate": 9.371831854616561e-07, "loss": 0.00017831040895543993, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 854, "train_speed(iter/s)": 0.022562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 78.16666984558105, "completions/min_length": 40.875, "epoch": 1.6989327376520227, "grad_norm": 0.8444156214137051, "kl": 0.15545654296875, "learning_rate": 9.370300045252103e-07, "loss": -0.007017943542450666, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 855, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 68.03125143051147, "completions/min_length": 31.75, "epoch": 1.7009183420203524, "grad_norm": 0.028400781178857627, "kl": 0.11895751953125, "learning_rate": 9.368766495977849e-07, "loss": 0.00011885771527886391, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 856, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 75.25000286102295, "completions/min_length": 40.625, "epoch": 1.7029039463886821, "grad_norm": 1.0036904740880648, "kl": 0.14837646484375, "learning_rate": 9.367231207404339e-07, "loss": 0.009655861184000969, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 857, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 88.52083587646484, "completions/min_length": 32.625, "epoch": 1.7048895507570117, "grad_norm": 1.3447539571216185, "kl": 0.14324951171875, "learning_rate": 9.365694180142802e-07, "loss": 0.0016064762603491545, "memory(GiB)": 94.21, "reward": 1.9062500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.9062500074505806, "rewards/CineAccuracyORM/std": 0.12591182813048363, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 858, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 79.32291793823242, "completions/min_length": 35.5, "epoch": 1.7068751551253412, "grad_norm": 0.009023147790031025, "kl": 0.13037109375, "learning_rate": 9.364155414805164e-07, "loss": 0.0001304107572650537, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 859, "train_speed(iter/s)": 0.022565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 77.84375286102295, "completions/min_length": 34.5, "epoch": 1.7088607594936709, "grad_norm": 1.0928371921396525, "kl": 0.1142578125, "learning_rate": 9.362614912004039e-07, "loss": -0.005867685191333294, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 860, "train_speed(iter/s)": 0.022563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 78.37500190734863, "completions/min_length": 34.25, "epoch": 1.7108463638620006, "grad_norm": 0.0068493316499581075, "kl": 0.11376953125, "learning_rate": 9.361072672352735e-07, "loss": 0.00011375833855709061, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 861, "train_speed(iter/s)": 0.022564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 84.4791693687439, "completions/min_length": 38.5, "epoch": 1.7128319682303301, "grad_norm": 0.007296132282546416, "kl": 0.09765625, "learning_rate": 9.359528696465248e-07, "loss": 9.752794721862301e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 862, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 70.01041841506958, "completions/min_length": 30.0, "epoch": 1.7148175725986596, "grad_norm": 0.006151935780164156, "kl": 0.0947265625, "learning_rate": 9.357982984956271e-07, "loss": 9.478656284045428e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 863, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 78.239586353302, "completions/min_length": 37.25, "epoch": 1.7168031769669894, "grad_norm": 1.186740585060766, "kl": 0.13818359375, "learning_rate": 9.356435538441183e-07, "loss": 0.006953645497560501, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 864, "train_speed(iter/s)": 0.022559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.125, "completions/mean_length": 85.62500286102295, "completions/min_length": 35.25, "epoch": 1.718788781335319, "grad_norm": 0.008003721739039711, "kl": 0.1339111328125, "learning_rate": 9.354886357536056e-07, "loss": 0.00013405291247181594, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 865, "train_speed(iter/s)": 0.022557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 82.90625143051147, "completions/min_length": 36.875, "epoch": 1.7207743857036486, "grad_norm": 2.0441648839302324, "kl": 0.11358642578125, "learning_rate": 9.353335442857651e-07, "loss": -0.002224589465186, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8125000055879354, "rewards/CineAccuracyORM/std": 0.16259148344397545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 866, "train_speed(iter/s)": 0.022556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 82.44791793823242, "completions/min_length": 39.75, "epoch": 1.722759990071978, "grad_norm": 0.00844535637371365, "kl": 0.12396240234375, "learning_rate": 9.351782795023421e-07, "loss": 0.00012400794366840273, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 867, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 75.770836353302, "completions/min_length": 38.125, "epoch": 1.7247455944403076, "grad_norm": 1.4014405933264378, "kl": 0.1292724609375, "learning_rate": 9.350228414651509e-07, "loss": 0.005324948579072952, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 868, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 70.90625238418579, "completions/min_length": 34.125, "epoch": 1.7267311988086373, "grad_norm": 1.2667894439507483, "kl": 0.1318359375, "learning_rate": 9.348672302360747e-07, "loss": 0.007825274020433426, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 869, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.625, "completions/mean_length": 81.5104193687439, "completions/min_length": 37.5, "epoch": 1.728716803176967, "grad_norm": 0.008836764003010587, "kl": 0.121337890625, "learning_rate": 9.347114458770655e-07, "loss": 0.00012140587205067277, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 870, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 82.84375143051147, "completions/min_length": 31.875, "epoch": 1.7307024075452966, "grad_norm": 0.6426275280785991, "kl": 0.1240234375, "learning_rate": 9.345554884501446e-07, "loss": -0.015416160225868225, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 871, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.625, "completions/mean_length": 87.37500381469727, "completions/min_length": 41.0, "epoch": 1.732688011913626, "grad_norm": 0.008714394265764763, "kl": 0.13043212890625, "learning_rate": 9.343993580174017e-07, "loss": 0.00013047503307461739, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 872, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 78.2916693687439, "completions/min_length": 33.875, "epoch": 1.7346736162819558, "grad_norm": 0.00848953472067005, "kl": 0.11932373046875, "learning_rate": 9.342430546409959e-07, "loss": 0.00011928447929676622, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 873, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 80.44791841506958, "completions/min_length": 38.5, "epoch": 1.7366592206502856, "grad_norm": 0.006718660070962197, "kl": 0.1209716796875, "learning_rate": 9.340865783831548e-07, "loss": 0.0001210131958941929, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 874, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 75.83333492279053, "completions/min_length": 36.5, "epoch": 1.738644825018615, "grad_norm": 0.9307086120262128, "kl": 0.122802734375, "learning_rate": 9.33929929306175e-07, "loss": 0.00012271106243133545, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 875, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.875, "completions/mean_length": 86.46875190734863, "completions/min_length": 34.875, "epoch": 1.7406304293869446, "grad_norm": 0.006989194255262667, "kl": 0.1280517578125, "learning_rate": 9.337731074724217e-07, "loss": 0.00012786986189894378, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 876, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 76.50000238418579, "completions/min_length": 33.625, "epoch": 1.7426160337552743, "grad_norm": 0.8613938715692814, "kl": 0.12774658203125, "learning_rate": 9.336161129443294e-07, "loss": -0.002113398164510727, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 877, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 89.64583587646484, "completions/min_length": 37.0, "epoch": 1.744601638123604, "grad_norm": 0.006956607435981466, "kl": 0.13836669921875, "learning_rate": 9.334589457844005e-07, "loss": 0.00013832177501171827, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 878, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 75.89583587646484, "completions/min_length": 36.0, "epoch": 1.7465872424919335, "grad_norm": 0.018903615038227694, "kl": 0.12786865234375, "learning_rate": 9.333016060552068e-07, "loss": 0.00012793888163287193, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 879, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 79.77083539962769, "completions/min_length": 42.375, "epoch": 1.748572846860263, "grad_norm": 0.009768558421778843, "kl": 0.13153076171875, "learning_rate": 9.331440938193886e-07, "loss": 0.00013149468577466905, "memory(GiB)": 94.21, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 880, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 75.87500190734863, "completions/min_length": 38.25, "epoch": 1.7505584512285925, "grad_norm": 0.007302215714605456, "kl": 0.1165771484375, "learning_rate": 9.329864091396551e-07, "loss": 0.00011654042464215308, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 881, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 80.26041841506958, "completions/min_length": 35.0, "epoch": 1.7525440555969223, "grad_norm": 0.008200742112699761, "kl": 0.1165771484375, "learning_rate": 9.328285520787836e-07, "loss": 0.00011656450806185603, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 882, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 69.63541889190674, "completions/min_length": 36.375, "epoch": 1.754529659965252, "grad_norm": 0.9512648471477639, "kl": 0.13714599609375, "learning_rate": 9.326705226996205e-07, "loss": -0.004535972140729427, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 883, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 75.4791693687439, "completions/min_length": 36.0, "epoch": 1.7565152643335815, "grad_norm": 0.010957816091449385, "kl": 0.12603759765625, "learning_rate": 9.325123210650808e-07, "loss": 0.00012580420298036188, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 884, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 76.67708587646484, "completions/min_length": 35.625, "epoch": 1.758500868701911, "grad_norm": 0.7032466702633126, "kl": 0.11761474609375, "learning_rate": 9.323539472381478e-07, "loss": 0.00044058263301849365, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 885, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.75, "completions/mean_length": 81.71875286102295, "completions/min_length": 36.375, "epoch": 1.7604864730702408, "grad_norm": 0.007635854293946776, "kl": 0.10650634765625, "learning_rate": 9.321954012818736e-07, "loss": 0.00010652549099177122, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 886, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 73.52083587646484, "completions/min_length": 36.875, "epoch": 1.7624720774385705, "grad_norm": 0.012792755398809138, "kl": 0.127197265625, "learning_rate": 9.320366832593784e-07, "loss": 0.00012726319255307317, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 887, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 75.42708587646484, "completions/min_length": 33.625, "epoch": 1.7644576818069, "grad_norm": 0.05229859065893468, "kl": 0.133056640625, "learning_rate": 9.318777932338518e-07, "loss": 0.00013305262837093323, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 888, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 83.645836353302, "completions/min_length": 37.125, "epoch": 1.7664432861752295, "grad_norm": 0.0062669782192380655, "kl": 0.1102294921875, "learning_rate": 9.317187312685508e-07, "loss": 0.00011017799261026084, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 889, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 72.34375190734863, "completions/min_length": 33.125, "epoch": 1.7684288905435592, "grad_norm": 0.9103954532426873, "kl": 0.12982177734375, "learning_rate": 9.315594974268017e-07, "loss": 0.001666487893089652, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 890, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 79.53125190734863, "completions/min_length": 35.125, "epoch": 1.770414494911889, "grad_norm": 0.0077098493005936025, "kl": 0.1195068359375, "learning_rate": 9.314000917719989e-07, "loss": 0.0001194100477732718, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 891, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 76.677086353302, "completions/min_length": 39.75, "epoch": 1.7724000992802185, "grad_norm": 1.7381051796500595, "kl": 0.13616943359375, "learning_rate": 9.312405143676049e-07, "loss": 0.00013609975576400757, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 892, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 77.083336353302, "completions/min_length": 39.875, "epoch": 1.774385703648548, "grad_norm": 0.008840746026892974, "kl": 0.12017822265625, "learning_rate": 9.31080765277151e-07, "loss": 0.00012022841110592708, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 893, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 79.97916889190674, "completions/min_length": 37.25, "epoch": 1.7763713080168775, "grad_norm": 0.9613325652328614, "kl": 0.107177734375, "learning_rate": 9.309208445642369e-07, "loss": 0.0014907767763361335, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 894, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 85.91666889190674, "completions/min_length": 43.375, "epoch": 1.7783569123852072, "grad_norm": 0.6204390112258411, "kl": 0.10943603515625, "learning_rate": 9.307607522925302e-07, "loss": 0.007937220856547356, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 895, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 84.09375381469727, "completions/min_length": 39.0, "epoch": 1.780342516753537, "grad_norm": 0.006596129050990661, "kl": 0.12164306640625, "learning_rate": 9.306004885257673e-07, "loss": 0.00012155827425885946, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 896, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 75.9791693687439, "completions/min_length": 33.125, "epoch": 1.7823281211218664, "grad_norm": 0.8419564927483318, "kl": 0.1160888671875, "learning_rate": 9.304400533277526e-07, "loss": -0.005623520817607641, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 897, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.375, "completions/mean_length": 88.34375286102295, "completions/min_length": 39.875, "epoch": 1.784313725490196, "grad_norm": 0.007441250195927309, "kl": 0.150390625, "learning_rate": 9.302794467623584e-07, "loss": 0.00015034168609417975, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 898, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 77.03125190734863, "completions/min_length": 42.625, "epoch": 1.7862993298585257, "grad_norm": 0.007088094241563425, "kl": 0.1243896484375, "learning_rate": 9.301186688935261e-07, "loss": 0.00012434719246812165, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 899, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 73.864586353302, "completions/min_length": 32.0, "epoch": 1.7882849342268554, "grad_norm": 1.5123569867386064, "kl": 0.112548828125, "learning_rate": 9.299577197852644e-07, "loss": 0.008591524325311184, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 900, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 83.61458587646484, "completions/min_length": 37.375, "epoch": 1.790270538595185, "grad_norm": 0.8705223897291671, "kl": 0.11187744140625, "learning_rate": 9.297965995016511e-07, "loss": 0.013531377539038658, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 901, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 79.28125190734863, "completions/min_length": 39.5, "epoch": 1.7922561429635144, "grad_norm": 0.00771821969783833, "kl": 0.11553955078125, "learning_rate": 9.296353081068309e-07, "loss": 0.00011562125291675329, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 902, "train_speed(iter/s)": 0.022538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 78.25000286102295, "completions/min_length": 35.75, "epoch": 1.7942417473318442, "grad_norm": 1.0038647966048213, "kl": 0.12664794921875, "learning_rate": 9.29473845665018e-07, "loss": -0.008175798691809177, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 903, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 83.48958778381348, "completions/min_length": 36.875, "epoch": 1.796227351700174, "grad_norm": 0.6299893745867365, "kl": 0.126953125, "learning_rate": 9.293122122404937e-07, "loss": -0.005444802343845367, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666679084301, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 904, "train_speed(iter/s)": 0.022534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 76.0729193687439, "completions/min_length": 36.125, "epoch": 1.7982129560685034, "grad_norm": 0.006683691544014729, "kl": 0.1065673828125, "learning_rate": 9.291504078976078e-07, "loss": 0.00010656008817022666, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 905, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 74.89583539962769, "completions/min_length": 33.125, "epoch": 1.800198560436833, "grad_norm": 0.8765895081658152, "kl": 0.12567138671875, "learning_rate": 9.289884327007782e-07, "loss": -0.006656238343566656, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 906, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 80.28125238418579, "completions/min_length": 34.25, "epoch": 1.8021841648051624, "grad_norm": 0.6817033388841132, "kl": 0.121429443359375, "learning_rate": 9.288262867144905e-07, "loss": 0.013028094545006752, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 907, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.875, "completions/mean_length": 75.08333492279053, "completions/min_length": 33.125, "epoch": 1.8041697691734921, "grad_norm": 0.009188903603812635, "kl": 0.12896728515625, "learning_rate": 9.286639700032984e-07, "loss": 0.00012894233805127442, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 908, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.875, "completions/mean_length": 95.34375286102295, "completions/min_length": 36.0, "epoch": 1.8061553735418219, "grad_norm": 0.021478704084264718, "kl": 0.16107177734375, "learning_rate": 9.28501482631824e-07, "loss": 0.00016091513680294156, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 909, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 75.58333492279053, "completions/min_length": 33.5, "epoch": 1.8081409779101514, "grad_norm": 1.4350633322121908, "kl": 0.122802734375, "learning_rate": 9.283388246647565e-07, "loss": -0.004109155386686325, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.2407601661980152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 910, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 77.41666793823242, "completions/min_length": 32.75, "epoch": 1.810126582278481, "grad_norm": 0.009620782843549312, "kl": 0.14208984375, "learning_rate": 9.281759961668541e-07, "loss": 0.00014202986494638026, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 911, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 78.75000190734863, "completions/min_length": 36.375, "epoch": 1.8121121866468106, "grad_norm": 0.008964483892672192, "kl": 0.115234375, "learning_rate": 9.280129972029418e-07, "loss": 0.00011530831397976726, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 912, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 77.364586353302, "completions/min_length": 36.375, "epoch": 1.8140977910151403, "grad_norm": 0.009324073321952835, "kl": 0.13043212890625, "learning_rate": 9.278498278379134e-07, "loss": 0.0001303391472902149, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 913, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 70.73958492279053, "completions/min_length": 35.5, "epoch": 1.8160833953834699, "grad_norm": 0.008782209003762748, "kl": 0.13873291015625, "learning_rate": 9.276864881367297e-07, "loss": 0.00013890663103666157, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 914, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 74.28125190734863, "completions/min_length": 33.125, "epoch": 1.8180689997517994, "grad_norm": 1.2257257286392702, "kl": 0.12548828125, "learning_rate": 9.275229781644199e-07, "loss": -0.010700320824980736, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 915, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 66.36458587646484, "completions/min_length": 35.125, "epoch": 1.820054604120129, "grad_norm": 0.015152993301309597, "kl": 0.141845703125, "learning_rate": 9.273592979860808e-07, "loss": 0.0001418372557964176, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 916, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 79.78125190734863, "completions/min_length": 36.75, "epoch": 1.8220402084884588, "grad_norm": 1.164141743224586, "kl": 0.1375732421875, "learning_rate": 9.271954476668771e-07, "loss": -0.013721026480197906, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 917, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 76.08333587646484, "completions/min_length": 34.75, "epoch": 1.8240258128567883, "grad_norm": 0.9718368314130161, "kl": 0.11944580078125, "learning_rate": 9.27031427272041e-07, "loss": -0.0009028440108522773, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 918, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 81.333336353302, "completions/min_length": 36.625, "epoch": 1.8260114172251178, "grad_norm": 0.008453763390296949, "kl": 0.12518310546875, "learning_rate": 9.268672368668724e-07, "loss": 0.0001252280198968947, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 919, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 79.92708587646484, "completions/min_length": 35.75, "epoch": 1.8279970215934473, "grad_norm": 0.015727045827087466, "kl": 0.13104248046875, "learning_rate": 9.267028765167391e-07, "loss": 0.00013102087541483343, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 920, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.125, "completions/mean_length": 78.92708539962769, "completions/min_length": 34.0, "epoch": 1.829982625961777, "grad_norm": 0.007909932223443645, "kl": 0.125732421875, "learning_rate": 9.265383462870766e-07, "loss": 0.00012571165279950947, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 921, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 82.93750190734863, "completions/min_length": 36.75, "epoch": 1.8319682303301068, "grad_norm": 0.0077653537751326725, "kl": 0.10980224609375, "learning_rate": 9.263736462433878e-07, "loss": 0.00010963292879750952, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 922, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 80.95833587646484, "completions/min_length": 35.75, "epoch": 1.8339538346984363, "grad_norm": 0.007045395069392264, "kl": 0.11553955078125, "learning_rate": 9.262087764512432e-07, "loss": 0.00011566970351850614, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 923, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 80.20833492279053, "completions/min_length": 39.0, "epoch": 1.8359394390667658, "grad_norm": 0.7339331511708622, "kl": 0.12884521484375, "learning_rate": 9.260437369762812e-07, "loss": 0.010075999423861504, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 924, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 69.0104193687439, "completions/min_length": 31.25, "epoch": 1.8379250434350956, "grad_norm": 0.7720731741630739, "kl": 0.11944580078125, "learning_rate": 9.258785278842074e-07, "loss": 0.0039865667931735516, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 925, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 77.78125333786011, "completions/min_length": 34.5, "epoch": 1.8399106478034253, "grad_norm": 0.006722094555020101, "kl": 0.125244140625, "learning_rate": 9.257131492407951e-07, "loss": 0.00012531019456218928, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 926, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 72.2916693687439, "completions/min_length": 38.375, "epoch": 1.8418962521717548, "grad_norm": 1.0453864667150048, "kl": 0.1173095703125, "learning_rate": 9.255476011118851e-07, "loss": 0.007351234555244446, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 927, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 79.70833587646484, "completions/min_length": 35.375, "epoch": 1.8438818565400843, "grad_norm": 0.007729291555005753, "kl": 0.12371826171875, "learning_rate": 9.253818835633855e-07, "loss": 0.00012368014722596854, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 928, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 76.55208683013916, "completions/min_length": 33.75, "epoch": 1.845867460908414, "grad_norm": 0.008353250396239633, "kl": 0.11431884765625, "learning_rate": 9.252159966612722e-07, "loss": 0.0001142207402153872, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 929, "train_speed(iter/s)": 0.022534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 74.30208587646484, "completions/min_length": 33.875, "epoch": 1.8478530652767438, "grad_norm": 0.008869208536607625, "kl": 0.12701416015625, "learning_rate": 9.250499404715882e-07, "loss": 0.00012701813830062747, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 930, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 88.63541984558105, "completions/min_length": 37.875, "epoch": 1.8498386696450733, "grad_norm": 0.007087772849495382, "kl": 0.13250732421875, "learning_rate": 9.248837150604441e-07, "loss": 0.00013244128786027431, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 931, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 77.92708587646484, "completions/min_length": 40.125, "epoch": 1.8518242740134028, "grad_norm": 0.0065413932307173605, "kl": 0.11614990234375, "learning_rate": 9.247173204940176e-07, "loss": 0.00011607255146373063, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 932, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 81.83333396911621, "completions/min_length": 35.25, "epoch": 1.8538098783817323, "grad_norm": 0.007473283230136595, "kl": 0.12152099609375, "learning_rate": 9.245507568385541e-07, "loss": 0.00012152847921242937, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 933, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 74.65625190734863, "completions/min_length": 29.125, "epoch": 1.855795482750062, "grad_norm": 1.1503934173831112, "kl": 0.13226318359375, "learning_rate": 9.243840241603662e-07, "loss": 0.007513184100389481, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 934, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 71.38541889190674, "completions/min_length": 33.875, "epoch": 1.8577810871183917, "grad_norm": 0.8074052184135067, "kl": 0.1397705078125, "learning_rate": 9.242171225258335e-07, "loss": -0.0060031963512301445, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 935, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 68.11458587646484, "completions/min_length": 37.0, "epoch": 1.8597666914867212, "grad_norm": 0.007230071813683354, "kl": 0.12274169921875, "learning_rate": 9.240500520014034e-07, "loss": 0.0001225871965289116, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 936, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 69.83333444595337, "completions/min_length": 35.625, "epoch": 1.8617522958550508, "grad_norm": 0.812551116027472, "kl": 0.1329345703125, "learning_rate": 9.238828126535901e-07, "loss": 0.007950139231979847, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 937, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 71.45833539962769, "completions/min_length": 34.375, "epoch": 1.8637379002233805, "grad_norm": 0.007693412059398112, "kl": 0.13232421875, "learning_rate": 9.23715404548975e-07, "loss": 0.0001322162861470133, "memory(GiB)": 94.21, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 938, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 70.93750143051147, "completions/min_length": 37.375, "epoch": 1.8657235045917102, "grad_norm": 1.3189935162431023, "kl": 0.1307373046875, "learning_rate": 9.23547827754207e-07, "loss": 0.0037108275573700666, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 939, "train_speed(iter/s)": 0.022543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 75.17708539962769, "completions/min_length": 35.875, "epoch": 1.8677091089600397, "grad_norm": 0.007031096849271503, "kl": 0.12457275390625, "learning_rate": 9.233800823360022e-07, "loss": 0.00012451031943783164, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 940, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 71.17708587646484, "completions/min_length": 34.125, "epoch": 1.8696947133283692, "grad_norm": 1.5032450276054758, "kl": 0.1480712890625, "learning_rate": 9.232121683611434e-07, "loss": 0.005250070244073868, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 941, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 73.73958587646484, "completions/min_length": 34.25, "epoch": 1.871680317696699, "grad_norm": 0.006310721623254409, "kl": 0.0950927734375, "learning_rate": 9.230440858964805e-07, "loss": 9.510396921541542e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 942, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 76.10416889190674, "completions/min_length": 39.375, "epoch": 1.8736659220650287, "grad_norm": 0.9377452971936192, "kl": 0.110595703125, "learning_rate": 9.228758350089313e-07, "loss": 0.00011061008262913674, "memory(GiB)": 94.21, "reward": 1.9791666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 943, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 67.77083492279053, "completions/min_length": 33.5, "epoch": 1.8756515264333582, "grad_norm": 0.013447880880508639, "kl": 0.114990234375, "learning_rate": 9.227074157654796e-07, "loss": 0.00011495831131469458, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 944, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 74.95833539962769, "completions/min_length": 32.125, "epoch": 1.8776371308016877, "grad_norm": 0.010348588107422185, "kl": 0.1312255859375, "learning_rate": 9.225388282331769e-07, "loss": 0.00013122425298206508, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 945, "train_speed(iter/s)": 0.022543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 77.81250190734863, "completions/min_length": 35.125, "epoch": 1.8796227351700172, "grad_norm": 0.012148346145160555, "kl": 0.14202880859375, "learning_rate": 9.223700724791416e-07, "loss": 0.00014211777306627482, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 946, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 70.677086353302, "completions/min_length": 32.5, "epoch": 1.881608339538347, "grad_norm": 1.1922451130482545, "kl": 0.1201171875, "learning_rate": 9.222011485705585e-07, "loss": -0.0010681685525923967, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 947, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 84.36458587646484, "completions/min_length": 37.125, "epoch": 1.8835939439066767, "grad_norm": 0.0073728058023891, "kl": 0.15185546875, "learning_rate": 9.220320565746805e-07, "loss": 0.0001520304795121774, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 948, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 83.90625333786011, "completions/min_length": 37.5, "epoch": 1.8855795482750062, "grad_norm": 2.933732312981029, "kl": 0.10418701171875, "learning_rate": 9.218627965588261e-07, "loss": 0.010607926174998283, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 949, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 71.770836353302, "completions/min_length": 35.625, "epoch": 1.8875651526433357, "grad_norm": 1.3556053830761863, "kl": 0.11370849609375, "learning_rate": 9.216933685903818e-07, "loss": 0.005140597932040691, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 950, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 75.56250143051147, "completions/min_length": 35.625, "epoch": 1.8895507570116654, "grad_norm": 0.01602748227186556, "kl": 0.12286376953125, "learning_rate": 9.215237727368002e-07, "loss": 0.00012275311746634543, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 951, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 69.34375190734863, "completions/min_length": 31.5, "epoch": 1.8915363613799951, "grad_norm": 0.008974914022572961, "kl": 0.1324462890625, "learning_rate": 9.213540090656013e-07, "loss": 0.00013249943731352687, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 952, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 68.89583539962769, "completions/min_length": 35.75, "epoch": 1.8935219657483247, "grad_norm": 0.00667277933473568, "kl": 0.11700439453125, "learning_rate": 9.211840776443713e-07, "loss": 0.00011695442663040012, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 953, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 68.55208539962769, "completions/min_length": 29.625, "epoch": 1.8955075701166542, "grad_norm": 0.007696458386247361, "kl": 0.11077880859375, "learning_rate": 9.210139785407638e-07, "loss": 0.00011075345537392423, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 954, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.125, "completions/mean_length": 67.79166841506958, "completions/min_length": 36.0, "epoch": 1.897493174484984, "grad_norm": 1.3722402511075555, "kl": 0.1376953125, "learning_rate": 9.208437118224987e-07, "loss": -0.003108623204752803, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 955, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 75.61458539962769, "completions/min_length": 36.125, "epoch": 1.8994787788533136, "grad_norm": 0.060150345637783584, "kl": 0.13623046875, "learning_rate": 9.20673277557363e-07, "loss": 0.0001361073082080111, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 956, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 79.25000238418579, "completions/min_length": 34.0, "epoch": 1.9014643832216431, "grad_norm": 0.7202040623211893, "kl": 0.12237548828125, "learning_rate": 9.205026758132102e-07, "loss": -0.00564401363953948, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 957, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 75.70833683013916, "completions/min_length": 35.5, "epoch": 1.9034499875899726, "grad_norm": 0.009994373339757778, "kl": 0.10723876953125, "learning_rate": 9.203319066579603e-07, "loss": 0.00010721605212893337, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 958, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 64.51041793823242, "completions/min_length": 32.75, "epoch": 1.9054355919583021, "grad_norm": 0.012637668678729314, "kl": 0.1270751953125, "learning_rate": 9.201609701596003e-07, "loss": 0.00012694580073002726, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 959, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 75.42708492279053, "completions/min_length": 35.25, "epoch": 1.9074211963266319, "grad_norm": 0.7784184826385184, "kl": 0.11529541015625, "learning_rate": 9.199898663861836e-07, "loss": -0.004092290066182613, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 960, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 74.32291889190674, "completions/min_length": 36.625, "epoch": 1.9094068006949616, "grad_norm": 1.3496105805474334, "kl": 0.131561279296875, "learning_rate": 9.198185954058304e-07, "loss": 0.008745516650378704, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 961, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 70.59375143051147, "completions/min_length": 36.5, "epoch": 1.9113924050632911, "grad_norm": 0.031016430453186167, "kl": 0.116455078125, "learning_rate": 9.196471572867272e-07, "loss": 0.00011654614354483783, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 962, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 71.30208444595337, "completions/min_length": 34.0, "epoch": 1.9133780094316206, "grad_norm": 0.008761079412151937, "kl": 0.11737060546875, "learning_rate": 9.194755520971272e-07, "loss": 0.00011732772691175342, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 963, "train_speed(iter/s)": 0.022554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 74.95833539962769, "completions/min_length": 35.75, "epoch": 1.9153636137999503, "grad_norm": 0.007919570470525118, "kl": 0.1171875, "learning_rate": 9.193037799053502e-07, "loss": 0.00011698844900820404, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 964, "train_speed(iter/s)": 0.022555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 81.46875286102295, "completions/min_length": 42.25, "epoch": 1.91734921816828, "grad_norm": 1.0164432816307807, "kl": 0.13189697265625, "learning_rate": 9.191318407797823e-07, "loss": 0.004111527930945158, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 965, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 74.94791984558105, "completions/min_length": 32.375, "epoch": 1.9193348225366096, "grad_norm": 0.0062646852844806585, "kl": 0.11651611328125, "learning_rate": 9.189597347888761e-07, "loss": 0.00011658138100756332, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 966, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 76.32291889190674, "completions/min_length": 33.875, "epoch": 1.921320426904939, "grad_norm": 0.9086865320292101, "kl": 0.13519287109375, "learning_rate": 9.187874620011506e-07, "loss": -0.0014109518378973007, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 967, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 72.81250190734863, "completions/min_length": 32.875, "epoch": 1.9233060312732688, "grad_norm": 0.006868939690706016, "kl": 0.10931396484375, "learning_rate": 9.186150224851916e-07, "loss": 0.00010914703307207674, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 968, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 72.18750143051147, "completions/min_length": 34.0, "epoch": 1.9252916356415986, "grad_norm": 0.006124398119808875, "kl": 0.1202392578125, "learning_rate": 9.184424163096507e-07, "loss": 0.0001203574865940027, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 969, "train_speed(iter/s)": 0.022551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 71.98958539962769, "completions/min_length": 36.25, "epoch": 1.927277240009928, "grad_norm": 1.106137844984077, "kl": 0.12115478515625, "learning_rate": 9.18269643543246e-07, "loss": -0.01140589639544487, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 970, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 75.70833587646484, "completions/min_length": 37.5, "epoch": 1.9292628443782576, "grad_norm": 1.408283715374188, "kl": 0.12353515625, "learning_rate": 9.180967042547623e-07, "loss": -0.0031010694801807404, "memory(GiB)": 94.21, "reward": 1.6250000149011612, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.6250000027939677, "rewards/CineAccuracyORM/std": 0.29628782719373703, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 971, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 85.0104193687439, "completions/min_length": 33.75, "epoch": 1.931248448746587, "grad_norm": 0.00550594396251065, "kl": 0.10125732421875, "learning_rate": 9.179235985130503e-07, "loss": 0.00010118850332219154, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 972, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 68.72916793823242, "completions/min_length": 34.0, "epoch": 1.9332340531149168, "grad_norm": 0.005789034473788149, "kl": 0.09600830078125, "learning_rate": 9.17750326387027e-07, "loss": 9.600781777407974e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 973, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 80.53125286102295, "completions/min_length": 35.625, "epoch": 1.9352196574832465, "grad_norm": 0.006653491650915339, "kl": 0.11248779296875, "learning_rate": 9.175768879456758e-07, "loss": 0.00011252841068198904, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 974, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 85.53125286102295, "completions/min_length": 42.125, "epoch": 1.937205261851576, "grad_norm": 1.534650739331518, "kl": 0.1253662109375, "learning_rate": 9.174032832580464e-07, "loss": -0.008691448718309402, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 975, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 75.3229193687439, "completions/min_length": 32.625, "epoch": 1.9391908662199056, "grad_norm": 1.0375136763787156, "kl": 0.10107421875, "learning_rate": 9.172295123932543e-07, "loss": 0.006832679267972708, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 976, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.125, "completions/mean_length": 90.90625286102295, "completions/min_length": 32.125, "epoch": 1.9411764705882353, "grad_norm": 0.014643125295771123, "kl": 0.1373291015625, "learning_rate": 9.170555754204816e-07, "loss": 0.0001373680424876511, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 977, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 82.4166693687439, "completions/min_length": 30.0, "epoch": 1.943162074956565, "grad_norm": 0.006284893212359794, "kl": 0.10443115234375, "learning_rate": 9.168814724089762e-07, "loss": 0.00010449031105963513, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 978, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 85.84375190734863, "completions/min_length": 34.875, "epoch": 1.9451476793248945, "grad_norm": 0.01115706204516085, "kl": 0.123779296875, "learning_rate": 9.167072034280521e-07, "loss": 0.00012373040954116732, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 979, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 80.56250286102295, "completions/min_length": 38.0, "epoch": 1.947133283693224, "grad_norm": 0.006286787971024888, "kl": 0.1273193359375, "learning_rate": 9.165327685470898e-07, "loss": 0.00012719148071482778, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 980, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 78.40625190734863, "completions/min_length": 32.125, "epoch": 1.9491188880615538, "grad_norm": 0.01634320920442587, "kl": 0.1168212890625, "learning_rate": 9.163581678355354e-07, "loss": 0.00011691331019392237, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 981, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 78.06250333786011, "completions/min_length": 34.5, "epoch": 1.9511044924298835, "grad_norm": 0.058598749258318386, "kl": 0.1265869140625, "learning_rate": 9.161834013629013e-07, "loss": 0.00012664358655456454, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 982, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.375, "completions/mean_length": 83.81250286102295, "completions/min_length": 36.0, "epoch": 1.953090096798213, "grad_norm": 0.0061883987722713436, "kl": 0.1102294921875, "learning_rate": 9.160084691987655e-07, "loss": 0.00011022534454241395, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 983, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 87.33333587646484, "completions/min_length": 41.125, "epoch": 1.9550757011665425, "grad_norm": 0.005076111608750147, "kl": 0.11309814453125, "learning_rate": 9.158333714127724e-07, "loss": 0.00011316935706418008, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 984, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 76.03125190734863, "completions/min_length": 33.75, "epoch": 1.9570613055348722, "grad_norm": 0.02151332621758537, "kl": 0.10296630859375, "learning_rate": 9.15658108074632e-07, "loss": 0.00010303198359906673, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 985, "train_speed(iter/s)": 0.022547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 87.80208587646484, "completions/min_length": 43.25, "epoch": 1.9590469099032017, "grad_norm": 0.7284794464559009, "kl": 0.1131591796875, "learning_rate": 9.154826792541208e-07, "loss": 0.005956694483757019, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 986, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 75.23958539962769, "completions/min_length": 37.875, "epoch": 1.9610325142715315, "grad_norm": 0.07276071906342804, "kl": 0.167724609375, "learning_rate": 9.153070850210802e-07, "loss": 0.00016750108625274152, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 987, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.875, "completions/mean_length": 93.04166889190674, "completions/min_length": 42.625, "epoch": 1.963018118639861, "grad_norm": 0.8704256667366844, "kl": 0.130126953125, "learning_rate": 9.151313254454185e-07, "loss": 0.0001302175223827362, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 988, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 89.47916984558105, "completions/min_length": 43.375, "epoch": 1.9650037230081905, "grad_norm": 0.005230738743073667, "kl": 0.0809326171875, "learning_rate": 9.149554005971092e-07, "loss": 8.098968828562647e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 989, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 83.302086353302, "completions/min_length": 37.5, "epoch": 1.9669893273765202, "grad_norm": 0.0058885007571412495, "kl": 0.09527587890625, "learning_rate": 9.147793105461915e-07, "loss": 9.51946058194153e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 990, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 82.6666693687439, "completions/min_length": 35.75, "epoch": 1.96897493174485, "grad_norm": 0.005708314042677562, "kl": 0.10791015625, "learning_rate": 9.146030553627708e-07, "loss": 0.00010792820830829442, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 991, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.625, "completions/mean_length": 85.27083492279053, "completions/min_length": 37.875, "epoch": 1.9709605361131795, "grad_norm": 1.1799515770943712, "kl": 0.13043212890625, "learning_rate": 9.144266351170183e-07, "loss": -0.00883845891803503, "memory(GiB)": 94.21, "reward": 1.6041666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 992, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 74.79166889190674, "completions/min_length": 34.25, "epoch": 1.972946140481509, "grad_norm": 0.007826950674859187, "kl": 0.11181640625, "learning_rate": 9.142500498791701e-07, "loss": 0.00011182423622813076, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 993, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 82.84375286102295, "completions/min_length": 35.0, "epoch": 1.9749317448498387, "grad_norm": 1.2607068242976633, "kl": 0.12030029296875, "learning_rate": 9.14073299719529e-07, "loss": -0.004849262535572052, "memory(GiB)": 94.21, "reward": 1.7291666865348816, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.33328525722026825, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 994, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 76.88541984558105, "completions/min_length": 31.5, "epoch": 1.9769173492181684, "grad_norm": 0.007823724574330898, "kl": 0.12518310546875, "learning_rate": 9.138963847084629e-07, "loss": 0.00012523704208433628, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 995, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.375, "completions/mean_length": 69.83333539962769, "completions/min_length": 31.125, "epoch": 1.978902953586498, "grad_norm": 0.008872477191515333, "kl": 0.12982177734375, "learning_rate": 9.137193049164053e-07, "loss": 0.00012983387568965554, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 996, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 74.97916841506958, "completions/min_length": 32.125, "epoch": 1.9808885579548274, "grad_norm": 0.00861972477172187, "kl": 0.1126708984375, "learning_rate": 9.135420604138557e-07, "loss": 0.00011255948629695922, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 997, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 79.17708444595337, "completions/min_length": 36.125, "epoch": 1.9828741623231572, "grad_norm": 0.00820892144127085, "kl": 0.1422119140625, "learning_rate": 9.133646512713787e-07, "loss": 0.00014218440628610551, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 998, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 77.16666793823242, "completions/min_length": 37.75, "epoch": 1.9848597666914867, "grad_norm": 0.008523308134671392, "kl": 0.120849609375, "learning_rate": 9.131870775596049e-07, "loss": 0.00012071570381522179, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 999, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 83.71875333786011, "completions/min_length": 35.25, "epoch": 1.9868453710598164, "grad_norm": 0.007716056017196739, "kl": 0.11724853515625, "learning_rate": 9.1300933934923e-07, "loss": 0.00011719940084731206, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1000, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 79.3854193687439, "completions/min_length": 33.625, "epoch": 1.988830975428146, "grad_norm": 0.08496033932824887, "kl": 0.2257080078125, "learning_rate": 9.128314367110153e-07, "loss": 0.00022642673866357654, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1001, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 81.94791984558105, "completions/min_length": 39.0, "epoch": 1.9908165797964754, "grad_norm": 0.7926007549032419, "kl": 0.1217041015625, "learning_rate": 9.126533697157878e-07, "loss": 0.0036898914258927107, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1002, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 72.55208492279053, "completions/min_length": 34.125, "epoch": 1.9928021841648051, "grad_norm": 0.008641688889488621, "kl": 0.1109619140625, "learning_rate": 9.124751384344399e-07, "loss": 0.00011089236795669422, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1003, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.125, "completions/mean_length": 83.1666669845581, "completions/min_length": 40.25, "epoch": 1.9947877885331349, "grad_norm": 0.008197931670343743, "kl": 0.1312255859375, "learning_rate": 9.122967429379291e-07, "loss": 0.00013110280269756913, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1004, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 85.54166841506958, "completions/min_length": 37.0, "epoch": 1.9967733929014644, "grad_norm": 0.007737891161198206, "kl": 0.118408203125, "learning_rate": 9.121181832972784e-07, "loss": 0.00011833791359094903, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1005, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 72.94791841506958, "completions/min_length": 37.625, "epoch": 1.998758997269794, "grad_norm": 0.008068687150658662, "kl": 0.11572265625, "learning_rate": 9.119394595835764e-07, "loss": 0.00011578643170651048, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1006, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 69.75000190734863, "completions/min_length": 35.625, "epoch": 2.0019856043683295, "grad_norm": 0.007674557960704678, "kl": 0.119384765625, "learning_rate": 9.117605718679765e-07, "loss": 0.00011955931404372677, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1007, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 73.46875190734863, "completions/min_length": 30.75, "epoch": 2.003971208736659, "grad_norm": 0.006536672252896576, "kl": 0.1129150390625, "learning_rate": 9.115815202216981e-07, "loss": 0.00011291628470644355, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1008, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 75.41666889190674, "completions/min_length": 30.25, "epoch": 2.005956813104989, "grad_norm": 0.8521227456772538, "kl": 0.1092529296875, "learning_rate": 9.114023047160253e-07, "loss": 0.002899589715525508, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1009, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 77.03125190734863, "completions/min_length": 38.125, "epoch": 2.0079424174733185, "grad_norm": 0.006690528016540262, "kl": 0.1190185546875, "learning_rate": 9.112229254223077e-07, "loss": 0.00011910950706806034, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1010, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 67.83333539962769, "completions/min_length": 27.25, "epoch": 2.009928021841648, "grad_norm": 0.0056127323151639675, "kl": 0.12060546875, "learning_rate": 9.110433824119598e-07, "loss": 0.00012048385542584583, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1011, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 74.37500190734863, "completions/min_length": 31.625, "epoch": 2.0119136262099775, "grad_norm": 3.354626009773243, "kl": 0.16973876953125, "learning_rate": 9.108636757564618e-07, "loss": 0.00016950443387031555, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.375051774084568, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1012, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 79.09375333786011, "completions/min_length": 35.0, "epoch": 2.0138992305783074, "grad_norm": 0.00575384200275158, "kl": 0.1220703125, "learning_rate": 9.106838055273587e-07, "loss": 0.00012213800800964236, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1013, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 73.59375286102295, "completions/min_length": 33.0, "epoch": 2.015884834946637, "grad_norm": 0.006643985887206167, "kl": 0.11700439453125, "learning_rate": 9.105037717962604e-07, "loss": 0.00011702807387337089, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1014, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 74.78125286102295, "completions/min_length": 34.375, "epoch": 2.0178704393149665, "grad_norm": 0.006479767740960207, "kl": 0.117919921875, "learning_rate": 9.103235746348426e-07, "loss": 0.00011776233441196382, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1015, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 75.37500286102295, "completions/min_length": 30.625, "epoch": 2.019856043683296, "grad_norm": 0.005070033278772015, "kl": 0.10150146484375, "learning_rate": 9.101432141148453e-07, "loss": 0.00010151336755370721, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1016, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 73.51041793823242, "completions/min_length": 37.0, "epoch": 2.021841648051626, "grad_norm": 0.005564099290104957, "kl": 0.08929443359375, "learning_rate": 9.099626903080742e-07, "loss": 8.932495256885886e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1017, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 70.98958587646484, "completions/min_length": 35.125, "epoch": 2.0238272524199554, "grad_norm": 0.006085392073596954, "kl": 0.12060546875, "learning_rate": 9.097820032863992e-07, "loss": 0.00012077610881533474, "memory(GiB)": 94.21, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1018, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 75.84375143051147, "completions/min_length": 32.875, "epoch": 2.025812856788285, "grad_norm": 0.005497172903734264, "kl": 0.0960693359375, "learning_rate": 9.096011531217561e-07, "loss": 9.60447359830141e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1019, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 78.83333587646484, "completions/min_length": 34.375, "epoch": 2.0277984611566144, "grad_norm": 0.923182384940218, "kl": 0.11322021484375, "learning_rate": 9.094201398861451e-07, "loss": -0.005555596202611923, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1020, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 87.31250286102295, "completions/min_length": 43.0, "epoch": 2.029784065524944, "grad_norm": 0.8479972341631841, "kl": 0.1124267578125, "learning_rate": 9.092389636516313e-07, "loss": -0.00932252500206232, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1021, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 73.57291793823242, "completions/min_length": 32.125, "epoch": 2.031769669893274, "grad_norm": 2.0600097902769576, "kl": 0.15057373046875, "learning_rate": 9.090576244903452e-07, "loss": 0.003625646000728011, "memory(GiB)": 94.21, "reward": 1.59375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.59375, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1022, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 83.42708492279053, "completions/min_length": 33.25, "epoch": 2.0337552742616034, "grad_norm": 0.8182778996245744, "kl": 0.1546630859375, "learning_rate": 9.088761224744812e-07, "loss": 0.007634392939507961, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1023, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.625, "completions/mean_length": 87.76041793823242, "completions/min_length": 36.0, "epoch": 2.035740878629933, "grad_norm": 0.913421053950445, "kl": 0.522216796875, "learning_rate": 9.086944576762996e-07, "loss": -0.009745832532644272, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1024, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 77.83333444595337, "completions/min_length": 35.875, "epoch": 2.0377264829982624, "grad_norm": 0.0074229322061054245, "kl": 0.09735107421875, "learning_rate": 9.085126301681247e-07, "loss": 9.729337762109935e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1025, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 81.39583587646484, "completions/min_length": 31.5, "epoch": 2.0397120873665924, "grad_norm": 0.011784362958371148, "kl": 0.10107421875, "learning_rate": 9.083306400223463e-07, "loss": 0.00010111034498549998, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1026, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 72.42708396911621, "completions/min_length": 35.625, "epoch": 2.041697691734922, "grad_norm": 0.007941043755597331, "kl": 0.09197998046875, "learning_rate": 9.081484873114185e-07, "loss": 9.204033995047212e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1027, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 82.03125190734863, "completions/min_length": 41.875, "epoch": 2.0436832961032514, "grad_norm": 0.7966503026283959, "kl": 0.10577392578125, "learning_rate": 9.079661721078597e-07, "loss": -0.0001545312552480027, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1028, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 79.12500190734863, "completions/min_length": 36.5, "epoch": 2.045668900471581, "grad_norm": 0.006309244666020647, "kl": 0.09771728515625, "learning_rate": 9.077836944842541e-07, "loss": 9.768415475264192e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1029, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 76.51041841506958, "completions/min_length": 33.375, "epoch": 2.047654504839911, "grad_norm": 0.004705715962469841, "kl": 0.12359619140625, "learning_rate": 9.076010545132496e-07, "loss": 0.00012360823166090995, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1030, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 80.72916984558105, "completions/min_length": 35.625, "epoch": 2.0496401092082404, "grad_norm": 0.006474807694392289, "kl": 0.107666015625, "learning_rate": 9.074182522675591e-07, "loss": 0.00010779083822853863, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1031, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.75, "completions/mean_length": 82.38541793823242, "completions/min_length": 34.5, "epoch": 2.05162571357657, "grad_norm": 0.006962552036572687, "kl": 0.1025390625, "learning_rate": 9.0723528781996e-07, "loss": 0.0001025118981488049, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1032, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.125, "completions/mean_length": 89.45833587646484, "completions/min_length": 39.25, "epoch": 2.0536113179448994, "grad_norm": 0.004949851198072381, "kl": 0.1192626953125, "learning_rate": 9.070521612432946e-07, "loss": 0.00011945145524805412, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1033, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 84.56250190734863, "completions/min_length": 33.0, "epoch": 2.055596922313229, "grad_norm": 0.6804908403865937, "kl": 0.11773681640625, "learning_rate": 9.068688726104696e-07, "loss": 0.0001176235600723885, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1034, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 84.18750238418579, "completions/min_length": 41.875, "epoch": 2.057582526681559, "grad_norm": 0.005253636511616478, "kl": 0.12042236328125, "learning_rate": 9.066854219944555e-07, "loss": 0.00012024200987070799, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1035, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 88.39583587646484, "completions/min_length": 32.25, "epoch": 2.0595681310498883, "grad_norm": 0.84495968668063, "kl": 0.11614990234375, "learning_rate": 9.065018094682885e-07, "loss": -0.004611310549080372, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1036, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 97.18750190734863, "completions/min_length": 42.5, "epoch": 2.061553735418218, "grad_norm": 0.7685448506632714, "kl": 0.1378173828125, "learning_rate": 9.063180351050685e-07, "loss": 0.0009650582214817405, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.708333333954215, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1037, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 78.1354193687439, "completions/min_length": 32.75, "epoch": 2.0635393397865474, "grad_norm": 0.008468823906944947, "kl": 0.11431884765625, "learning_rate": 9.0613409897796e-07, "loss": 0.00011438851652201265, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1038, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 90.63541793823242, "completions/min_length": 36.375, "epoch": 2.0655249441548773, "grad_norm": 0.007569330285043941, "kl": 0.107666015625, "learning_rate": 9.059500011601917e-07, "loss": 0.00010769099753815681, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1039, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 82.1979193687439, "completions/min_length": 32.5, "epoch": 2.067510548523207, "grad_norm": 0.007785800349068557, "kl": 0.12628173828125, "learning_rate": 9.057657417250572e-07, "loss": 0.00012620570487342775, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1040, "train_speed(iter/s)": 0.022511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 86.55208683013916, "completions/min_length": 34.75, "epoch": 2.0694961528915363, "grad_norm": 0.0074693102772766294, "kl": 0.11376953125, "learning_rate": 9.05581320745914e-07, "loss": 0.00011375600297469646, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1041, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 81.98958539962769, "completions/min_length": 37.0, "epoch": 2.071481757259866, "grad_norm": 0.0065098106774316095, "kl": 0.111083984375, "learning_rate": 9.053967382961838e-07, "loss": 0.00011106421879958361, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1042, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.875, "completions/mean_length": 84.94791793823242, "completions/min_length": 34.5, "epoch": 2.073467361628196, "grad_norm": 0.007556840446950588, "kl": 0.112548828125, "learning_rate": 9.052119944493531e-07, "loss": 0.00011260159953963012, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1043, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 84.10416841506958, "completions/min_length": 43.0, "epoch": 2.0754529659965253, "grad_norm": 1.7653653031823724, "kl": 0.12640380859375, "learning_rate": 9.050270892789724e-07, "loss": -0.0049756355583667755, "memory(GiB)": 94.21, "reward": 1.7604166865348816, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7604166697710752, "rewards/CineAccuracyORM/std": 0.1783013790845871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1044, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 87.04166889190674, "completions/min_length": 46.0, "epoch": 2.077438570364855, "grad_norm": 0.9740486323335305, "kl": 0.1143798828125, "learning_rate": 9.048420228586562e-07, "loss": -0.005567749496549368, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1045, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 89.86458683013916, "completions/min_length": 28.75, "epoch": 2.0794241747331843, "grad_norm": 0.0063396094559172375, "kl": 0.1116943359375, "learning_rate": 9.046567952620834e-07, "loss": 0.000111713758087717, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1046, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 92.41666889190674, "completions/min_length": 39.25, "epoch": 2.081409779101514, "grad_norm": 0.8248505321288238, "kl": 0.10968017578125, "learning_rate": 9.044714065629973e-07, "loss": -0.009053878486156464, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1047, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 87.42708587646484, "completions/min_length": 34.875, "epoch": 2.0833953834698438, "grad_norm": 1.248438982243334, "kl": 0.13739013671875, "learning_rate": 9.042858568352048e-07, "loss": 0.0103627173230052, "memory(GiB)": 94.21, "reward": 1.4791666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.4791666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1048, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 90.11458587646484, "completions/min_length": 37.875, "epoch": 2.0853809878381733, "grad_norm": 1.2050207981394843, "kl": 0.11712646484375, "learning_rate": 9.041001461525773e-07, "loss": 0.00585666298866272, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1049, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 87.25000286102295, "completions/min_length": 40.0, "epoch": 2.087366592206503, "grad_norm": 0.0072672567395674105, "kl": 0.1226806640625, "learning_rate": 9.039142745890504e-07, "loss": 0.00012268760474398732, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1050, "train_speed(iter/s)": 0.022498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 90.84375286102295, "completions/min_length": 40.0, "epoch": 2.0893521965748323, "grad_norm": 1.0383994865817225, "kl": 0.112548828125, "learning_rate": 9.037282422186232e-07, "loss": -0.0059965322725474834, "memory(GiB)": 94.21, "reward": 1.8750000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8750000074505806, "rewards/CineAccuracyORM/std": 0.17548104748129845, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1051, "train_speed(iter/s)": 0.0225 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 89.78125190734863, "completions/min_length": 34.125, "epoch": 2.0913378009431622, "grad_norm": 0.007294551673038089, "kl": 0.12835693359375, "learning_rate": 9.035420491153595e-07, "loss": 0.00012832277570851147, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1052, "train_speed(iter/s)": 0.022498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 80.35416841506958, "completions/min_length": 34.875, "epoch": 2.0933234053114917, "grad_norm": 0.007842351601243245, "kl": 0.11505126953125, "learning_rate": 9.033556953533865e-07, "loss": 0.00011521436681505293, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1053, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.875, "completions/mean_length": 97.59375190734863, "completions/min_length": 44.25, "epoch": 2.0953090096798213, "grad_norm": 1.067991023922792, "kl": 0.12652587890625, "learning_rate": 9.031691810068958e-07, "loss": 0.0001265754399355501, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393530294299126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1054, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 80.47916793823242, "completions/min_length": 31.5, "epoch": 2.0972946140481508, "grad_norm": 0.009867823516612157, "kl": 0.1175537109375, "learning_rate": 9.029825061501423e-07, "loss": 0.00011762286158045754, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1055, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.625, "completions/mean_length": 94.07291984558105, "completions/min_length": 37.125, "epoch": 2.0992802184164807, "grad_norm": 0.007818309239812216, "kl": 0.11761474609375, "learning_rate": 9.02795670857446e-07, "loss": 0.00011771730351028964, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1056, "train_speed(iter/s)": 0.022494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 77.2291693687439, "completions/min_length": 40.25, "epoch": 2.1012658227848102, "grad_norm": 0.008170669737652658, "kl": 0.12646484375, "learning_rate": 9.026086752031895e-07, "loss": 0.00012647028779610991, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1057, "train_speed(iter/s)": 0.022496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 87.06250190734863, "completions/min_length": 31.5, "epoch": 2.1032514271531397, "grad_norm": 0.00757295019159829, "kl": 0.128662109375, "learning_rate": 9.024215192618199e-07, "loss": 0.000128539526485838, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1058, "train_speed(iter/s)": 0.022492 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 80.56250143051147, "completions/min_length": 30.75, "epoch": 2.1052370315214692, "grad_norm": 0.006256571396549088, "kl": 0.14801025390625, "learning_rate": 9.022342031078478e-07, "loss": 0.0001482020306866616, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1059, "train_speed(iter/s)": 0.022493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 76.38541841506958, "completions/min_length": 28.0, "epoch": 2.1072226358897987, "grad_norm": 0.00860863893703562, "kl": 0.12225341796875, "learning_rate": 9.020467268158481e-07, "loss": 0.00012234578025527298, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1060, "train_speed(iter/s)": 0.022496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 77.58333587646484, "completions/min_length": 29.75, "epoch": 2.1092082402581287, "grad_norm": 0.009146683724391846, "kl": 0.135986328125, "learning_rate": 9.018590904604588e-07, "loss": 0.00013591634342446923, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1061, "train_speed(iter/s)": 0.022496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 78.21875333786011, "completions/min_length": 31.375, "epoch": 2.111193844626458, "grad_norm": 0.008593579235551981, "kl": 0.1422119140625, "learning_rate": 9.016712941163823e-07, "loss": 0.00014218749129213393, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1062, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 83.73958492279053, "completions/min_length": 35.625, "epoch": 2.1131794489947877, "grad_norm": 0.007734910252291212, "kl": 0.1141357421875, "learning_rate": 9.014833378583839e-07, "loss": 0.00011415178596507758, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1063, "train_speed(iter/s)": 0.022494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.375, "completions/mean_length": 85.76041889190674, "completions/min_length": 35.875, "epoch": 2.115165053363117, "grad_norm": 0.007636118890987433, "kl": 0.10968017578125, "learning_rate": 9.012952217612933e-07, "loss": 0.00010967616981361061, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1064, "train_speed(iter/s)": 0.022489 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 88.33333492279053, "completions/min_length": 32.5, "epoch": 2.117150657731447, "grad_norm": 0.007225957331834589, "kl": 0.11090087890625, "learning_rate": 9.011069459000034e-07, "loss": 0.00011098825780209154, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1065, "train_speed(iter/s)": 0.022485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 79.92708587646484, "completions/min_length": 32.625, "epoch": 2.1191362620997767, "grad_norm": 0.7410801038965984, "kl": 0.10748291015625, "learning_rate": 9.00918510349471e-07, "loss": 0.01757071539759636, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1066, "train_speed(iter/s)": 0.022485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 83.03125238418579, "completions/min_length": 27.5, "epoch": 2.121121866468106, "grad_norm": 0.007781396418530573, "kl": 0.11456298828125, "learning_rate": 9.007299151847161e-07, "loss": 0.00011462807015050203, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1067, "train_speed(iter/s)": 0.022484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 71.04166793823242, "completions/min_length": 28.625, "epoch": 2.1231074708364357, "grad_norm": 0.007355446293214044, "kl": 0.10284423828125, "learning_rate": 9.005411604808226e-07, "loss": 0.00010296957771060988, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1068, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 78.66666841506958, "completions/min_length": 35.5, "epoch": 2.1250930752047656, "grad_norm": 0.17747489489888124, "kl": 0.228271484375, "learning_rate": 9.003522463129377e-07, "loss": 0.00022821266611572355, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1069, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 79.28125143051147, "completions/min_length": 29.625, "epoch": 2.127078679573095, "grad_norm": 0.0060307482758721765, "kl": 0.13299560546875, "learning_rate": 9.001631727562723e-07, "loss": 0.0001330701052211225, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1070, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 75.63541841506958, "completions/min_length": 33.125, "epoch": 2.1290642839414247, "grad_norm": 0.007732253928501234, "kl": 0.10595703125, "learning_rate": 8.999739398861005e-07, "loss": 0.00010610777826514095, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1071, "train_speed(iter/s)": 0.022488 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 64.06250143051147, "completions/min_length": 31.75, "epoch": 2.131049888309754, "grad_norm": 0.006459854447195919, "kl": 0.11016845703125, "learning_rate": 8.9978454777776e-07, "loss": 0.00011024588457075879, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1072, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 70.14583587646484, "completions/min_length": 25.75, "epoch": 2.1330354926780837, "grad_norm": 0.016317735798137844, "kl": 0.1470947265625, "learning_rate": 8.995949965066518e-07, "loss": 0.0001472176518291235, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1073, "train_speed(iter/s)": 0.022485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 66.00000238418579, "completions/min_length": 28.25, "epoch": 2.1350210970464136, "grad_norm": 0.0065803074565239875, "kl": 0.108642578125, "learning_rate": 8.994052861482404e-07, "loss": 0.00010851970000658184, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1074, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 76.14583444595337, "completions/min_length": 38.25, "epoch": 2.137006701414743, "grad_norm": 0.007288344398015291, "kl": 0.10345458984375, "learning_rate": 8.992154167780535e-07, "loss": 0.00010349667718401179, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1075, "train_speed(iter/s)": 0.022487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 75.72916889190674, "completions/min_length": 32.125, "epoch": 2.1389923057830726, "grad_norm": 0.010876767868097825, "kl": 0.110595703125, "learning_rate": 8.99025388471682e-07, "loss": 0.00011058989184675738, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1076, "train_speed(iter/s)": 0.022489 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 73.35416841506958, "completions/min_length": 28.75, "epoch": 2.140977910151402, "grad_norm": 0.006220464356667581, "kl": 0.097412109375, "learning_rate": 8.988352013047804e-07, "loss": 9.744061389937997e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1077, "train_speed(iter/s)": 0.022486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 68.70833492279053, "completions/min_length": 26.5, "epoch": 2.142963514519732, "grad_norm": 0.005311529691512592, "kl": 0.0982666015625, "learning_rate": 8.986448553530663e-07, "loss": 9.82498750090599e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1078, "train_speed(iter/s)": 0.022486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.625, "completions/mean_length": 88.31250333786011, "completions/min_length": 35.375, "epoch": 2.1449491188880616, "grad_norm": 0.0056511354602318455, "kl": 0.104736328125, "learning_rate": 8.984543506923204e-07, "loss": 0.0001046436809701845, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1079, "train_speed(iter/s)": 0.022484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 76.46875143051147, "completions/min_length": 35.875, "epoch": 2.146934723256391, "grad_norm": 0.005674849337966061, "kl": 0.121826171875, "learning_rate": 8.982636873983866e-07, "loss": 0.00012185898231109604, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1080, "train_speed(iter/s)": 0.022486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 72.55208587646484, "completions/min_length": 34.875, "epoch": 2.1489203276247206, "grad_norm": 1.0265947895890666, "kl": 0.10137939453125, "learning_rate": 8.980728655471723e-07, "loss": 0.008604519069194794, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1081, "train_speed(iter/s)": 0.022486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 70.87500095367432, "completions/min_length": 19.5, "epoch": 2.1509059319930506, "grad_norm": 0.005386526977581848, "kl": 0.103363037109375, "learning_rate": 8.978818852146476e-07, "loss": 0.00010324212780687958, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1082, "train_speed(iter/s)": 0.022485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 83.82291984558105, "completions/min_length": 39.25, "epoch": 2.15289153636138, "grad_norm": 0.006918107163843888, "kl": 0.11614990234375, "learning_rate": 8.976907464768458e-07, "loss": 0.00011630808876361698, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1083, "train_speed(iter/s)": 0.022482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 61.73958444595337, "completions/min_length": 26.25, "epoch": 2.1548771407297096, "grad_norm": 0.00863867048723788, "kl": 0.108795166015625, "learning_rate": 8.974994494098634e-07, "loss": 0.00010880948684643954, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1084, "train_speed(iter/s)": 0.022482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 77.50000381469727, "completions/min_length": 38.0, "epoch": 2.156862745098039, "grad_norm": 0.018856742454225597, "kl": 0.124755859375, "learning_rate": 8.973079940898596e-07, "loss": 0.0001246890751644969, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1085, "train_speed(iter/s)": 0.022483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 67.0104193687439, "completions/min_length": 27.5, "epoch": 2.1588483494663686, "grad_norm": 0.02102008715921952, "kl": 0.1031494140625, "learning_rate": 8.971163805930572e-07, "loss": 0.00010311186633771285, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1086, "train_speed(iter/s)": 0.022486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 66.42708539962769, "completions/min_length": 31.375, "epoch": 2.1608339538346986, "grad_norm": 0.00669141793201854, "kl": 0.0999755859375, "learning_rate": 8.969246089957414e-07, "loss": 9.997960296459496e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1087, "train_speed(iter/s)": 0.022489 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 68.00000238418579, "completions/min_length": 28.75, "epoch": 2.162819558203028, "grad_norm": 0.01247525483515014, "kl": 0.08660888671875, "learning_rate": 8.967326793742606e-07, "loss": 8.656363934278488e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1088, "train_speed(iter/s)": 0.022493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 68.50000238418579, "completions/min_length": 29.75, "epoch": 2.1648051625713576, "grad_norm": 0.00547189786184364, "kl": 0.10394287109375, "learning_rate": 8.965405918050263e-07, "loss": 0.00010401105100754648, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1089, "train_speed(iter/s)": 0.022494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 72.7604193687439, "completions/min_length": 25.5, "epoch": 2.166790766939687, "grad_norm": 1.0410738737807892, "kl": 0.1177978515625, "learning_rate": 8.963483463645124e-07, "loss": 0.012273518368601799, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1090, "train_speed(iter/s)": 0.022492 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 65.38541841506958, "completions/min_length": 21.625, "epoch": 2.168776371308017, "grad_norm": 0.005649499770030795, "kl": 0.0968017578125, "learning_rate": 8.96155943129256e-07, "loss": 9.678161586634815e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1091, "train_speed(iter/s)": 0.022493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 70.20833492279053, "completions/min_length": 28.375, "epoch": 2.1707619756763465, "grad_norm": 0.006201369966540897, "kl": 0.0999755859375, "learning_rate": 8.95963382175857e-07, "loss": 9.990476246457547e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1092, "train_speed(iter/s)": 0.022495 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 66.96875143051147, "completions/min_length": 26.0, "epoch": 2.172747580044676, "grad_norm": 0.005604728673251856, "kl": 0.08880615234375, "learning_rate": 8.957706635809779e-07, "loss": 8.882778638508171e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1093, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 59.57291793823242, "completions/min_length": 25.75, "epoch": 2.1747331844130056, "grad_norm": 1.473368609186764, "kl": 0.09686279296875, "learning_rate": 8.955777874213443e-07, "loss": 0.0037911073304712772, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1094, "train_speed(iter/s)": 0.022498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 61.625000953674316, "completions/min_length": 30.75, "epoch": 2.1767187887813355, "grad_norm": 0.007552661565256119, "kl": 0.09954833984375, "learning_rate": 8.953847537737441e-07, "loss": 9.963195043383166e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1095, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 66.14583587646484, "completions/min_length": 22.875, "epoch": 2.178704393149665, "grad_norm": 0.005680964975348198, "kl": 0.097900390625, "learning_rate": 8.951915627150282e-07, "loss": 9.784003486856818e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1096, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 76.58333587646484, "completions/min_length": 29.625, "epoch": 2.1806899975179945, "grad_norm": 0.0752183079850825, "kl": 0.2479248046875, "learning_rate": 8.9499821432211e-07, "loss": 0.00024750837474130094, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1097, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.375, "completions/mean_length": 63.8854193687439, "completions/min_length": 21.375, "epoch": 2.182675601886324, "grad_norm": 0.005957281964740377, "kl": 0.09930419921875, "learning_rate": 8.948047086719658e-07, "loss": 9.928665531333536e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1098, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 62.17708492279053, "completions/min_length": 24.375, "epoch": 2.1846612062546535, "grad_norm": 0.006526659169941049, "kl": 0.1005859375, "learning_rate": 8.946110458416343e-07, "loss": 0.00010057906911242753, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1099, "train_speed(iter/s)": 0.022511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 65.38541889190674, "completions/min_length": 32.0, "epoch": 2.1866468106229835, "grad_norm": 0.004639691676157582, "kl": 0.09539794921875, "learning_rate": 8.944172259082165e-07, "loss": 9.535030403640121e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1100, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 64.28125143051147, "completions/min_length": 25.125, "epoch": 2.188632414991313, "grad_norm": 0.005595707415697775, "kl": 0.107666015625, "learning_rate": 8.942232489488768e-07, "loss": 0.0001077161286957562, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1101, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 72.50000190734863, "completions/min_length": 27.75, "epoch": 2.1906180193596425, "grad_norm": 0.0057670649819886515, "kl": 0.10455322265625, "learning_rate": 8.940291150408412e-07, "loss": 0.00010457433381816372, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1102, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.125, "completions/mean_length": 66.34375238418579, "completions/min_length": 27.875, "epoch": 2.192603623727972, "grad_norm": 0.005926890020697525, "kl": 0.10186767578125, "learning_rate": 8.938348242613985e-07, "loss": 0.00010185446444666013, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1103, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 68.21875143051147, "completions/min_length": 26.25, "epoch": 2.194589228096302, "grad_norm": 1.2148774553492736, "kl": 0.106689453125, "learning_rate": 8.936403766879003e-07, "loss": 0.0001067506818799302, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1104, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 62.895835876464844, "completions/min_length": 21.0, "epoch": 2.1965748324646315, "grad_norm": 0.007109448105471694, "kl": 0.11492919921875, "learning_rate": 8.934457723977601e-07, "loss": 0.0001150758471339941, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1105, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.625, "completions/mean_length": 63.63541889190674, "completions/min_length": 22.5, "epoch": 2.198560436832961, "grad_norm": 0.007050399758887717, "kl": 0.09521484375, "learning_rate": 8.932510114684542e-07, "loss": 9.520860476186499e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1106, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 73.57291793823242, "completions/min_length": 22.625, "epoch": 2.2005460412012905, "grad_norm": 0.006297160309753514, "kl": 0.0894775390625, "learning_rate": 8.930560939775207e-07, "loss": 8.949158655013889e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1107, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 68.83333587646484, "completions/min_length": 31.25, "epoch": 2.2025316455696204, "grad_norm": 0.006087930528336131, "kl": 0.08905029296875, "learning_rate": 8.92861020002561e-07, "loss": 8.9189488789998e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1108, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 69.37500238418579, "completions/min_length": 29.375, "epoch": 2.20451724993795, "grad_norm": 0.00517770403285071, "kl": 0.11773681640625, "learning_rate": 8.926657896212379e-07, "loss": 0.00011782505316659808, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1109, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 65.28125238418579, "completions/min_length": 22.75, "epoch": 2.2065028543062795, "grad_norm": 0.005232882423403717, "kl": 0.10479736328125, "learning_rate": 8.924704029112767e-07, "loss": 0.00010488222324056551, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1110, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 73.22916889190674, "completions/min_length": 27.75, "epoch": 2.208488458674609, "grad_norm": 1.1187840684918657, "kl": 0.110595703125, "learning_rate": 8.922748599504653e-07, "loss": 0.007763413246721029, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1111, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 76.35416889190674, "completions/min_length": 31.375, "epoch": 2.2104740630429385, "grad_norm": 0.005699094021220918, "kl": 0.11663818359375, "learning_rate": 8.920791608166532e-07, "loss": 0.0001166003494290635, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1112, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 64.26041793823242, "completions/min_length": 17.0, "epoch": 2.2124596674112684, "grad_norm": 1.4140946198929851, "kl": 0.0966796875, "learning_rate": 8.918833055877526e-07, "loss": -0.010779529809951782, "memory(GiB)": 94.21, "reward": 1.895833358168602, "reward_std": 0.08330589719116688, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.16199621930718422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1113, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 64.21875238418579, "completions/min_length": 22.5, "epoch": 2.214445271779598, "grad_norm": 0.007454915580742328, "kl": 0.08990478515625, "learning_rate": 8.916872943417375e-07, "loss": 8.991503273136914e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1114, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 59.437501430511475, "completions/min_length": 17.75, "epoch": 2.2164308761479274, "grad_norm": 0.006675639053861326, "kl": 0.10076904296875, "learning_rate": 8.914911271566444e-07, "loss": 0.0001005775629892014, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1115, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 72.71875190734863, "completions/min_length": 26.875, "epoch": 2.218416480516257, "grad_norm": 0.007704553634753637, "kl": 0.10906982421875, "learning_rate": 8.912948041105714e-07, "loss": 0.00010915064194705337, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1116, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 72.52083587646484, "completions/min_length": 25.75, "epoch": 2.220402084884587, "grad_norm": 1.5760444915876597, "kl": 0.108367919921875, "learning_rate": 8.910983252816793e-07, "loss": 0.022869249805808067, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1117, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 68.47916984558105, "completions/min_length": 23.5, "epoch": 2.2223876892529164, "grad_norm": 0.006970770514677127, "kl": 0.1058349609375, "learning_rate": 8.909016907481899e-07, "loss": 0.00010587855649646372, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1118, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 73.65625286102295, "completions/min_length": 28.0, "epoch": 2.224373293621246, "grad_norm": 0.007271300911421996, "kl": 0.09588623046875, "learning_rate": 8.907049005883882e-07, "loss": 9.595022129360586e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1119, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 70.06250190734863, "completions/min_length": 21.25, "epoch": 2.2263588979895754, "grad_norm": 0.008990944019646025, "kl": 0.1044921875, "learning_rate": 8.905079548806203e-07, "loss": 0.0001044741366058588, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1120, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.125, "completions/mean_length": 73.56250143051147, "completions/min_length": 20.0, "epoch": 2.2283445023579054, "grad_norm": 0.009539653340183514, "kl": 0.12603759765625, "learning_rate": 8.903108537032943e-07, "loss": 0.00012604176299646497, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1121, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 72.21875333786011, "completions/min_length": 17.75, "epoch": 2.230330106726235, "grad_norm": 0.5214478080123781, "kl": 0.1253662109375, "learning_rate": 8.901135971348807e-07, "loss": 0.021506179124116898, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1122, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 70.44791889190674, "completions/min_length": 20.125, "epoch": 2.2323157110945644, "grad_norm": 1.5761155413424663, "kl": 0.1195068359375, "learning_rate": 8.899161852539115e-07, "loss": 0.021353479474782944, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.25145769491791725, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1123, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 65.62500190734863, "completions/min_length": 15.25, "epoch": 2.234301315462894, "grad_norm": 1.179133881102878, "kl": 0.1259765625, "learning_rate": 8.897186181389804e-07, "loss": 0.0004408457316458225, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1124, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 63.76041841506958, "completions/min_length": 16.375, "epoch": 2.2362869198312234, "grad_norm": 0.011007733049022849, "kl": 0.1217041015625, "learning_rate": 8.895208958687435e-07, "loss": 0.00012160721234977245, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1125, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 64.18750190734863, "completions/min_length": 16.875, "epoch": 2.2382725241995534, "grad_norm": 0.714532313149669, "kl": 0.16357421875, "learning_rate": 8.893230185219176e-07, "loss": -0.013636423274874687, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1126, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 76.60416793823242, "completions/min_length": 18.375, "epoch": 2.240258128567883, "grad_norm": 1.320655274004454, "kl": 0.13323974609375, "learning_rate": 8.891249861772826e-07, "loss": 0.007070520427078009, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.31391648203134537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1127, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 71.16666889190674, "completions/min_length": 23.0, "epoch": 2.2422437329362124, "grad_norm": 0.00492348795733696, "kl": 0.0859375, "learning_rate": 8.88926798913679e-07, "loss": 8.598122803959996e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1128, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 59.25000190734863, "completions/min_length": 23.375, "epoch": 2.244229337304542, "grad_norm": 0.005775537796384591, "kl": 0.11737060546875, "learning_rate": 8.887284568100094e-07, "loss": 0.00011733293649740517, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1129, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 78.80208683013916, "completions/min_length": 21.25, "epoch": 2.246214941672872, "grad_norm": 0.020279422360607837, "kl": 0.1282958984375, "learning_rate": 8.88529959945238e-07, "loss": 0.00012847153993789107, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1130, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 68.73958444595337, "completions/min_length": 19.0, "epoch": 2.2482005460412013, "grad_norm": 0.04876869427415476, "kl": 0.168212890625, "learning_rate": 8.88331308398391e-07, "loss": 0.00016822278848849237, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1131, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 71.41666889190674, "completions/min_length": 16.0, "epoch": 2.250186150409531, "grad_norm": 2.2953937110179563, "kl": 0.15020751953125, "learning_rate": 8.881325022485554e-07, "loss": 0.020766139030456543, "memory(GiB)": 94.21, "reward": 1.6875000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.2407601661980152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1132, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.375, "completions/mean_length": 84.04166984558105, "completions/min_length": 28.375, "epoch": 2.2521717547778604, "grad_norm": 0.048150000507869155, "kl": 0.1700439453125, "learning_rate": 8.879335415748803e-07, "loss": 0.00017000603838823736, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1133, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 65.95833492279053, "completions/min_length": 24.375, "epoch": 2.2541573591461903, "grad_norm": 0.030746473142806686, "kl": 0.143310546875, "learning_rate": 8.877344264565764e-07, "loss": 0.00014336864114739, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1134, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 79.82291984558105, "completions/min_length": 21.875, "epoch": 2.25614296351452, "grad_norm": 0.035033529873740685, "kl": 0.14453125, "learning_rate": 8.875351569729155e-07, "loss": 0.00014458972145803273, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1135, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 75.25000190734863, "completions/min_length": 21.75, "epoch": 2.2581285678828493, "grad_norm": 0.5105725162771391, "kl": 0.1651611328125, "learning_rate": 8.873357332032308e-07, "loss": 0.023152565583586693, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1136, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 73.51041889190674, "completions/min_length": 29.625, "epoch": 2.260114172251179, "grad_norm": 1.0343112659956455, "kl": 0.15692138671875, "learning_rate": 8.871361552269176e-07, "loss": -0.0024548075161874294, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.6145833386108279, "rewards/CineAccuracyORM/std": 0.21529880911111832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1137, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 71.2604193687439, "completions/min_length": 17.625, "epoch": 2.2620997766195083, "grad_norm": 0.014767302642669088, "kl": 0.09637451171875, "learning_rate": 8.86936423123432e-07, "loss": 9.631804277887568e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1138, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 74.3229193687439, "completions/min_length": 26.75, "epoch": 2.2640853809878383, "grad_norm": 0.6742620185526368, "kl": 0.15142822265625, "learning_rate": 8.867365369722914e-07, "loss": 0.004045408219099045, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1139, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 68.37500143051147, "completions/min_length": 24.375, "epoch": 2.266070985356168, "grad_norm": 0.005572392431531506, "kl": 0.10986328125, "learning_rate": 8.865364968530751e-07, "loss": 0.0001099392757168971, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1140, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 69.83333539962769, "completions/min_length": 25.25, "epoch": 2.2680565897244973, "grad_norm": 0.004426089086485255, "kl": 0.09466552734375, "learning_rate": 8.863363028454231e-07, "loss": 9.467983909416944e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1141, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 92.48958492279053, "completions/min_length": 39.375, "epoch": 2.270042194092827, "grad_norm": 1.2811652679665897, "kl": 0.11492919921875, "learning_rate": 8.861359550290371e-07, "loss": 0.006492177955806255, "memory(GiB)": 94.21, "reward": 1.6354166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.2934674955904484, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1142, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 83.82291984558105, "completions/min_length": 31.375, "epoch": 2.2720277984611568, "grad_norm": 0.9211205091348875, "kl": 0.1025390625, "learning_rate": 8.859354534836796e-07, "loss": 0.006440839730203152, "memory(GiB)": 94.21, "reward": 1.96875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1143, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 82.26041793823242, "completions/min_length": 23.625, "epoch": 2.2740134028294863, "grad_norm": 1.3242005158688273, "kl": 0.110107421875, "learning_rate": 8.857347982891748e-07, "loss": -0.003253697184845805, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1144, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 79.12500238418579, "completions/min_length": 32.625, "epoch": 2.275999007197816, "grad_norm": 0.007456759149966841, "kl": 0.10943603515625, "learning_rate": 8.855339895254076e-07, "loss": 0.00010936538456007838, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1145, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.625, "completions/mean_length": 68.48958444595337, "completions/min_length": 21.25, "epoch": 2.2779846115661453, "grad_norm": 0.0076247539582459155, "kl": 0.11993408203125, "learning_rate": 8.853330272723242e-07, "loss": 0.00011989235645160079, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1146, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 67.7916693687439, "completions/min_length": 16.5, "epoch": 2.2799702159344752, "grad_norm": 0.005242432177648585, "kl": 0.09405517578125, "learning_rate": 8.851319116099325e-07, "loss": 9.39883611863479e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1147, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 68.90625190734863, "completions/min_length": 22.25, "epoch": 2.2819558203028047, "grad_norm": 0.8355247066094097, "kl": 0.1044921875, "learning_rate": 8.849306426183004e-07, "loss": -0.005248870700597763, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1148, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 73.65625143051147, "completions/min_length": 23.125, "epoch": 2.2839414246711343, "grad_norm": 0.9478221971336297, "kl": 0.096221923828125, "learning_rate": 8.847292203775574e-07, "loss": -0.011721128597855568, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1149, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 77.36458683013916, "completions/min_length": 24.75, "epoch": 2.2859270290394638, "grad_norm": 0.005528089719292132, "kl": 0.09344482421875, "learning_rate": 8.845276449678942e-07, "loss": 9.348720777779818e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1150, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 79.21875238418579, "completions/min_length": 24.875, "epoch": 2.2879126334077933, "grad_norm": 0.7386188458974698, "kl": 0.09613037109375, "learning_rate": 8.843259164695624e-07, "loss": 0.00979701615869999, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1151, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 81.05208587646484, "completions/min_length": 29.125, "epoch": 2.2898982377761232, "grad_norm": 1.348597986088773, "kl": 0.11956787109375, "learning_rate": 8.84124034962874e-07, "loss": -0.0016678448300808668, "memory(GiB)": 94.21, "reward": 1.927083358168602, "reward_std": 0.07654655165970325, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.13653486222028732, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1152, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 73.23958587646484, "completions/min_length": 26.625, "epoch": 2.2918838421444527, "grad_norm": 0.6987493894869504, "kl": 0.1328125, "learning_rate": 8.839220005282026e-07, "loss": 0.021390598267316818, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1153, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 68.93750190734863, "completions/min_length": 24.625, "epoch": 2.2938694465127822, "grad_norm": 0.005944869094262386, "kl": 0.10888671875, "learning_rate": 8.837198132459827e-07, "loss": 0.00010898825712502003, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1154, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 72.36458587646484, "completions/min_length": 29.375, "epoch": 2.2958550508811117, "grad_norm": 0.007026317985682431, "kl": 0.097442626953125, "learning_rate": 8.835174731967087e-07, "loss": 9.741432586451992e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1155, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 70.96875238418579, "completions/min_length": 23.875, "epoch": 2.2978406552494417, "grad_norm": 0.00559194582259412, "kl": 0.1007080078125, "learning_rate": 8.833149804609371e-07, "loss": 0.00010077822662424296, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1156, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 80.87500286102295, "completions/min_length": 27.625, "epoch": 2.299826259617771, "grad_norm": 0.0056649427093886946, "kl": 0.1041259765625, "learning_rate": 8.831123351192844e-07, "loss": 0.00010407360969111323, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1157, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 75.36458539962769, "completions/min_length": 24.75, "epoch": 2.3018118639861007, "grad_norm": 0.7607588873475549, "kl": 0.11187744140625, "learning_rate": 8.829095372524278e-07, "loss": -0.0063299760222435, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1158, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 75.61458539962769, "completions/min_length": 18.5, "epoch": 2.3037974683544302, "grad_norm": 0.6261683644274237, "kl": 0.1658935546875, "learning_rate": 8.827065869411059e-07, "loss": 0.0054514347575604916, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1159, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 74.12500190734863, "completions/min_length": 18.0, "epoch": 2.30578307272276, "grad_norm": 0.013372830515073058, "kl": 0.111083984375, "learning_rate": 8.825034842661171e-07, "loss": 0.00011106727470178157, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1160, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 80.91666984558105, "completions/min_length": 28.875, "epoch": 2.3077686770910897, "grad_norm": 0.011937438269505276, "kl": 0.10406494140625, "learning_rate": 8.823002293083213e-07, "loss": 0.00010405677312519401, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1161, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 78.98958539962769, "completions/min_length": 27.0, "epoch": 2.309754281459419, "grad_norm": 0.8855845150495578, "kl": 0.13714599609375, "learning_rate": 8.820968221486382e-07, "loss": 0.005876713898032904, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1162, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 75.35416841506958, "completions/min_length": 25.875, "epoch": 2.3117398858277487, "grad_norm": 0.031529671487283195, "kl": 0.12542724609375, "learning_rate": 8.818932628680491e-07, "loss": 0.0001254369708476588, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1163, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 68.04166889190674, "completions/min_length": 24.25, "epoch": 2.313725490196078, "grad_norm": 0.06521873477939312, "kl": 0.20062255859375, "learning_rate": 8.816895515475948e-07, "loss": 0.00020067018340341747, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1164, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.625, "completions/mean_length": 78.07291984558105, "completions/min_length": 28.25, "epoch": 2.315711094564408, "grad_norm": 0.016632657710600417, "kl": 0.1331787109375, "learning_rate": 8.814856882683774e-07, "loss": 0.0001331377134192735, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1165, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 87.92708683013916, "completions/min_length": 26.0, "epoch": 2.3176966989327377, "grad_norm": 0.012337044537867275, "kl": 0.1123046875, "learning_rate": 8.812816731115594e-07, "loss": 0.00011230561358388513, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1166, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 82.27083683013916, "completions/min_length": 31.0, "epoch": 2.319682303301067, "grad_norm": 0.028205607552615104, "kl": 0.138916015625, "learning_rate": 8.81077506158363e-07, "loss": 0.0001389819517498836, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1167, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 77.87500190734863, "completions/min_length": 28.875, "epoch": 2.3216679076693967, "grad_norm": 0.018319302237655877, "kl": 0.10888671875, "learning_rate": 8.808731874900719e-07, "loss": 0.00010872550774365664, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1168, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.875, "completions/mean_length": 81.60416793823242, "completions/min_length": 31.25, "epoch": 2.3236535120377266, "grad_norm": 0.9703407456113551, "kl": 0.12335205078125, "learning_rate": 8.806687171880296e-07, "loss": 0.0016537992050871253, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1169, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 85.02083683013916, "completions/min_length": 28.875, "epoch": 2.325639116406056, "grad_norm": 1.1244762493953908, "kl": 0.1112060546875, "learning_rate": 8.8046409533364e-07, "loss": 0.005542024038732052, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1170, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 76.22916889190674, "completions/min_length": 21.75, "epoch": 2.3276247207743856, "grad_norm": 0.009141530987340777, "kl": 0.11712646484375, "learning_rate": 8.802593220083676e-07, "loss": 0.00011731675476767123, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1171, "train_speed(iter/s)": 0.022514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 81.21875286102295, "completions/min_length": 29.5, "epoch": 2.329610325142715, "grad_norm": 0.008837238644923327, "kl": 0.11541748046875, "learning_rate": 8.80054397293737e-07, "loss": 0.00011541452840901911, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1172, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 80.59375190734863, "completions/min_length": 30.875, "epoch": 2.331595929511045, "grad_norm": 0.009101768879644433, "kl": 0.112060546875, "learning_rate": 8.79849321271333e-07, "loss": 0.00011213422840228304, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1173, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 86.65625095367432, "completions/min_length": 36.875, "epoch": 2.3335815338793746, "grad_norm": 0.007434057245448074, "kl": 0.11505126953125, "learning_rate": 8.796440940228009e-07, "loss": 0.00011487871961435303, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1174, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.875, "completions/mean_length": 92.020836353302, "completions/min_length": 34.0, "epoch": 2.335567138247704, "grad_norm": 1.023240112078257, "kl": 0.124755859375, "learning_rate": 8.794387156298458e-07, "loss": 0.001069599180482328, "memory(GiB)": 94.21, "reward": 1.5520833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1175, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 77.36458539962769, "completions/min_length": 31.0, "epoch": 2.3375527426160336, "grad_norm": 0.007349093758855563, "kl": 0.11334228515625, "learning_rate": 8.792331861742335e-07, "loss": 0.00011330414417898282, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1176, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 73.68750143051147, "completions/min_length": 22.0, "epoch": 2.339538346984363, "grad_norm": 0.007021138723531153, "kl": 0.094482421875, "learning_rate": 8.790275057377896e-07, "loss": 9.447755292057991e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1177, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 80.36458683013916, "completions/min_length": 34.875, "epoch": 2.341523951352693, "grad_norm": 0.009904770515817793, "kl": 0.10528564453125, "learning_rate": 8.788216744023997e-07, "loss": 0.00010507451952435076, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1178, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 72.85416889190674, "completions/min_length": 18.75, "epoch": 2.3435095557210226, "grad_norm": 0.012841128010172167, "kl": 0.10296630859375, "learning_rate": 8.786156922500098e-07, "loss": 0.00010292684601154178, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1179, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 75.50000286102295, "completions/min_length": 27.125, "epoch": 2.345495160089352, "grad_norm": 0.020190310360136656, "kl": 0.135009765625, "learning_rate": 8.784095593626258e-07, "loss": 0.00013489092816598713, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1180, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.125, "completions/mean_length": 84.47916889190674, "completions/min_length": 29.25, "epoch": 2.347480764457682, "grad_norm": 0.8718207861699887, "kl": 0.12261962890625, "learning_rate": 8.782032758223137e-07, "loss": 0.00225023808889091, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1181, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 84.302086353302, "completions/min_length": 30.875, "epoch": 2.3494663688260116, "grad_norm": 0.026752438334008994, "kl": 0.143310546875, "learning_rate": 8.77996841711199e-07, "loss": 0.0001431004930054769, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1182, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 72.72917032241821, "completions/min_length": 26.25, "epoch": 2.351451973194341, "grad_norm": 1.1784184054246503, "kl": 0.11566162109375, "learning_rate": 8.77790257111468e-07, "loss": -0.0012859385460615158, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1183, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 77.04166889190674, "completions/min_length": 33.5, "epoch": 2.3534375775626706, "grad_norm": 0.01927069965275057, "kl": 0.13763427734375, "learning_rate": 8.775835221053662e-07, "loss": 0.00013751001097261906, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1184, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 77.02083539962769, "completions/min_length": 34.625, "epoch": 2.355423181931, "grad_norm": 0.02217662582705634, "kl": 0.13751220703125, "learning_rate": 8.773766367751992e-07, "loss": 0.00013747252523899078, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1185, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 75.31250190734863, "completions/min_length": 25.375, "epoch": 2.35740878629933, "grad_norm": 0.020428484098549613, "kl": 0.10638427734375, "learning_rate": 8.771696012033325e-07, "loss": 0.00010635077342158183, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1186, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 76.13541841506958, "completions/min_length": 34.875, "epoch": 2.3593943906676595, "grad_norm": 0.0428335817202583, "kl": 0.17236328125, "learning_rate": 8.769624154721915e-07, "loss": 0.00017243428737856448, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1187, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 77.06250190734863, "completions/min_length": 31.875, "epoch": 2.361379995035989, "grad_norm": 0.02224551925648367, "kl": 0.1319580078125, "learning_rate": 8.767550796642611e-07, "loss": 0.00013200273679103702, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1188, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 74.95833444595337, "completions/min_length": 30.625, "epoch": 2.3633655994043186, "grad_norm": 0.017314477823991956, "kl": 0.1260986328125, "learning_rate": 8.76547593862086e-07, "loss": 0.00012607741518877447, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1189, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 80.09375190734863, "completions/min_length": 36.5, "epoch": 2.365351203772648, "grad_norm": 0.8632068783026354, "kl": 0.10430908203125, "learning_rate": 8.763399581482712e-07, "loss": 0.009489011950790882, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1190, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 69.94791793823242, "completions/min_length": 27.75, "epoch": 2.367336808140978, "grad_norm": 1.0407693807454945, "kl": 0.1737060546875, "learning_rate": 8.761321726054805e-07, "loss": 0.007885940372943878, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1191, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 80.95833587646484, "completions/min_length": 31.5, "epoch": 2.3693224125093075, "grad_norm": 0.9518744178247593, "kl": 0.12188720703125, "learning_rate": 8.759242373164379e-07, "loss": 0.0028071056585758924, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1192, "train_speed(iter/s)": 0.022507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.375, "completions/mean_length": 81.28125190734863, "completions/min_length": 36.375, "epoch": 2.371308016877637, "grad_norm": 0.008708198488745992, "kl": 0.11004638671875, "learning_rate": 8.757161523639269e-07, "loss": 0.00011010012531187385, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1193, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 76.22916889190674, "completions/min_length": 36.0, "epoch": 2.373293621245967, "grad_norm": 0.9432423355965983, "kl": 0.10760498046875, "learning_rate": 8.755079178307906e-07, "loss": -0.00908761378377676, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1194, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 69.89583492279053, "completions/min_length": 28.125, "epoch": 2.3752792256142965, "grad_norm": 0.008827989042322936, "kl": 0.10565185546875, "learning_rate": 8.752995337999315e-07, "loss": 0.00010562210809439421, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1195, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 79.79166984558105, "completions/min_length": 39.25, "epoch": 2.377264829982626, "grad_norm": 0.0059477336151238, "kl": 0.1082763671875, "learning_rate": 8.750910003543117e-07, "loss": 0.00010813317203428596, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1196, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.625, "completions/mean_length": 81.83333587646484, "completions/min_length": 28.25, "epoch": 2.3792504343509555, "grad_norm": 0.005592197007666392, "kl": 0.11285400390625, "learning_rate": 8.74882317576953e-07, "loss": 0.00011294342402834445, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1197, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 70.083336353302, "completions/min_length": 28.125, "epoch": 2.381236038719285, "grad_norm": 0.007282324312840882, "kl": 0.10418701171875, "learning_rate": 8.746734855509363e-07, "loss": 0.00010426364315208048, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1198, "train_speed(iter/s)": 0.0225 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 74.09375286102295, "completions/min_length": 26.625, "epoch": 2.383221643087615, "grad_norm": 0.007868494682890831, "kl": 0.1396484375, "learning_rate": 8.744645043594023e-07, "loss": 0.0001394712453475222, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1199, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 73.73958492279053, "completions/min_length": 32.0, "epoch": 2.3852072474559445, "grad_norm": 0.008215865347069914, "kl": 0.149169921875, "learning_rate": 8.742553740855505e-07, "loss": 0.000149146027979441, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1200, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 71.47916889190674, "completions/min_length": 31.875, "epoch": 2.387192851824274, "grad_norm": 0.0069754904572254086, "kl": 0.11346435546875, "learning_rate": 8.740460948126405e-07, "loss": 0.00011325508239679039, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1201, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.875, "completions/mean_length": 66.84375238418579, "completions/min_length": 32.625, "epoch": 2.3891784561926035, "grad_norm": 0.0094413946466077, "kl": 0.14093017578125, "learning_rate": 8.738366666239907e-07, "loss": 0.00014111213386058807, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1202, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 74.54166889190674, "completions/min_length": 38.25, "epoch": 2.391164060560933, "grad_norm": 0.006099917995374272, "kl": 0.10516357421875, "learning_rate": 8.736270896029789e-07, "loss": 0.0001052111474564299, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1203, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 68.58333492279053, "completions/min_length": 32.75, "epoch": 2.393149664929263, "grad_norm": 0.00804770697128756, "kl": 0.11505126953125, "learning_rate": 8.734173638330425e-07, "loss": 0.00011503610585350543, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1204, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 77.03125286102295, "completions/min_length": 34.625, "epoch": 2.3951352692975925, "grad_norm": 1.0915854298598817, "kl": 0.63726806640625, "learning_rate": 8.732074893976773e-07, "loss": 0.005252781789749861, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1205, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 78.34375286102295, "completions/min_length": 34.125, "epoch": 2.397120873665922, "grad_norm": 0.007907001958771044, "kl": 0.1240234375, "learning_rate": 8.72997466380439e-07, "loss": 0.000123914098367095, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1206, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 67.15625286102295, "completions/min_length": 25.375, "epoch": 2.399106478034252, "grad_norm": 0.00932980057288349, "kl": 0.11688232421875, "learning_rate": 8.727872948649424e-07, "loss": 0.00011681941396091133, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1207, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 72.35416889190674, "completions/min_length": 33.25, "epoch": 2.4010920824025814, "grad_norm": 1.020334589866774, "kl": 0.13568115234375, "learning_rate": 8.725769749348612e-07, "loss": -0.009047575294971466, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1208, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 69.09375190734863, "completions/min_length": 34.25, "epoch": 2.403077686770911, "grad_norm": 1.0897846341075441, "kl": 0.1197509765625, "learning_rate": 8.723665066739281e-07, "loss": -0.005535339470952749, "memory(GiB)": 94.21, "reward": 1.7291666865348816, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.18837061524391174, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1209, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 64.70833539962769, "completions/min_length": 26.625, "epoch": 2.4050632911392404, "grad_norm": 1.1656621547897874, "kl": 0.1461181640625, "learning_rate": 8.721558901659352e-07, "loss": 0.02129194885492325, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1210, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.625, "completions/mean_length": 68.46875286102295, "completions/min_length": 28.125, "epoch": 2.40704889550757, "grad_norm": 0.008905996911485681, "kl": 0.13385009765625, "learning_rate": 8.719451254947333e-07, "loss": 0.00013398613373283297, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1211, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 63.083335876464844, "completions/min_length": 30.75, "epoch": 2.4090344998759, "grad_norm": 0.9979978206550499, "kl": 0.11883544921875, "learning_rate": 8.717342127442324e-07, "loss": 0.00011886656284332275, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1212, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 74.68750286102295, "completions/min_length": 28.75, "epoch": 2.4110201042442294, "grad_norm": 1.4113740934437338, "kl": 0.16485595703125, "learning_rate": 8.715231519984014e-07, "loss": -0.0006730010500177741, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.30885961651802063, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1213, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 68.80208492279053, "completions/min_length": 32.0, "epoch": 2.413005708612559, "grad_norm": 0.009435189902738072, "kl": 0.1397705078125, "learning_rate": 8.713119433412681e-07, "loss": 0.0001397547748638317, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1214, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 72.9166693687439, "completions/min_length": 33.125, "epoch": 2.4149913129808884, "grad_norm": 0.008912927790464232, "kl": 0.130126953125, "learning_rate": 8.71100586856919e-07, "loss": 0.00013003393542021513, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1215, "train_speed(iter/s)": 0.022507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 67.37500238418579, "completions/min_length": 31.375, "epoch": 2.416976917349218, "grad_norm": 0.007215197280285389, "kl": 0.09991455078125, "learning_rate": 8.708890826294997e-07, "loss": 0.00010004551586462185, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1216, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 74.36458683013916, "completions/min_length": 33.625, "epoch": 2.418962521717548, "grad_norm": 0.00732358925408056, "kl": 0.12322998046875, "learning_rate": 8.706774307432147e-07, "loss": 0.00012320814130362123, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1217, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 65.77083539962769, "completions/min_length": 31.5, "epoch": 2.4209481260858774, "grad_norm": 0.008629798265974205, "kl": 0.134765625, "learning_rate": 8.704656312823271e-07, "loss": 0.00013469200348481536, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1218, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 70.4166693687439, "completions/min_length": 28.75, "epoch": 2.422933730454207, "grad_norm": 0.007550538981952436, "kl": 0.12774658203125, "learning_rate": 8.702536843311585e-07, "loss": 0.0001277501869481057, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1219, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 71.92708444595337, "completions/min_length": 28.75, "epoch": 2.424919334822537, "grad_norm": 0.007348547344810667, "kl": 0.105010986328125, "learning_rate": 8.7004158997409e-07, "loss": 0.00010493632726138458, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1220, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 68.14583492279053, "completions/min_length": 26.875, "epoch": 2.4269049391908664, "grad_norm": 0.007929975208294991, "kl": 0.12506103515625, "learning_rate": 8.698293482955605e-07, "loss": 0.00012495600094553083, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1221, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 70.95833539962769, "completions/min_length": 27.25, "epoch": 2.428890543559196, "grad_norm": 0.008878034930621323, "kl": 0.12506103515625, "learning_rate": 8.69616959380068e-07, "loss": 0.00012495438568294048, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1222, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 66.65625143051147, "completions/min_length": 27.375, "epoch": 2.4308761479275254, "grad_norm": 0.00811625898495114, "kl": 0.1143798828125, "learning_rate": 8.694044233121693e-07, "loss": 0.00011435621127020568, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1223, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 65.88541841506958, "completions/min_length": 27.125, "epoch": 2.432861752295855, "grad_norm": 0.007651631684715641, "kl": 0.10736083984375, "learning_rate": 8.691917401764792e-07, "loss": 0.00010723310697358102, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1224, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 69.45833492279053, "completions/min_length": 26.0, "epoch": 2.434847356664185, "grad_norm": 0.007440471633059999, "kl": 0.1180419921875, "learning_rate": 8.689789100576716e-07, "loss": 0.00011803848610725254, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1225, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 78.33333539962769, "completions/min_length": 31.125, "epoch": 2.4368329610325143, "grad_norm": 0.0076252891489103514, "kl": 0.1207275390625, "learning_rate": 8.687659330404789e-07, "loss": 0.00012083293404430151, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1226, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 67.48958587646484, "completions/min_length": 31.25, "epoch": 2.438818565400844, "grad_norm": 0.7727236106868278, "kl": 0.110595703125, "learning_rate": 8.685528092096914e-07, "loss": -0.0126656424254179, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1227, "train_speed(iter/s)": 0.022511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 70.75000190734863, "completions/min_length": 28.75, "epoch": 2.4408041697691734, "grad_norm": 0.006787463818572936, "kl": 0.10882568359375, "learning_rate": 8.683395386501585e-07, "loss": 0.00010873173596337438, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1228, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 71.61458587646484, "completions/min_length": 23.375, "epoch": 2.442789774137503, "grad_norm": 0.8975252343855715, "kl": 0.126220703125, "learning_rate": 8.681261214467877e-07, "loss": 0.012133456766605377, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1229, "train_speed(iter/s)": 0.022514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 63.177086353302, "completions/min_length": 29.125, "epoch": 2.444775378505833, "grad_norm": 1.1452085521087776, "kl": 0.15301513671875, "learning_rate": 8.67912557684545e-07, "loss": 0.016196615993976593, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1230, "train_speed(iter/s)": 0.022514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.375, "completions/mean_length": 61.12500238418579, "completions/min_length": 28.375, "epoch": 2.4467609828741623, "grad_norm": 0.005251437163449944, "kl": 0.1148681640625, "learning_rate": 8.676988474484547e-07, "loss": 0.00011488603195175529, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1231, "train_speed(iter/s)": 0.022515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.125, "completions/mean_length": 57.88541889190674, "completions/min_length": 27.25, "epoch": 2.448746587242492, "grad_norm": 0.007546645666077838, "kl": 0.1295166015625, "learning_rate": 8.674849908235993e-07, "loss": 0.00012949170195497572, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1232, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 62.01041793823242, "completions/min_length": 30.25, "epoch": 2.450732191610822, "grad_norm": 0.005556366281487293, "kl": 0.12762451171875, "learning_rate": 8.672709878951198e-07, "loss": 0.00012759763922076672, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1233, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 63.00000238418579, "completions/min_length": 32.0, "epoch": 2.4527177959791513, "grad_norm": 0.014333723316308819, "kl": 0.123809814453125, "learning_rate": 8.670568387482152e-07, "loss": 0.00012387729657348245, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1234, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 66.57291841506958, "completions/min_length": 28.375, "epoch": 2.454703400347481, "grad_norm": 0.9524225547703068, "kl": 0.0809326171875, "learning_rate": 8.66842543468143e-07, "loss": 0.0014120943378657103, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1235, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 60.833335399627686, "completions/min_length": 24.0, "epoch": 2.4566890047158103, "grad_norm": 0.005431258183396817, "kl": 0.097686767578125, "learning_rate": 8.666281021402187e-07, "loss": 9.764005517354235e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1236, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 70.31250238418579, "completions/min_length": 30.75, "epoch": 2.45867460908414, "grad_norm": 1.2921519935369679, "kl": 0.11627197265625, "learning_rate": 8.66413514849816e-07, "loss": -0.009187010116875172, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1237, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 68.55208587646484, "completions/min_length": 30.625, "epoch": 2.4606602134524698, "grad_norm": 0.00583919048751687, "kl": 0.1014404296875, "learning_rate": 8.661987816823663e-07, "loss": 0.00010138032666873187, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1238, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 66.23958539962769, "completions/min_length": 29.875, "epoch": 2.4626458178207993, "grad_norm": 0.011416457497404964, "kl": 0.12127685546875, "learning_rate": 8.659839027233602e-07, "loss": 0.00012118097947677597, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1239, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 67.44791841506958, "completions/min_length": 29.75, "epoch": 2.464631422189129, "grad_norm": 0.8326084365128613, "kl": 0.110595703125, "learning_rate": 8.65768878058345e-07, "loss": -0.0035335025750100613, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1240, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 60.79166841506958, "completions/min_length": 27.625, "epoch": 2.4666170265574583, "grad_norm": 0.006716772724864029, "kl": 0.1087646484375, "learning_rate": 8.655537077729268e-07, "loss": 0.00010880798799917102, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1241, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 63.75000238418579, "completions/min_length": 34.75, "epoch": 2.468602630925788, "grad_norm": 0.0052529777711340575, "kl": 0.09698486328125, "learning_rate": 8.653383919527695e-07, "loss": 9.685917757451534e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1242, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 58.062500953674316, "completions/min_length": 25.75, "epoch": 2.4705882352941178, "grad_norm": 2.2772604945789503, "kl": 0.11431884765625, "learning_rate": 8.65122930683595e-07, "loss": -0.007184515707194805, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1243, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 62.04166793823242, "completions/min_length": 22.125, "epoch": 2.4725738396624473, "grad_norm": 0.01782565112665753, "kl": 0.120697021484375, "learning_rate": 8.649073240511829e-07, "loss": 0.00012075152335455641, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1244, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 62.67708492279053, "completions/min_length": 27.375, "epoch": 2.4745594440307768, "grad_norm": 0.005790817326568715, "kl": 0.1043701171875, "learning_rate": 8.646915721413707e-07, "loss": 0.0001043564043357037, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1245, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 64.78125238418579, "completions/min_length": 25.5, "epoch": 2.4765450483991067, "grad_norm": 0.005827619480205973, "kl": 0.1199951171875, "learning_rate": 8.644756750400542e-07, "loss": 0.00011991198698524386, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1246, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 64.76041889190674, "completions/min_length": 29.25, "epoch": 2.4785306527674362, "grad_norm": 0.9690639469618161, "kl": 0.120849609375, "learning_rate": 8.642596328331864e-07, "loss": -0.0034683975391089916, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.3829289712011814, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1247, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 68.79166793823242, "completions/min_length": 32.5, "epoch": 2.4805162571357657, "grad_norm": 1.3023367059067472, "kl": 0.100341796875, "learning_rate": 8.640434456067784e-07, "loss": -0.0025977070908993483, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3245695158839226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1248, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 69.8854193687439, "completions/min_length": 27.75, "epoch": 2.4825018615040952, "grad_norm": 0.007727919870984804, "kl": 0.110107421875, "learning_rate": 8.638271134468987e-07, "loss": 0.00011016281496267766, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1249, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 72.58333683013916, "completions/min_length": 32.625, "epoch": 2.4844874658724247, "grad_norm": 0.004938654901134946, "kl": 0.08905029296875, "learning_rate": 8.636106364396743e-07, "loss": 8.90390801941976e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1250, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 65.95833539962769, "completions/min_length": 30.75, "epoch": 2.4864730702407547, "grad_norm": 0.8056809430110817, "kl": 0.1300048828125, "learning_rate": 8.633940146712887e-07, "loss": -0.005963850766420364, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1251, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 69.34375238418579, "completions/min_length": 30.25, "epoch": 2.488458674609084, "grad_norm": 0.6215788283193011, "kl": 0.11474609375, "learning_rate": 8.63177248227984e-07, "loss": 0.00595555966719985, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1252, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 67.27083539962769, "completions/min_length": 31.25, "epoch": 2.4904442789774137, "grad_norm": 0.9085952655668573, "kl": 0.1231689453125, "learning_rate": 8.629603371960597e-07, "loss": 0.00015979260206222534, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1253, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 68.18750286102295, "completions/min_length": 31.625, "epoch": 2.4924298833457432, "grad_norm": 1.3074962345170322, "kl": 0.127197265625, "learning_rate": 8.627432816618723e-07, "loss": -0.0011720409383997321, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.1657295897603035, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1254, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 70.05208492279053, "completions/min_length": 35.5, "epoch": 2.4944154877140727, "grad_norm": 0.004037832876464606, "kl": 0.10247802734375, "learning_rate": 8.625260817118365e-07, "loss": 0.00010241218842566013, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1255, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 70.20833539962769, "completions/min_length": 26.75, "epoch": 2.4964010920824027, "grad_norm": 0.005016021323085701, "kl": 0.10919189453125, "learning_rate": 8.623087374324243e-07, "loss": 0.00010927939729299396, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1256, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 66.56250190734863, "completions/min_length": 27.0, "epoch": 2.498386696450732, "grad_norm": 1.164837192050835, "kl": 0.1170654296875, "learning_rate": 8.620912489101648e-07, "loss": 0.0018511468078941107, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.3829289712011814, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1257, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 67.79166841506958, "completions/min_length": 29.875, "epoch": 2.5003723008190617, "grad_norm": 0.007688784232444334, "kl": 0.10491943359375, "learning_rate": 8.618736162316452e-07, "loss": 0.00010482000652700663, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1258, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 69.72916841506958, "completions/min_length": 27.0, "epoch": 2.5023579051873917, "grad_norm": 0.004010491728274601, "kl": 0.09417724609375, "learning_rate": 8.616558394835094e-07, "loss": 9.424250310985371e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1259, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 67.3229193687439, "completions/min_length": 28.5, "epoch": 2.504343509555721, "grad_norm": 0.00876373767103568, "kl": 0.109619140625, "learning_rate": 8.614379187524592e-07, "loss": 0.00010951112199109048, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1260, "train_speed(iter/s)": 0.022538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 65.14583539962769, "completions/min_length": 27.125, "epoch": 2.5063291139240507, "grad_norm": 0.01383530453205163, "kl": 0.093170166015625, "learning_rate": 8.612198541252533e-07, "loss": 9.319156379206106e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1261, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 71.40625190734863, "completions/min_length": 31.0, "epoch": 2.50831471829238, "grad_norm": 0.004555176220282828, "kl": 0.087493896484375, "learning_rate": 8.610016456887081e-07, "loss": 8.742274803807959e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1262, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 79.64583587646484, "completions/min_length": 27.0, "epoch": 2.5103003226607097, "grad_norm": 0.7435270165886403, "kl": 0.12066650390625, "learning_rate": 8.60783293529697e-07, "loss": 0.004668924957513809, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1263, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 67.28125333786011, "completions/min_length": 35.5, "epoch": 2.5122859270290396, "grad_norm": 0.6936743294366742, "kl": 0.09375, "learning_rate": 8.605647977351504e-07, "loss": 0.011062691919505596, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1264, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 77.12500143051147, "completions/min_length": 36.0, "epoch": 2.514271531397369, "grad_norm": 1.3547422065193302, "kl": 0.12725830078125, "learning_rate": 8.603461583920565e-07, "loss": -0.007573738694190979, "memory(GiB)": 94.21, "reward": 1.6979166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3217491842806339, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1265, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 80.9791693687439, "completions/min_length": 34.375, "epoch": 2.5162571357656986, "grad_norm": 0.999756075272307, "kl": 0.11419677734375, "learning_rate": 8.6012737558746e-07, "loss": 0.012018587440252304, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1266, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 73.57291984558105, "completions/min_length": 32.5, "epoch": 2.518242740134028, "grad_norm": 0.01325856595419211, "kl": 0.11181640625, "learning_rate": 8.599084494084632e-07, "loss": 0.000111827437649481, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1267, "train_speed(iter/s)": 0.022543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 69.15625190734863, "completions/min_length": 32.25, "epoch": 2.5202283445023577, "grad_norm": 0.019721392582024988, "kl": 0.12646484375, "learning_rate": 8.596893799422254e-07, "loss": 0.0001263372105313465, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1268, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 84.01041984558105, "completions/min_length": 33.375, "epoch": 2.5222139488706876, "grad_norm": 0.02195426559168214, "kl": 0.14727783203125, "learning_rate": 8.594701672759624e-07, "loss": 0.0001472388976253569, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1269, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.875, "completions/mean_length": 102.91666889190674, "completions/min_length": 37.25, "epoch": 2.524199553239017, "grad_norm": 0.5849893807496976, "kl": 0.1099853515625, "learning_rate": 8.592508114969478e-07, "loss": -0.013651542365550995, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1270, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 80.43750286102295, "completions/min_length": 34.375, "epoch": 2.5261851576073466, "grad_norm": 0.014336507152132232, "kl": 0.1121826171875, "learning_rate": 8.590313126925117e-07, "loss": 0.0001122748653870076, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1271, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 77.53125286102295, "completions/min_length": 32.0, "epoch": 2.5281707619756766, "grad_norm": 0.882991996349277, "kl": 0.12274169921875, "learning_rate": 8.588116709500413e-07, "loss": 0.0012082557659596205, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1272, "train_speed(iter/s)": 0.022539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 84.11458587646484, "completions/min_length": 24.5, "epoch": 2.530156366344006, "grad_norm": 0.011855462864836377, "kl": 0.11419677734375, "learning_rate": 8.585918863569806e-07, "loss": 0.00011433433974161744, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1273, "train_speed(iter/s)": 0.022538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 78.77083587646484, "completions/min_length": 33.625, "epoch": 2.5321419707123356, "grad_norm": 0.010006827491617286, "kl": 0.1190185546875, "learning_rate": 8.583719590008307e-07, "loss": 0.00011909760360140353, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1274, "train_speed(iter/s)": 0.022538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.375, "completions/mean_length": 85.02083492279053, "completions/min_length": 31.625, "epoch": 2.534127575080665, "grad_norm": 0.8140748755303191, "kl": 0.110107421875, "learning_rate": 8.581518889691492e-07, "loss": 0.008183024823665619, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1275, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.875, "completions/mean_length": 80.47916841506958, "completions/min_length": 31.5, "epoch": 2.5361131794489946, "grad_norm": 0.6115519651841012, "kl": 0.1204833984375, "learning_rate": 8.579316763495508e-07, "loss": -5.078440153738484e-05, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1276, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 270.375, "completions/mean_length": 93.114586353302, "completions/min_length": 33.75, "epoch": 2.5380987838173246, "grad_norm": 0.530750422750695, "kl": 0.1123046875, "learning_rate": 8.577113212297067e-07, "loss": 0.023251429200172424, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1277, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 82.00000190734863, "completions/min_length": 35.625, "epoch": 2.540084388185654, "grad_norm": 0.9372763006868186, "kl": 0.120849609375, "learning_rate": 8.574908236973453e-07, "loss": -0.002739655552431941, "memory(GiB)": 94.21, "reward": 1.96875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1278, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 85.67708492279053, "completions/min_length": 38.125, "epoch": 2.5420699925539836, "grad_norm": 1.031068644172506, "kl": 0.1212158203125, "learning_rate": 8.572701838402509e-07, "loss": 0.0028132374864071608, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.30885961651802063, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1279, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 82.20833539962769, "completions/min_length": 29.25, "epoch": 2.544055596922313, "grad_norm": 0.6829913359240926, "kl": 0.1195068359375, "learning_rate": 8.570494017462654e-07, "loss": -0.012835456989705563, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1280, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 70.9166693687439, "completions/min_length": 32.125, "epoch": 2.5460412012906426, "grad_norm": 0.007156827865647327, "kl": 0.11328125, "learning_rate": 8.568284775032866e-07, "loss": 0.00011326756066409871, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1281, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.625, "completions/mean_length": 86.48958587646484, "completions/min_length": 32.25, "epoch": 2.5480268056589725, "grad_norm": 0.891820701300227, "kl": 0.14813232421875, "learning_rate": 8.566074111992691e-07, "loss": 0.007472541183233261, "memory(GiB)": 94.21, "reward": 1.5729166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.5729166669771075, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1282, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 80.51041793823242, "completions/min_length": 27.25, "epoch": 2.550012410027302, "grad_norm": 1.2947927361767624, "kl": 0.1209716796875, "learning_rate": 8.563862029222244e-07, "loss": -0.00583060784265399, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.17046867683529854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1283, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 78.23958587646484, "completions/min_length": 25.875, "epoch": 2.5519980143956316, "grad_norm": 0.009804443375801785, "kl": 0.1531982421875, "learning_rate": 8.561648527602202e-07, "loss": 0.0001531544839963317, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1284, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.375, "completions/mean_length": 78.87500143051147, "completions/min_length": 27.125, "epoch": 2.5539836187639615, "grad_norm": 0.008180539122622494, "kl": 0.1741943359375, "learning_rate": 8.559433608013803e-07, "loss": 0.00017420492076780647, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1285, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 72.27083492279053, "completions/min_length": 33.625, "epoch": 2.555969223132291, "grad_norm": 0.008897538748529256, "kl": 0.122314453125, "learning_rate": 8.557217271338859e-07, "loss": 0.00012234285532031208, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1286, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 80.17708587646484, "completions/min_length": 36.75, "epoch": 2.5579548275006205, "grad_norm": 0.008184283066285509, "kl": 0.1473388671875, "learning_rate": 8.554999518459738e-07, "loss": 0.0001473360462114215, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1287, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 82.25000190734863, "completions/min_length": 38.0, "epoch": 2.55994043186895, "grad_norm": 1.35495675413901, "kl": 0.1729736328125, "learning_rate": 8.552780350259377e-07, "loss": 0.004792386200278997, "memory(GiB)": 94.21, "reward": 1.53125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.53125, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1288, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 72.72916984558105, "completions/min_length": 29.875, "epoch": 2.5619260362372795, "grad_norm": 0.009232165518929135, "kl": 0.15264892578125, "learning_rate": 8.55055976762127e-07, "loss": 0.00015288709255401045, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1289, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 87.40625286102295, "completions/min_length": 34.0, "epoch": 2.5639116406056095, "grad_norm": 0.008074296664395918, "kl": 0.14410400390625, "learning_rate": 8.548337771429483e-07, "loss": 0.00014415831537917256, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1290, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 77.17708587646484, "completions/min_length": 30.875, "epoch": 2.565897244973939, "grad_norm": 0.7284026919730846, "kl": 0.1558837890625, "learning_rate": 8.546114362568639e-07, "loss": -0.006634535267949104, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1291, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.125, "completions/mean_length": 65.59375143051147, "completions/min_length": 31.875, "epoch": 2.5678828493422685, "grad_norm": 0.009507296785289696, "kl": 0.1470947265625, "learning_rate": 8.543889541923924e-07, "loss": 0.00014710548566654325, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1292, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 74.54166793823242, "completions/min_length": 31.25, "epoch": 2.569868453710598, "grad_norm": 1.3247329599925473, "kl": 0.14898681640625, "learning_rate": 8.541663310381086e-07, "loss": 0.0009728459408506751, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1293, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 70.927086353302, "completions/min_length": 26.625, "epoch": 2.5718540580789275, "grad_norm": 0.009207493064240839, "kl": 0.15557861328125, "learning_rate": 8.539435668826436e-07, "loss": 0.00015577912563458085, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1294, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 75.45833492279053, "completions/min_length": 32.625, "epoch": 2.5738396624472575, "grad_norm": 0.009062648059217483, "kl": 0.1727294921875, "learning_rate": 8.537206618146846e-07, "loss": 0.0001725937909213826, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1295, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 76.66666793823242, "completions/min_length": 27.75, "epoch": 2.575825266815587, "grad_norm": 1.066817069442844, "kl": 0.15185546875, "learning_rate": 8.534976159229748e-07, "loss": 0.009802289307117462, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1296, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 82.37500095367432, "completions/min_length": 34.875, "epoch": 2.5778108711839165, "grad_norm": 0.8241548990271572, "kl": 0.15191650390625, "learning_rate": 8.532744292963137e-07, "loss": -0.005012545734643936, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1297, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 76.00000190734863, "completions/min_length": 29.625, "epoch": 2.5797964755522464, "grad_norm": 0.7848137503011022, "kl": 0.15087890625, "learning_rate": 8.530511020235564e-07, "loss": -0.0070297615602612495, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1298, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 77.09375238418579, "completions/min_length": 38.375, "epoch": 2.581782079920576, "grad_norm": 0.95367213580266, "kl": 0.13055419921875, "learning_rate": 8.528276341936145e-07, "loss": 0.009501341730356216, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1299, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 78.43750190734863, "completions/min_length": 28.125, "epoch": 2.5837676842889055, "grad_norm": 0.9737161878568172, "kl": 0.1533203125, "learning_rate": 8.52604025895455e-07, "loss": -0.00982861127704382, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1300, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 77.64583587646484, "completions/min_length": 32.625, "epoch": 2.585753288657235, "grad_norm": 1.0194936374565085, "kl": 0.14801025390625, "learning_rate": 8.523802772181015e-07, "loss": 0.0013373075053095818, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1301, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 71.88541793823242, "completions/min_length": 24.75, "epoch": 2.5877388930255645, "grad_norm": 0.008911836301347677, "kl": 0.15283203125, "learning_rate": 8.52156388250633e-07, "loss": 0.00015283057291526347, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1302, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 89.21875286102295, "completions/min_length": 34.0, "epoch": 2.5897244973938944, "grad_norm": 0.007574798749012008, "kl": 0.14727783203125, "learning_rate": 8.519323590821843e-07, "loss": 0.00014703706256113946, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1303, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 77.47916746139526, "completions/min_length": 31.625, "epoch": 2.591710101762224, "grad_norm": 0.00863429389958237, "kl": 0.15069580078125, "learning_rate": 8.517081898019464e-07, "loss": 0.00015076817362569273, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1304, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 82.48958587646484, "completions/min_length": 31.375, "epoch": 2.5936957061305534, "grad_norm": 1.0320039224761541, "kl": 0.17291259765625, "learning_rate": 8.514838804991659e-07, "loss": -0.00018205369997303933, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1305, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 73.37500190734863, "completions/min_length": 33.25, "epoch": 2.595681310498883, "grad_norm": 1.0379692642717284, "kl": 0.1519775390625, "learning_rate": 8.51259431263145e-07, "loss": 0.00015195335436146706, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1306, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 79.50000286102295, "completions/min_length": 38.25, "epoch": 2.5976669148672125, "grad_norm": 0.008103738078906093, "kl": 0.1436767578125, "learning_rate": 8.510348421832419e-07, "loss": 0.00014363221998792142, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1307, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 79.95833492279053, "completions/min_length": 34.375, "epoch": 2.5996525192355424, "grad_norm": 1.0338789266755384, "kl": 0.13372802734375, "learning_rate": 8.508101133488701e-07, "loss": 0.009784862399101257, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1308, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 76.25000286102295, "completions/min_length": 35.0, "epoch": 2.601638123603872, "grad_norm": 1.119029014093334, "kl": 0.158935546875, "learning_rate": 8.50585244849499e-07, "loss": 0.0003059332666452974, "memory(GiB)": 94.21, "reward": 1.9687500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.9687500074505806, "rewards/CineAccuracyORM/std": 0.08474057167768478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1309, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 70.47916889190674, "completions/min_length": 28.0, "epoch": 2.6036237279722014, "grad_norm": 0.5193173980277985, "kl": 0.1607666015625, "learning_rate": 8.503602367746537e-07, "loss": 0.015041163191199303, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1310, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.375, "completions/mean_length": 79.65625238418579, "completions/min_length": 28.0, "epoch": 2.6056093323405314, "grad_norm": 0.9321107861342776, "kl": 0.15869140625, "learning_rate": 8.501350892139144e-07, "loss": 0.00015891841030679643, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1311, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 70.04166841506958, "completions/min_length": 22.625, "epoch": 2.607594936708861, "grad_norm": 0.9320382310567638, "kl": 0.1514892578125, "learning_rate": 8.499098022569176e-07, "loss": 0.0013471394777297974, "memory(GiB)": 94.21, "reward": 1.5520833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1312, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 80.06250190734863, "completions/min_length": 35.125, "epoch": 2.6095805410771904, "grad_norm": 0.00856575391455546, "kl": 0.144775390625, "learning_rate": 8.496843759933546e-07, "loss": 0.0001444444787921384, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1313, "train_speed(iter/s)": 0.022524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 85.11458683013916, "completions/min_length": 36.625, "epoch": 2.61156614544552, "grad_norm": 0.0076121562291528635, "kl": 0.1314697265625, "learning_rate": 8.494588105129723e-07, "loss": 0.0001313299871981144, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1314, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 82.31250190734863, "completions/min_length": 35.875, "epoch": 2.6135517498138494, "grad_norm": 0.007664222850183941, "kl": 0.125732421875, "learning_rate": 8.492331059055733e-07, "loss": 0.0001255877286894247, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1315, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 80.61458587646484, "completions/min_length": 34.0, "epoch": 2.6155373541821794, "grad_norm": 1.0865390814622746, "kl": 0.12030029296875, "learning_rate": 8.490072622610155e-07, "loss": 0.0038716073613613844, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1316, "train_speed(iter/s)": 0.022523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 80.10416841506958, "completions/min_length": 25.25, "epoch": 2.617522958550509, "grad_norm": 0.008505398848228332, "kl": 0.1298828125, "learning_rate": 8.487812796692119e-07, "loss": 0.00012996768055018038, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1317, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 84.3854193687439, "completions/min_length": 36.5, "epoch": 2.6195085629188384, "grad_norm": 0.00893929579214178, "kl": 0.11041259765625, "learning_rate": 8.485551582201311e-07, "loss": 0.0001104215916711837, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1318, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 76.70833492279053, "completions/min_length": 22.75, "epoch": 2.621494167287168, "grad_norm": 0.008245143619473348, "kl": 0.126220703125, "learning_rate": 8.483288980037968e-07, "loss": 0.00012604551739059389, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1319, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 80.18750190734863, "completions/min_length": 28.625, "epoch": 2.6234797716554974, "grad_norm": 0.008405254164002814, "kl": 0.13531494140625, "learning_rate": 8.481024991102881e-07, "loss": 0.00013513855810742825, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1320, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.75, "completions/mean_length": 80.37500429153442, "completions/min_length": 29.25, "epoch": 2.6254653760238273, "grad_norm": 0.016090463890943034, "kl": 0.134521484375, "learning_rate": 8.478759616297391e-07, "loss": 0.00013450313417706639, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1321, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 76.92708396911621, "completions/min_length": 27.625, "epoch": 2.627450980392157, "grad_norm": 0.007823547707000309, "kl": 0.1368408203125, "learning_rate": 8.476492856523395e-07, "loss": 0.0001368635566905141, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1322, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 73.27083587646484, "completions/min_length": 26.25, "epoch": 2.6294365847604864, "grad_norm": 0.008968148464996984, "kl": 0.13140869140625, "learning_rate": 8.474224712683336e-07, "loss": 0.00013138602662365884, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1323, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 72.65625190734863, "completions/min_length": 32.75, "epoch": 2.6314221891288163, "grad_norm": 0.7785698642165746, "kl": 0.1202392578125, "learning_rate": 8.471955185680211e-07, "loss": -0.006980721838772297, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1324, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 85.31250238418579, "completions/min_length": 42.25, "epoch": 2.633407793497146, "grad_norm": 0.9217489896409802, "kl": 0.12884521484375, "learning_rate": 8.469684276417568e-07, "loss": -0.00790985394269228, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1325, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 83.41666984558105, "completions/min_length": 34.5, "epoch": 2.6353933978654753, "grad_norm": 0.7917415869369991, "kl": 0.146484375, "learning_rate": 8.467411985799501e-07, "loss": 0.010213149711489677, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1326, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 90.020836353302, "completions/min_length": 37.625, "epoch": 2.637379002233805, "grad_norm": 0.005110986229138953, "kl": 0.12506103515625, "learning_rate": 8.465138314730665e-07, "loss": 0.00012517454160843045, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1327, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 239.5, "completions/mean_length": 84.54167032241821, "completions/min_length": 29.125, "epoch": 2.6393646066021343, "grad_norm": 1.0625792399177012, "kl": 0.1378173828125, "learning_rate": 8.462863264116249e-07, "loss": 0.012299126014113426, "memory(GiB)": 94.21, "reward": 1.6562500149011612, "reward_std": 0.0765465535223484, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.23100870847702026, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1328, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 89.43750238418579, "completions/min_length": 29.375, "epoch": 2.6413502109704643, "grad_norm": 0.006112201117864251, "kl": 0.107666015625, "learning_rate": 8.460586834862003e-07, "loss": 0.00010770794324344024, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1329, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 94.5416693687439, "completions/min_length": 34.125, "epoch": 2.643335815338794, "grad_norm": 1.5566139667025687, "kl": 0.1217041015625, "learning_rate": 8.458309027874221e-07, "loss": 0.016946181654930115, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7812500037252903, "rewards/CineAccuracyORM/std": 0.2281883768737316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1330, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 89.97916889190674, "completions/min_length": 29.75, "epoch": 2.6453214197071233, "grad_norm": 0.771765800031731, "kl": 0.15472412109375, "learning_rate": 8.456029844059749e-07, "loss": 0.0006598147447220981, "memory(GiB)": 94.21, "reward": 1.5520833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1331, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 77.05208539962769, "completions/min_length": 35.25, "epoch": 2.647307024075453, "grad_norm": 1.5266480852141266, "kl": 0.14337158203125, "learning_rate": 8.453749284325975e-07, "loss": 0.00491324020549655, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.15789688751101494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1332, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 82.52083492279053, "completions/min_length": 35.0, "epoch": 2.6492926284437823, "grad_norm": 0.006688470649768745, "kl": 0.12078857421875, "learning_rate": 8.451467349580843e-07, "loss": 0.00012085602793376893, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1333, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 77.22916984558105, "completions/min_length": 29.875, "epoch": 2.6512782328121123, "grad_norm": 0.01877478238714203, "kl": 0.12115478515625, "learning_rate": 8.449184040732835e-07, "loss": 0.00012115540448576212, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1334, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 77.44791841506958, "completions/min_length": 23.375, "epoch": 2.653263837180442, "grad_norm": 0.008819124035832295, "kl": 0.11065673828125, "learning_rate": 8.446899358690988e-07, "loss": 0.00011072001507272944, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1335, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 75.08333587646484, "completions/min_length": 24.125, "epoch": 2.6552494415487713, "grad_norm": 0.013503528681694781, "kl": 0.111328125, "learning_rate": 8.444613304364884e-07, "loss": 0.00011119978444185108, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1336, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.375, "completions/mean_length": 98.88541889190674, "completions/min_length": 44.0, "epoch": 2.6572350459171012, "grad_norm": 0.7448557592854417, "kl": 0.1416015625, "learning_rate": 8.442325878664647e-07, "loss": 0.0029394521843641996, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1337, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 234.0, "completions/mean_length": 80.23958539962769, "completions/min_length": 21.75, "epoch": 2.6592206502854308, "grad_norm": 0.43129873660356244, "kl": 0.1107177734375, "learning_rate": 8.440037082500952e-07, "loss": 0.023522820323705673, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1338, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.625, "completions/mean_length": 89.90625190734863, "completions/min_length": 36.125, "epoch": 2.6612062546537603, "grad_norm": 0.8433296350736127, "kl": 0.1353759765625, "learning_rate": 8.437746916785016e-07, "loss": 0.0008302840287797153, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1339, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 86.94791984558105, "completions/min_length": 32.125, "epoch": 2.6631918590220898, "grad_norm": 0.9609011623011292, "kl": 0.144287109375, "learning_rate": 8.435455382428604e-07, "loss": -0.0032931778114289045, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1340, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 77.04166889190674, "completions/min_length": 25.875, "epoch": 2.6651774633904193, "grad_norm": 1.1867880233310772, "kl": 0.12060546875, "learning_rate": 8.433162480344025e-07, "loss": -0.0025646924041211605, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1341, "train_speed(iter/s)": 0.022502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 74.37500095367432, "completions/min_length": 32.0, "epoch": 2.6671630677587492, "grad_norm": 0.8745896428759916, "kl": 0.1317138671875, "learning_rate": 8.430868211444132e-07, "loss": 0.003729822114109993, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1342, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 73.39583444595337, "completions/min_length": 26.5, "epoch": 2.6691486721270787, "grad_norm": 0.9216401697430731, "kl": 0.12255859375, "learning_rate": 8.428572576642323e-07, "loss": 0.0001722034066915512, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.708333333954215, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1343, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 81.20833492279053, "completions/min_length": 26.25, "epoch": 2.6711342764954082, "grad_norm": 0.04291237396572143, "kl": 0.180908203125, "learning_rate": 8.426275576852537e-07, "loss": 0.0001807756198104471, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1344, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 85.08333492279053, "completions/min_length": 38.875, "epoch": 2.6731198808637378, "grad_norm": 0.006270111741976449, "kl": 0.113525390625, "learning_rate": 8.423977212989262e-07, "loss": 0.00011347224790370092, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1345, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 79.73958492279053, "completions/min_length": 38.375, "epoch": 2.6751054852320673, "grad_norm": 0.0067943967459486165, "kl": 0.116455078125, "learning_rate": 8.421677485967522e-07, "loss": 0.0001163799170171842, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1346, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 85.79166793823242, "completions/min_length": 37.75, "epoch": 2.677091089600397, "grad_norm": 2.0153539815768373, "kl": 0.1279296875, "learning_rate": 8.419376396702891e-07, "loss": -0.0022262874990701675, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1347, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 71.18750143051147, "completions/min_length": 24.375, "epoch": 2.6790766939687267, "grad_norm": 0.7440739006692163, "kl": 0.120849609375, "learning_rate": 8.41707394611148e-07, "loss": 0.000641676306258887, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1348, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 77.95833492279053, "completions/min_length": 33.5, "epoch": 2.6810622983370562, "grad_norm": 1.4485607817041966, "kl": 0.1287841796875, "learning_rate": 8.414770135109944e-07, "loss": 0.025922708213329315, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1349, "train_speed(iter/s)": 0.0225 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 68.17708539962769, "completions/min_length": 20.625, "epoch": 2.683047902705386, "grad_norm": 0.007698389524162958, "kl": 0.11895751953125, "learning_rate": 8.41246496461548e-07, "loss": 0.00011914019705727696, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1350, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 71.97916793823242, "completions/min_length": 23.75, "epoch": 2.6850335070737157, "grad_norm": 0.00861903647237541, "kl": 0.11956787109375, "learning_rate": 8.410158435545824e-07, "loss": 0.0001197346136905253, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1351, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 74.1041693687439, "completions/min_length": 26.875, "epoch": 2.687019111442045, "grad_norm": 0.006559222700376209, "kl": 0.111083984375, "learning_rate": 8.407850548819256e-07, "loss": 0.00011097540846094489, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1352, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 79.20833539962769, "completions/min_length": 35.5, "epoch": 2.6890047158103747, "grad_norm": 0.007154168608604736, "kl": 0.11968994140625, "learning_rate": 8.405541305354595e-07, "loss": 0.00011971910134889185, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1353, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 86.81250190734863, "completions/min_length": 29.625, "epoch": 2.690990320178704, "grad_norm": 1.1629349061949692, "kl": 0.12652587890625, "learning_rate": 8.403230706071199e-07, "loss": -0.004309754353016615, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1354, "train_speed(iter/s)": 0.022499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 72.15625238418579, "completions/min_length": 30.5, "epoch": 2.692975924547034, "grad_norm": 0.007050255414054574, "kl": 0.135009765625, "learning_rate": 8.400918751888968e-07, "loss": 0.0001351383834844455, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1355, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 78.18750143051147, "completions/min_length": 32.0, "epoch": 2.6949615289153637, "grad_norm": 0.006916079687601611, "kl": 0.12237548828125, "learning_rate": 8.39860544372834e-07, "loss": 0.00012238250928930938, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1356, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 71.93750238418579, "completions/min_length": 35.375, "epoch": 2.696947133283693, "grad_norm": 1.3044029231440082, "kl": 0.116943359375, "learning_rate": 8.396290782510291e-07, "loss": -0.0025276965461671352, "memory(GiB)": 94.21, "reward": 1.8854166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.16290925815701485, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1357, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 83.58333492279053, "completions/min_length": 29.75, "epoch": 2.6989327376520227, "grad_norm": 0.92937170599385, "kl": 0.16668701171875, "learning_rate": 8.393974769156341e-07, "loss": 0.01582413725554943, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1358, "train_speed(iter/s)": 0.022497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 70.26041889190674, "completions/min_length": 31.875, "epoch": 2.700918342020352, "grad_norm": 1.086936993087712, "kl": 0.1446533203125, "learning_rate": 8.391657404588539e-07, "loss": 0.0037132850848138332, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1359, "train_speed(iter/s)": 0.0225 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 85.17708587646484, "completions/min_length": 37.0, "epoch": 2.702903946388682, "grad_norm": 0.007170555789073165, "kl": 0.1513671875, "learning_rate": 8.389338689729482e-07, "loss": 0.00015143706696107984, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.45695383101701736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1360, "train_speed(iter/s)": 0.022501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 79.07291889190674, "completions/min_length": 37.5, "epoch": 2.7048895507570117, "grad_norm": 0.006994287834309996, "kl": 0.14501953125, "learning_rate": 8.387018625502296e-07, "loss": 0.00014508981257677078, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1361, "train_speed(iter/s)": 0.022503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 71.71875238418579, "completions/min_length": 30.625, "epoch": 2.706875155125341, "grad_norm": 1.062936932162798, "kl": 0.123779296875, "learning_rate": 8.384697212830651e-07, "loss": 0.0001237119286088273, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1362, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 71.79166841506958, "completions/min_length": 28.0, "epoch": 2.708860759493671, "grad_norm": 0.006636822809046916, "kl": 0.12030029296875, "learning_rate": 8.382374452638752e-07, "loss": 0.0001202690982609056, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1363, "train_speed(iter/s)": 0.022504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 74.47916984558105, "completions/min_length": 29.625, "epoch": 2.7108463638620006, "grad_norm": 0.007775758167551887, "kl": 0.135498046875, "learning_rate": 8.380050345851337e-07, "loss": 0.0001355545682599768, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1364, "train_speed(iter/s)": 0.022505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 71.90625095367432, "completions/min_length": 29.5, "epoch": 2.71283196823033, "grad_norm": 0.008164639734205177, "kl": 0.1463623046875, "learning_rate": 8.377724893393681e-07, "loss": 0.00014629887300543487, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1365, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 74.96875143051147, "completions/min_length": 29.5, "epoch": 2.7148175725986596, "grad_norm": 0.842683376715099, "kl": 0.15594482421875, "learning_rate": 8.375398096191599e-07, "loss": 0.0029901652596890926, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1366, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 69.19791889190674, "completions/min_length": 24.0, "epoch": 2.716803176966989, "grad_norm": 0.006092739181903076, "kl": 0.14398193359375, "learning_rate": 8.373069955171439e-07, "loss": 0.00014385800750460476, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1367, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 70.80208683013916, "completions/min_length": 31.25, "epoch": 2.718788781335319, "grad_norm": 0.9550216108791946, "kl": 0.17108154296875, "learning_rate": 8.370740471260083e-07, "loss": -0.012372470460832119, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1368, "train_speed(iter/s)": 0.022507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 67.25000190734863, "completions/min_length": 31.75, "epoch": 2.7207743857036486, "grad_norm": 1.500281898064778, "kl": 0.125, "learning_rate": 8.368409645384948e-07, "loss": 0.010152310132980347, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2486373633146286, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1369, "train_speed(iter/s)": 0.022506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 74.27083587646484, "completions/min_length": 27.625, "epoch": 2.722759990071978, "grad_norm": 0.008196801744404895, "kl": 0.17449951171875, "learning_rate": 8.366077478473986e-07, "loss": 0.00017460837261751294, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1370, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 68.84375238418579, "completions/min_length": 24.125, "epoch": 2.7247455944403076, "grad_norm": 0.007700150006095348, "kl": 0.1231689453125, "learning_rate": 8.36374397145568e-07, "loss": 0.00012310505553614348, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1371, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 66.86458492279053, "completions/min_length": 25.875, "epoch": 2.726731198808637, "grad_norm": 0.0058778998107458765, "kl": 0.12078857421875, "learning_rate": 8.361409125259052e-07, "loss": 0.00012079622683813795, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1372, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 61.79166793823242, "completions/min_length": 22.625, "epoch": 2.728716803176967, "grad_norm": 0.009208210840519383, "kl": 0.1243896484375, "learning_rate": 8.359072940813654e-07, "loss": 0.00012440350838005543, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1373, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 71.1666693687439, "completions/min_length": 27.0, "epoch": 2.7307024075452966, "grad_norm": 1.030550533546693, "kl": 0.12689208984375, "learning_rate": 8.35673541904957e-07, "loss": 0.0001268411724595353, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393530294299126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1374, "train_speed(iter/s)": 0.022507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 68.92708444595337, "completions/min_length": 28.375, "epoch": 2.732688011913626, "grad_norm": 0.007006007264362935, "kl": 0.12786865234375, "learning_rate": 8.354396560897417e-07, "loss": 0.00012788604362867773, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1375, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 71.10416793823242, "completions/min_length": 32.375, "epoch": 2.734673616281956, "grad_norm": 1.4262557443690458, "kl": 0.13214111328125, "learning_rate": 8.352056367288343e-07, "loss": 0.006337085738778114, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1376, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 69.39583492279053, "completions/min_length": 31.875, "epoch": 2.7366592206502856, "grad_norm": 0.8361035810946986, "kl": 0.19073486328125, "learning_rate": 8.349714839154034e-07, "loss": 0.004298907704651356, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1377, "train_speed(iter/s)": 0.022511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 73.91666889190674, "completions/min_length": 24.5, "epoch": 2.738644825018615, "grad_norm": 0.005415316153608817, "kl": 0.142578125, "learning_rate": 8.347371977426698e-07, "loss": 0.00014272108091972768, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1378, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 65.11458683013916, "completions/min_length": 27.875, "epoch": 2.7406304293869446, "grad_norm": 0.006202513753180842, "kl": 0.1195068359375, "learning_rate": 8.34502778303908e-07, "loss": 0.00011957129754591733, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1379, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 80.42708587646484, "completions/min_length": 31.875, "epoch": 2.742616033755274, "grad_norm": 0.0053600651643142695, "kl": 0.136474609375, "learning_rate": 8.342682256924452e-07, "loss": 0.00013654532085638493, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1380, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 74.59375286102295, "completions/min_length": 29.625, "epoch": 2.744601638123604, "grad_norm": 0.0054725932049495525, "kl": 0.12518310546875, "learning_rate": 8.340335400016622e-07, "loss": 0.00012522964971140027, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1381, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 67.85416746139526, "completions/min_length": 27.5, "epoch": 2.7465872424919335, "grad_norm": 1.3390968288556966, "kl": 0.12908935546875, "learning_rate": 8.337987213249919e-07, "loss": -0.003195242490619421, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1382, "train_speed(iter/s)": 0.022508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 62.8229193687439, "completions/min_length": 23.0, "epoch": 2.748572846860263, "grad_norm": 0.02902153182439986, "kl": 0.1741943359375, "learning_rate": 8.33563769755921e-07, "loss": 0.0001743856118991971, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1383, "train_speed(iter/s)": 0.022509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 73.41666984558105, "completions/min_length": 27.875, "epoch": 2.7505584512285925, "grad_norm": 0.00896930133056037, "kl": 0.13958740234375, "learning_rate": 8.333286853879886e-07, "loss": 0.0001395608705934137, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1384, "train_speed(iter/s)": 0.02251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 67.54166841506958, "completions/min_length": 25.875, "epoch": 2.752544055596922, "grad_norm": 0.007423741642248186, "kl": 0.11932373046875, "learning_rate": 8.330934683147868e-07, "loss": 0.00011926879960810766, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1385, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 68.35416889190674, "completions/min_length": 26.5, "epoch": 2.754529659965252, "grad_norm": 1.5686116768359737, "kl": 0.115478515625, "learning_rate": 8.328581186299603e-07, "loss": -0.0058824447914958, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.2805779278278351, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1386, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.125, "completions/mean_length": 63.989585399627686, "completions/min_length": 25.75, "epoch": 2.7565152643335815, "grad_norm": 0.9989886668778429, "kl": 0.11767578125, "learning_rate": 8.326226364272076e-07, "loss": -0.016778334975242615, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1387, "train_speed(iter/s)": 0.022513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 67.802086353302, "completions/min_length": 28.375, "epoch": 2.758500868701911, "grad_norm": 1.0287269146830489, "kl": 0.11572265625, "learning_rate": 8.323870218002782e-07, "loss": 0.0013600036036223173, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1388, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 73.14583444595337, "completions/min_length": 26.875, "epoch": 2.760486473070241, "grad_norm": 0.006112785978903015, "kl": 0.12384033203125, "learning_rate": 8.32151274842976e-07, "loss": 0.00012375880032777786, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1389, "train_speed(iter/s)": 0.022512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 73.51041889190674, "completions/min_length": 28.75, "epoch": 2.7624720774385705, "grad_norm": 1.15875894898111, "kl": 0.14385986328125, "learning_rate": 8.319153956491567e-07, "loss": -0.012648469768464565, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.8541666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1390, "train_speed(iter/s)": 0.022514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 74.98958587646484, "completions/min_length": 34.0, "epoch": 2.7644576818069, "grad_norm": 0.689123238107339, "kl": 0.116455078125, "learning_rate": 8.31679384312729e-07, "loss": -0.011478595435619354, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1391, "train_speed(iter/s)": 0.022515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 69.37500286102295, "completions/min_length": 27.75, "epoch": 2.7664432861752295, "grad_norm": 0.8361020109355061, "kl": 0.12188720703125, "learning_rate": 8.314432409276537e-07, "loss": 0.012938725762069225, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1392, "train_speed(iter/s)": 0.022515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 71.97916984558105, "completions/min_length": 24.0, "epoch": 2.768428890543559, "grad_norm": 0.006949124885682619, "kl": 0.13018798828125, "learning_rate": 8.312069655879447e-07, "loss": 0.00013012596173211932, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1393, "train_speed(iter/s)": 0.022516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 79.42708539962769, "completions/min_length": 33.75, "epoch": 2.770414494911889, "grad_norm": 0.889382183754225, "kl": 0.155517578125, "learning_rate": 8.309705583876682e-07, "loss": -0.011101406067609787, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1394, "train_speed(iter/s)": 0.022517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 70.35416841506958, "completions/min_length": 31.125, "epoch": 2.7724000992802185, "grad_norm": 0.006303712871865013, "kl": 0.15771484375, "learning_rate": 8.307340194209434e-07, "loss": 0.00015773381164763123, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1395, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 77.8541693687439, "completions/min_length": 26.5, "epoch": 2.774385703648548, "grad_norm": 0.007990167338355657, "kl": 0.12969970703125, "learning_rate": 8.304973487819408e-07, "loss": 0.00012961360334884375, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1396, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 67.48958539962769, "completions/min_length": 28.875, "epoch": 2.7763713080168775, "grad_norm": 0.008385126680202701, "kl": 0.1513671875, "learning_rate": 8.302605465648846e-07, "loss": 0.00015145835641305894, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1397, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 73.32291889190674, "completions/min_length": 29.0, "epoch": 2.778356912385207, "grad_norm": 0.005962248547681376, "kl": 0.15325927734375, "learning_rate": 8.300236128640506e-07, "loss": 0.00015337191871367395, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1398, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 76.54166793823242, "completions/min_length": 32.375, "epoch": 2.780342516753537, "grad_norm": 0.0073952386973508534, "kl": 0.13641357421875, "learning_rate": 8.297865477737671e-07, "loss": 0.0001365236093988642, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1399, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 71.52083539962769, "completions/min_length": 34.25, "epoch": 2.7823281211218664, "grad_norm": 0.008862816460142224, "kl": 0.15911865234375, "learning_rate": 8.295493513884147e-07, "loss": 0.00015923750470392406, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1400, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 76.9791693687439, "completions/min_length": 25.75, "epoch": 2.784313725490196, "grad_norm": 0.008993882924231113, "kl": 0.12677001953125, "learning_rate": 8.293120238024267e-07, "loss": 0.00012676059850491583, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1401, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 72.8854193687439, "completions/min_length": 23.125, "epoch": 2.786299329858526, "grad_norm": 0.9148067337260869, "kl": 0.1453857421875, "learning_rate": 8.290745651102881e-07, "loss": -0.005603249184787273, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1402, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 73.33333492279053, "completions/min_length": 27.125, "epoch": 2.7882849342268554, "grad_norm": 0.008487777371436116, "kl": 0.13037109375, "learning_rate": 8.288369754065362e-07, "loss": 0.00013030279660597444, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1403, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 68.69791841506958, "completions/min_length": 25.5, "epoch": 2.790270538595185, "grad_norm": 0.008718197550629272, "kl": 0.12799072265625, "learning_rate": 8.285992547857606e-07, "loss": 0.00012790513574145734, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1404, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.875, "completions/mean_length": 63.645835399627686, "completions/min_length": 30.25, "epoch": 2.7922561429635144, "grad_norm": 0.00936139026550106, "kl": 0.14501953125, "learning_rate": 8.28361403342603e-07, "loss": 0.0001449327974114567, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1405, "train_speed(iter/s)": 0.022519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 67.81250190734863, "completions/min_length": 25.375, "epoch": 2.794241747331844, "grad_norm": 1.2365704567101985, "kl": 0.131591796875, "learning_rate": 8.281234211717571e-07, "loss": -0.019905827939510345, "memory(GiB)": 94.21, "reward": 1.9166666865348816, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1406, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 78.71875238418579, "completions/min_length": 32.625, "epoch": 2.796227351700174, "grad_norm": 0.5859171366524915, "kl": 0.142822265625, "learning_rate": 8.278853083679686e-07, "loss": -0.011180834844708443, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1407, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 69.17708539962769, "completions/min_length": 22.875, "epoch": 2.7982129560685034, "grad_norm": 1.3298291526335313, "kl": 0.45050048828125, "learning_rate": 8.276470650260354e-07, "loss": 0.0004501368384808302, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1408, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 75.79166793823242, "completions/min_length": 33.625, "epoch": 2.800198560436833, "grad_norm": 0.007243277595671766, "kl": 0.1629638671875, "learning_rate": 8.274086912408072e-07, "loss": 0.00016277949907816947, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1409, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 76.77083539962769, "completions/min_length": 31.125, "epoch": 2.8021841648051624, "grad_norm": 0.8237896117751228, "kl": 0.1285400390625, "learning_rate": 8.271701871071856e-07, "loss": -0.003687500488013029, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1410, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 73.00000238418579, "completions/min_length": 31.375, "epoch": 2.804169769173492, "grad_norm": 0.007847470511905078, "kl": 0.14788818359375, "learning_rate": 8.269315527201246e-07, "loss": 0.00014789986016694456, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1411, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 71.11458539962769, "completions/min_length": 29.75, "epoch": 2.806155373541822, "grad_norm": 0.9627034644907219, "kl": 0.16937255859375, "learning_rate": 8.266927881746292e-07, "loss": -0.009243253618478775, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1412, "train_speed(iter/s)": 0.022518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 70.36458492279053, "completions/min_length": 28.375, "epoch": 2.8081409779101514, "grad_norm": 0.008300632538199536, "kl": 0.12542724609375, "learning_rate": 8.26453893565757e-07, "loss": 0.00012547594087664038, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1413, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 74.37500190734863, "completions/min_length": 29.75, "epoch": 2.810126582278481, "grad_norm": 0.007618235772152284, "kl": 0.11700439453125, "learning_rate": 8.262148689886168e-07, "loss": 0.00011700506729539484, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1414, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.625, "completions/mean_length": 67.53125190734863, "completions/min_length": 28.0, "epoch": 2.812112186646811, "grad_norm": 0.007688761501630678, "kl": 0.11102294921875, "learning_rate": 8.259757145383695e-07, "loss": 0.00011099201947217807, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1415, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 69.8854193687439, "completions/min_length": 32.125, "epoch": 2.8140977910151403, "grad_norm": 1.343903973579174, "kl": 0.14813232421875, "learning_rate": 8.257364303102274e-07, "loss": 0.005972947925329208, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1416, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 74.29166889190674, "completions/min_length": 30.875, "epoch": 2.81608339538347, "grad_norm": 0.029783994439841928, "kl": 0.15594482421875, "learning_rate": 8.254970163994548e-07, "loss": 0.00015598084428347647, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1417, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 78.52083587646484, "completions/min_length": 24.875, "epoch": 2.8180689997517994, "grad_norm": 0.7988913801820672, "kl": 0.13018798828125, "learning_rate": 8.252574729013677e-07, "loss": -0.003261163830757141, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1418, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 75.44791889190674, "completions/min_length": 29.375, "epoch": 2.820054604120129, "grad_norm": 0.8552109222444627, "kl": 0.12353515625, "learning_rate": 8.250177999113333e-07, "loss": -0.0049566589295864105, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1419, "train_speed(iter/s)": 0.02252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 73.62500190734863, "completions/min_length": 31.375, "epoch": 2.822040208488459, "grad_norm": 0.9785746544204191, "kl": 0.13995361328125, "learning_rate": 8.247779975247704e-07, "loss": 0.015292221680283546, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1420, "train_speed(iter/s)": 0.022521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 69.30208539962769, "completions/min_length": 29.125, "epoch": 2.8240258128567883, "grad_norm": 0.011251864331453034, "kl": 0.134521484375, "learning_rate": 8.245380658371497e-07, "loss": 0.00013448242680169642, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1421, "train_speed(iter/s)": 0.022522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 75.32292032241821, "completions/min_length": 29.125, "epoch": 2.826011417225118, "grad_norm": 0.007631644570041815, "kl": 0.130615234375, "learning_rate": 8.24298004943993e-07, "loss": 0.00013047009997535497, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1422, "train_speed(iter/s)": 0.022525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 74.35416793823242, "completions/min_length": 34.625, "epoch": 2.8279970215934473, "grad_norm": 0.8628131323138515, "kl": 0.13714599609375, "learning_rate": 8.240578149408736e-07, "loss": 0.013426492922008038, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1423, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 69.36458492279053, "completions/min_length": 31.375, "epoch": 2.829982625961777, "grad_norm": 0.9888953028542412, "kl": 0.10894775390625, "learning_rate": 8.238174959234164e-07, "loss": 0.005210637580603361, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1424, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 68.92708444595337, "completions/min_length": 26.125, "epoch": 2.831968230330107, "grad_norm": 0.0048824230579252725, "kl": 0.1336669921875, "learning_rate": 8.235770479872975e-07, "loss": 0.00013350117660593241, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1425, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 77.50000190734863, "completions/min_length": 28.0, "epoch": 2.8339538346984363, "grad_norm": 0.006484761067630234, "kl": 0.13372802734375, "learning_rate": 8.233364712282444e-07, "loss": 0.0001336161803919822, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1426, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 73.35416889190674, "completions/min_length": 35.25, "epoch": 2.835939439066766, "grad_norm": 0.004804114223585765, "kl": 0.103271484375, "learning_rate": 8.230957657420357e-07, "loss": 0.00010324568575015292, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1427, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 78.89583587646484, "completions/min_length": 33.0, "epoch": 2.8379250434350958, "grad_norm": 0.006104541315067581, "kl": 0.1317138671875, "learning_rate": 8.228549316245015e-07, "loss": 0.0001317253481829539, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1428, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 69.802086353302, "completions/min_length": 28.875, "epoch": 2.8399106478034253, "grad_norm": 0.005271093076398512, "kl": 0.10577392578125, "learning_rate": 8.226139689715231e-07, "loss": 0.00010573517647571862, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1429, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 67.51041793823242, "completions/min_length": 23.625, "epoch": 2.841896252171755, "grad_norm": 0.004990119311447365, "kl": 0.11224365234375, "learning_rate": 8.223728778790327e-07, "loss": 0.00011221389286220074, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1430, "train_speed(iter/s)": 0.022526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 72.083336353302, "completions/min_length": 29.625, "epoch": 2.8438818565400843, "grad_norm": 0.005855503043211252, "kl": 0.1468505859375, "learning_rate": 8.221316584430139e-07, "loss": 0.0001468592236051336, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1431, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 76.84375286102295, "completions/min_length": 30.75, "epoch": 2.845867460908414, "grad_norm": 0.005628915528885774, "kl": 0.1168212890625, "learning_rate": 8.218903107595013e-07, "loss": 0.00011697282752720639, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1432, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 75.43750047683716, "completions/min_length": 28.375, "epoch": 2.8478530652767438, "grad_norm": 0.731331720790925, "kl": 0.113037109375, "learning_rate": 8.216488349245807e-07, "loss": 0.0035803269129246473, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1433, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 79.52083683013916, "completions/min_length": 35.375, "epoch": 2.8498386696450733, "grad_norm": 1.3085785172556932, "kl": 0.144775390625, "learning_rate": 8.214072310343884e-07, "loss": -0.010267219506204128, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1434, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 66.71875190734863, "completions/min_length": 22.0, "epoch": 2.8518242740134028, "grad_norm": 0.0076299383308353645, "kl": 0.11676025390625, "learning_rate": 8.211654991851126e-07, "loss": 0.00011669890955090523, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1435, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 78.30208587646484, "completions/min_length": 31.375, "epoch": 2.8538098783817323, "grad_norm": 0.005879776018045658, "kl": 0.1221923828125, "learning_rate": 8.209236394729915e-07, "loss": 0.00012216811592224985, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1436, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 74.09375286102295, "completions/min_length": 32.25, "epoch": 2.855795482750062, "grad_norm": 0.9273476449528576, "kl": 0.10113525390625, "learning_rate": 8.206816519943147e-07, "loss": 0.01098918728530407, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1437, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 70.26042079925537, "completions/min_length": 25.25, "epoch": 2.8577810871183917, "grad_norm": 0.004866233949659845, "kl": 0.12310791015625, "learning_rate": 8.204395368454227e-07, "loss": 0.00012310323654673994, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1438, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 63.562501430511475, "completions/min_length": 24.0, "epoch": 2.8597666914867212, "grad_norm": 0.7256411017452613, "kl": 0.11138916015625, "learning_rate": 8.201972941227066e-07, "loss": 0.004826472606509924, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1439, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 65.05208587646484, "completions/min_length": 24.5, "epoch": 2.8617522958550508, "grad_norm": 1.0358364028269755, "kl": 0.1162109375, "learning_rate": 8.199549239226087e-07, "loss": -0.0063631231896579266, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1440, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 73.89583587646484, "completions/min_length": 35.75, "epoch": 2.8637379002233807, "grad_norm": 0.20889823968628643, "kl": 0.19183349609375, "learning_rate": 8.197124263416212e-07, "loss": 0.0001918570778798312, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1441, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 77.42708492279053, "completions/min_length": 32.375, "epoch": 2.86572350459171, "grad_norm": 0.005072774086277166, "kl": 0.1138916015625, "learning_rate": 8.19469801476288e-07, "loss": 0.0001139915402745828, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1442, "train_speed(iter/s)": 0.022527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 68.31250190734863, "completions/min_length": 26.5, "epoch": 2.8677091089600397, "grad_norm": 1.1566943000938172, "kl": 0.1531982421875, "learning_rate": 8.192270494232031e-07, "loss": 0.00015316407370846719, "memory(GiB)": 94.21, "reward": 1.5833333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1443, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 66.42708539962769, "completions/min_length": 24.375, "epoch": 2.8696947133283692, "grad_norm": 1.1246203247076416, "kl": 0.1102294921875, "learning_rate": 8.189841702790113e-07, "loss": -0.0027241259813308716, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1444, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 62.708335399627686, "completions/min_length": 28.25, "epoch": 2.8716803176966987, "grad_norm": 0.004868078520723859, "kl": 0.1181640625, "learning_rate": 8.187411641404079e-07, "loss": 0.00011820593499578536, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1445, "train_speed(iter/s)": 0.02253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 75.6666693687439, "completions/min_length": 35.875, "epoch": 2.8736659220650287, "grad_norm": 0.00675082123519357, "kl": 0.12176513671875, "learning_rate": 8.184980311041389e-07, "loss": 0.00012161044287495315, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1446, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 65.70833492279053, "completions/min_length": 31.25, "epoch": 2.875651526433358, "grad_norm": 0.0074126792257329065, "kl": 0.10748291015625, "learning_rate": 8.182547712670009e-07, "loss": 0.00010747680062195286, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1447, "train_speed(iter/s)": 0.022528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 72.66666793823242, "completions/min_length": 29.75, "epoch": 2.8776371308016877, "grad_norm": 1.0132898334738905, "kl": 0.1456298828125, "learning_rate": 8.180113847258407e-07, "loss": 0.002355144824832678, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1448, "train_speed(iter/s)": 0.022529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 56.13541841506958, "completions/min_length": 28.875, "epoch": 2.879622735170017, "grad_norm": 0.005886849891316731, "kl": 0.11798095703125, "learning_rate": 8.177678715775555e-07, "loss": 0.00011804667883552611, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1449, "train_speed(iter/s)": 0.022531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 66.37500286102295, "completions/min_length": 27.5, "epoch": 2.8816083395383467, "grad_norm": 0.8040701841214775, "kl": 0.13275146484375, "learning_rate": 8.175242319190933e-07, "loss": -0.002023050095885992, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.39076167345046997, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1450, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 69.72916889190674, "completions/min_length": 30.375, "epoch": 2.8835939439066767, "grad_norm": 0.6531414831931729, "kl": 0.13861083984375, "learning_rate": 8.172804658474524e-07, "loss": -0.017453964799642563, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1451, "train_speed(iter/s)": 0.022533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.375, "completions/mean_length": 66.33333587646484, "completions/min_length": 28.875, "epoch": 2.885579548275006, "grad_norm": 0.006807021043302423, "kl": 0.11956787109375, "learning_rate": 8.170365734596809e-07, "loss": 0.00011951306805713102, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1452, "train_speed(iter/s)": 0.022532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 61.88541793823242, "completions/min_length": 30.125, "epoch": 2.8875651526433357, "grad_norm": 1.9871537706263327, "kl": 0.15252685546875, "learning_rate": 8.167925548528778e-07, "loss": 0.017631176859140396, "memory(GiB)": 94.21, "reward": 1.6875000149011612, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.23100870847702026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1453, "train_speed(iter/s)": 0.022534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 64.91666889190674, "completions/min_length": 29.75, "epoch": 2.8895507570116656, "grad_norm": 0.0061998901518053795, "kl": 0.17431640625, "learning_rate": 8.165484101241922e-07, "loss": 0.00017421328811906278, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1454, "train_speed(iter/s)": 0.022535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 61.98958444595337, "completions/min_length": 29.875, "epoch": 2.891536361379995, "grad_norm": 0.0088234855479808, "kl": 0.1619873046875, "learning_rate": 8.16304139370823e-07, "loss": 0.000161778720212169, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1455, "train_speed(iter/s)": 0.022536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 66.97916841506958, "completions/min_length": 36.0, "epoch": 2.8935219657483247, "grad_norm": 0.007207244463214481, "kl": 0.12506103515625, "learning_rate": 8.1605974269002e-07, "loss": 0.00012480223085731268, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1456, "train_speed(iter/s)": 0.022537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 65.43750143051147, "completions/min_length": 27.875, "epoch": 2.895507570116654, "grad_norm": 0.009396924191404418, "kl": 0.18670654296875, "learning_rate": 8.158152201790825e-07, "loss": 0.000186647564987652, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1457, "train_speed(iter/s)": 0.02254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 64.0416693687439, "completions/min_length": 27.875, "epoch": 2.8974931744849837, "grad_norm": 0.00837550795957956, "kl": 0.155517578125, "learning_rate": 8.155705719353603e-07, "loss": 0.00015590095426887274, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1458, "train_speed(iter/s)": 0.022541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.625, "completions/mean_length": 60.406250953674316, "completions/min_length": 21.125, "epoch": 2.8994787788533136, "grad_norm": 0.009514208531011406, "kl": 0.1513671875, "learning_rate": 8.153257980562527e-07, "loss": 0.0001515389740234241, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1459, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 70.6041693687439, "completions/min_length": 29.375, "epoch": 2.901464383221643, "grad_norm": 1.5024427882299138, "kl": 0.15057373046875, "learning_rate": 8.150808986392099e-07, "loss": -0.011073566973209381, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1460, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 66.53125238418579, "completions/min_length": 29.0, "epoch": 2.9034499875899726, "grad_norm": 0.008846818484931816, "kl": 0.1688232421875, "learning_rate": 8.148358737817314e-07, "loss": 0.000168795813806355, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1461, "train_speed(iter/s)": 0.022542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 63.739585876464844, "completions/min_length": 23.25, "epoch": 2.905435591958302, "grad_norm": 0.008808979179739496, "kl": 0.157470703125, "learning_rate": 8.145907235813666e-07, "loss": 0.0001573076588101685, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1462, "train_speed(iter/s)": 0.022543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 61.864585399627686, "completions/min_length": 30.0, "epoch": 2.9074211963266317, "grad_norm": 0.008767488252256643, "kl": 0.175048828125, "learning_rate": 8.143454481357154e-07, "loss": 0.00017518477397970855, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1463, "train_speed(iter/s)": 0.022544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 64.31250238418579, "completions/min_length": 27.375, "epoch": 2.9094068006949616, "grad_norm": 0.008334862853160221, "kl": 0.1494140625, "learning_rate": 8.14100047542427e-07, "loss": 0.00014919511158950627, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1464, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 72.06250238418579, "completions/min_length": 33.875, "epoch": 2.911392405063291, "grad_norm": 0.008241886230823801, "kl": 0.13592529296875, "learning_rate": 8.138545218992007e-07, "loss": 0.00013571848103310913, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1465, "train_speed(iter/s)": 0.022545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 59.84375238418579, "completions/min_length": 27.125, "epoch": 2.9133780094316206, "grad_norm": 1.272127426155022, "kl": 0.1795654296875, "learning_rate": 8.136088713037854e-07, "loss": 0.0034899346064776182, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1466, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 68.27083539962769, "completions/min_length": 24.75, "epoch": 2.9153636137999506, "grad_norm": 0.8505587241612501, "kl": 0.126220703125, "learning_rate": 8.133630958539799e-07, "loss": -0.0005638438160531223, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1467, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 61.250001430511475, "completions/min_length": 27.75, "epoch": 2.91734921816828, "grad_norm": 0.026378017488213848, "kl": 0.1458740234375, "learning_rate": 8.131171956476327e-07, "loss": 0.00014579706476069987, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1468, "train_speed(iter/s)": 0.022546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.375, "completions/mean_length": 60.385417461395264, "completions/min_length": 24.625, "epoch": 2.9193348225366096, "grad_norm": 0.008137518476156475, "kl": 0.11883544921875, "learning_rate": 8.128711707826419e-07, "loss": 0.00011880746751558036, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1469, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 66.30208492279053, "completions/min_length": 32.625, "epoch": 2.921320426904939, "grad_norm": 0.008175863267556689, "kl": 0.14495849609375, "learning_rate": 8.126250213569552e-07, "loss": 0.00014478585217148066, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1470, "train_speed(iter/s)": 0.022548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 75.1354193687439, "completions/min_length": 32.625, "epoch": 2.9233060312732686, "grad_norm": 1.5416747528024735, "kl": 0.24981689453125, "learning_rate": 8.1237874746857e-07, "loss": -0.00014942388224881142, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.06846532225608826, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.30890411138534546, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1471, "train_speed(iter/s)": 0.022549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 70.79166889190674, "completions/min_length": 32.375, "epoch": 2.9252916356415986, "grad_norm": 0.008609547587590537, "kl": 0.1717529296875, "learning_rate": 8.121323492155331e-07, "loss": 0.0001717947016004473, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1472, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.875, "completions/mean_length": 60.302085399627686, "completions/min_length": 24.0, "epoch": 2.927277240009928, "grad_norm": 1.0367589834391397, "kl": 0.52276611328125, "learning_rate": 8.118858266959411e-07, "loss": 0.0005225278437137604, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1473, "train_speed(iter/s)": 0.022552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 60.47916841506958, "completions/min_length": 26.25, "epoch": 2.9292628443782576, "grad_norm": 1.0673394001006085, "kl": 0.156494140625, "learning_rate": 8.116391800079396e-07, "loss": -0.003934068139642477, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1474, "train_speed(iter/s)": 0.022553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 65.11458539962769, "completions/min_length": 28.75, "epoch": 2.931248448746587, "grad_norm": 0.006109337117363829, "kl": 0.1097412109375, "learning_rate": 8.113924092497243e-07, "loss": 0.00010968661808874458, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1475, "train_speed(iter/s)": 0.022556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 63.520835399627686, "completions/min_length": 28.0, "epoch": 2.9332340531149166, "grad_norm": 0.005539485434986862, "kl": 0.1204833984375, "learning_rate": 8.111455145195395e-07, "loss": 0.00012051354860886931, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1476, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 59.968751430511475, "completions/min_length": 25.0, "epoch": 2.9352196574832465, "grad_norm": 0.005401624956637818, "kl": 0.1068115234375, "learning_rate": 8.108984959156794e-07, "loss": 0.0001067964913090691, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1477, "train_speed(iter/s)": 0.022558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 65.08333492279053, "completions/min_length": 30.25, "epoch": 2.937205261851576, "grad_norm": 1.397537299436228, "kl": 0.1685791015625, "learning_rate": 8.106513535364879e-07, "loss": 0.00730905681848526, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1478, "train_speed(iter/s)": 0.02256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 65.18750190734863, "completions/min_length": 31.125, "epoch": 2.9391908662199056, "grad_norm": 1.5686493034540239, "kl": 0.14581298828125, "learning_rate": 8.104040874803567e-07, "loss": -0.0014158705016598105, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1479, "train_speed(iter/s)": 0.022563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.375, "completions/mean_length": 63.22916841506958, "completions/min_length": 23.0, "epoch": 2.9411764705882355, "grad_norm": 0.011239554803719819, "kl": 0.14276123046875, "learning_rate": 8.101566978457283e-07, "loss": 0.00014275385183282197, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1480, "train_speed(iter/s)": 0.022566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.864585399627686, "completions/min_length": 26.125, "epoch": 2.943162074956565, "grad_norm": 0.012829281598694838, "kl": 0.132080078125, "learning_rate": 8.09909184731094e-07, "loss": 0.0001320371957262978, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1481, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 62.08333396911621, "completions/min_length": 23.625, "epoch": 2.9451476793248945, "grad_norm": 0.01109441611794084, "kl": 0.15496826171875, "learning_rate": 8.096615482349934e-07, "loss": 0.00015477568376809359, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1482, "train_speed(iter/s)": 0.022567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.625, "completions/mean_length": 60.62500190734863, "completions/min_length": 31.75, "epoch": 2.947133283693224, "grad_norm": 1.6843770387249337, "kl": 0.15081787109375, "learning_rate": 8.094137884560164e-07, "loss": -0.0042047323659062386, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1483, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 67.65625238418579, "completions/min_length": 22.25, "epoch": 2.9491188880615535, "grad_norm": 0.00495848903304674, "kl": 0.09881591796875, "learning_rate": 8.091659054928011e-07, "loss": 9.89195832516998e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1484, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 65.15625190734863, "completions/min_length": 29.25, "epoch": 2.9511044924298835, "grad_norm": 0.017875211617790078, "kl": 0.173095703125, "learning_rate": 8.089178994440354e-07, "loss": 0.00017328646208625287, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1485, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 224.25, "completions/mean_length": 67.78125190734863, "completions/min_length": 20.625, "epoch": 2.953090096798213, "grad_norm": 0.3371229798900412, "kl": 0.22271728515625, "learning_rate": 8.086697704084555e-07, "loss": 0.022648287937045097, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1486, "train_speed(iter/s)": 0.022568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 69.05208539962769, "completions/min_length": 35.875, "epoch": 2.9550757011665425, "grad_norm": 0.005369353377220404, "kl": 0.1336669921875, "learning_rate": 8.08421518484847e-07, "loss": 0.00013374185073189437, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1487, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 65.42708539962769, "completions/min_length": 22.75, "epoch": 2.9570613055348725, "grad_norm": 0.007293737262870558, "kl": 0.13165283203125, "learning_rate": 8.081731437720443e-07, "loss": 0.00013149608275853097, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1488, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 69.37500238418579, "completions/min_length": 29.875, "epoch": 2.9590469099032015, "grad_norm": 1.7411004373327508, "kl": 0.144287109375, "learning_rate": 8.079246463689307e-07, "loss": -0.006723719649016857, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1489, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 67.53125190734863, "completions/min_length": 20.625, "epoch": 2.9610325142715315, "grad_norm": 0.005379372389635459, "kl": 0.11724853515625, "learning_rate": 8.07676026374438e-07, "loss": 0.00011731521226465702, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1490, "train_speed(iter/s)": 0.022569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 66.31250095367432, "completions/min_length": 30.625, "epoch": 2.963018118639861, "grad_norm": 0.00802514287380977, "kl": 0.16046142578125, "learning_rate": 8.074272838875476e-07, "loss": 0.00016023658099584281, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1491, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.875, "completions/mean_length": 69.52083539962769, "completions/min_length": 30.0, "epoch": 2.9650037230081905, "grad_norm": 0.0064130846191252176, "kl": 0.15924072265625, "learning_rate": 8.07178419007289e-07, "loss": 0.00015932274982333183, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1492, "train_speed(iter/s)": 0.022573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 67.01041841506958, "completions/min_length": 24.25, "epoch": 2.9669893273765204, "grad_norm": 0.007921036904481689, "kl": 0.14178466796875, "learning_rate": 8.069294318327404e-07, "loss": 0.00014174518582876772, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1493, "train_speed(iter/s)": 0.022574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 60.87500190734863, "completions/min_length": 26.125, "epoch": 2.96897493174485, "grad_norm": 0.007579677061138093, "kl": 0.12335205078125, "learning_rate": 8.066803224630294e-07, "loss": 0.0001233743387274444, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1494, "train_speed(iter/s)": 0.022575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 66.40625286102295, "completions/min_length": 29.75, "epoch": 2.9709605361131795, "grad_norm": 0.006112900260031259, "kl": 0.131591796875, "learning_rate": 8.064310909973314e-07, "loss": 0.0001316461421083659, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1495, "train_speed(iter/s)": 0.022574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 69.81250286102295, "completions/min_length": 23.0, "epoch": 2.972946140481509, "grad_norm": 1.0294497379184306, "kl": 0.155029296875, "learning_rate": 8.061817375348707e-07, "loss": 0.00525694340467453, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1496, "train_speed(iter/s)": 0.022572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.875, "completions/mean_length": 58.864585399627686, "completions/min_length": 21.625, "epoch": 2.9749317448498385, "grad_norm": 0.008121450909917942, "kl": 0.1361083984375, "learning_rate": 8.059322621749205e-07, "loss": 0.00013605313142761588, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1497, "train_speed(iter/s)": 0.022575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.125, "completions/mean_length": 64.72916889190674, "completions/min_length": 26.75, "epoch": 2.9769173492181684, "grad_norm": 0.9191924081952824, "kl": 0.14227294921875, "learning_rate": 8.056826650168023e-07, "loss": -0.010151153430342674, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1498, "train_speed(iter/s)": 0.022577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 72.08333587646484, "completions/min_length": 32.75, "epoch": 2.978902953586498, "grad_norm": 0.007612136316154548, "kl": 0.193359375, "learning_rate": 8.054329461598858e-07, "loss": 0.0001932987943291664, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1499, "train_speed(iter/s)": 0.022579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 73.35417079925537, "completions/min_length": 37.875, "epoch": 2.9808885579548274, "grad_norm": 0.00918510865586141, "kl": 0.1856689453125, "learning_rate": 8.051831057035895e-07, "loss": 0.00018577344599179924, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1500, "train_speed(iter/s)": 0.022579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 64.47916889190674, "completions/min_length": 28.5, "epoch": 2.9828741623231574, "grad_norm": 0.009015554771147506, "kl": 0.17510986328125, "learning_rate": 8.049331437473803e-07, "loss": 0.0001751594099914655, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1501, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 72.90625143051147, "completions/min_length": 32.0, "epoch": 2.9848597666914864, "grad_norm": 1.155739748358851, "kl": 0.1541748046875, "learning_rate": 8.046830603907735e-07, "loss": -0.007544038351625204, "memory(GiB)": 94.21, "reward": 1.5729166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.5729166669771075, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1502, "train_speed(iter/s)": 0.02257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 67.958336353302, "completions/min_length": 22.875, "epoch": 2.9868453710598164, "grad_norm": 0.006895625891506662, "kl": 0.1402587890625, "learning_rate": 8.044328557333322e-07, "loss": 0.00014047148579265922, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1503, "train_speed(iter/s)": 0.022571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 59.854167461395264, "completions/min_length": 22.125, "epoch": 2.988830975428146, "grad_norm": 0.009594255590401089, "kl": 0.16192626953125, "learning_rate": 8.041825298746687e-07, "loss": 0.00016186795255634934, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1504, "train_speed(iter/s)": 0.022573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 64.58333492279053, "completions/min_length": 23.25, "epoch": 2.9908165797964754, "grad_norm": 0.00819396788136529, "kl": 0.16033935546875, "learning_rate": 8.039320829144429e-07, "loss": 0.00016033969586715102, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1505, "train_speed(iter/s)": 0.022575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 62.06250190734863, "completions/min_length": 28.5, "epoch": 2.9928021841648054, "grad_norm": 0.007635576011688738, "kl": 0.1573486328125, "learning_rate": 8.036815149523629e-07, "loss": 0.00015726122364867479, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1506, "train_speed(iter/s)": 0.022577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.875, "completions/mean_length": 61.16666793823242, "completions/min_length": 26.75, "epoch": 2.994787788533135, "grad_norm": 1.7164526760418497, "kl": 0.1575927734375, "learning_rate": 8.034308260881853e-07, "loss": -0.013045396655797958, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.15789688751101494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1507, "train_speed(iter/s)": 0.022578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.875, "completions/mean_length": 60.36458396911621, "completions/min_length": 25.25, "epoch": 2.9967733929014644, "grad_norm": 0.008682301091845916, "kl": 0.13787841796875, "learning_rate": 8.031800164217149e-07, "loss": 0.00013781135203316808, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1508, "train_speed(iter/s)": 0.022579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.125, "completions/mean_length": 67.10416889190674, "completions/min_length": 26.375, "epoch": 2.998758997269794, "grad_norm": 0.00516058452820252, "kl": 0.1015625, "learning_rate": 8.02929086052804e-07, "loss": 0.00010153975745197386, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1509, "train_speed(iter/s)": 0.022577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 66.35416841506958, "completions/min_length": 27.625, "epoch": 3.0019856043683295, "grad_norm": 1.0896806214591497, "kl": 0.129150390625, "learning_rate": 8.026780350813536e-07, "loss": 0.011337703093886375, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1510, "train_speed(iter/s)": 0.022577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 72.09375333786011, "completions/min_length": 24.625, "epoch": 3.003971208736659, "grad_norm": 0.007754142705121224, "kl": 0.18450927734375, "learning_rate": 8.024268636073124e-07, "loss": 0.00018431153148412704, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1511, "train_speed(iter/s)": 0.022578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 68.97916793823242, "completions/min_length": 27.75, "epoch": 3.005956813104989, "grad_norm": 0.007182274155993902, "kl": 0.15289306640625, "learning_rate": 8.021755717306771e-07, "loss": 0.0001527169079054147, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1512, "train_speed(iter/s)": 0.022578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 64.61458444595337, "completions/min_length": 25.125, "epoch": 3.0079424174733185, "grad_norm": 0.006520480244141717, "kl": 0.1590576171875, "learning_rate": 8.019241595514923e-07, "loss": 0.00015905409236438572, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1513, "train_speed(iter/s)": 0.022581 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 67.40625190734863, "completions/min_length": 25.875, "epoch": 3.009928021841648, "grad_norm": 0.00647501823146812, "kl": 0.14263916015625, "learning_rate": 8.016726271698507e-07, "loss": 0.00014270719839259982, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1514, "train_speed(iter/s)": 0.022582 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.875, "completions/mean_length": 67.44791889190674, "completions/min_length": 23.5, "epoch": 3.0119136262099775, "grad_norm": 0.010008172723007629, "kl": 0.17230224609375, "learning_rate": 8.014209746858927e-07, "loss": 0.0001721722073853016, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1515, "train_speed(iter/s)": 0.022582 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 64.13541841506958, "completions/min_length": 26.625, "epoch": 3.0138992305783074, "grad_norm": 0.004923106032262659, "kl": 0.13189697265625, "learning_rate": 8.011692021998063e-07, "loss": 0.00013180731912143528, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1516, "train_speed(iter/s)": 0.022584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 70.93750286102295, "completions/min_length": 24.75, "epoch": 3.015884834946637, "grad_norm": 1.1770413939614202, "kl": 0.127197265625, "learning_rate": 8.009173098118278e-07, "loss": 0.005528264679014683, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1517, "train_speed(iter/s)": 0.022584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 69.46875143051147, "completions/min_length": 25.125, "epoch": 3.0178704393149665, "grad_norm": 0.004896991507392525, "kl": 0.1171875, "learning_rate": 8.006652976222408e-07, "loss": 0.00011717713641701266, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1518, "train_speed(iter/s)": 0.022584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 73.22916889190674, "completions/min_length": 26.875, "epoch": 3.019856043683296, "grad_norm": 0.0053673222848831265, "kl": 0.1318359375, "learning_rate": 8.004131657313767e-07, "loss": 0.00013186127762310207, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1519, "train_speed(iter/s)": 0.022583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 68.35416793823242, "completions/min_length": 25.75, "epoch": 3.021841648051626, "grad_norm": 0.006364017956235598, "kl": 0.1448974609375, "learning_rate": 8.001609142396149e-07, "loss": 0.00014463487605098635, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1520, "train_speed(iter/s)": 0.022583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 66.48958396911621, "completions/min_length": 30.375, "epoch": 3.0238272524199554, "grad_norm": 0.0060466522403471255, "kl": 0.15087890625, "learning_rate": 7.999085432473815e-07, "loss": 0.0001509106659796089, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1521, "train_speed(iter/s)": 0.022585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 69.17708492279053, "completions/min_length": 33.0, "epoch": 3.025812856788285, "grad_norm": 0.005313967523752258, "kl": 0.13458251953125, "learning_rate": 7.996560528551512e-07, "loss": 0.00013458832108881325, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1522, "train_speed(iter/s)": 0.022586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 71.84375333786011, "completions/min_length": 27.25, "epoch": 3.0277984611566144, "grad_norm": 0.006082233498838991, "kl": 0.14105224609375, "learning_rate": 7.99403443163446e-07, "loss": 0.00014113588258624077, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1523, "train_speed(iter/s)": 0.022589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 65.55208587646484, "completions/min_length": 25.25, "epoch": 3.029784065524944, "grad_norm": 0.006015814920365411, "kl": 0.1278076171875, "learning_rate": 7.991507142728348e-07, "loss": 0.00012768225860781968, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1524, "train_speed(iter/s)": 0.022589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 68.55208587646484, "completions/min_length": 20.25, "epoch": 3.031769669893274, "grad_norm": 0.0065464985752072555, "kl": 0.15277099609375, "learning_rate": 7.988978662839345e-07, "loss": 0.00015271530719473958, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1525, "train_speed(iter/s)": 0.022591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 72.90625238418579, "completions/min_length": 29.75, "epoch": 3.0337552742616034, "grad_norm": 0.0054327752949591065, "kl": 0.14862060546875, "learning_rate": 7.986448992974095e-07, "loss": 0.00014869638835079968, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1526, "train_speed(iter/s)": 0.022591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 68.73958587646484, "completions/min_length": 25.625, "epoch": 3.035740878629933, "grad_norm": 0.004604420692129586, "kl": 0.1121826171875, "learning_rate": 7.983918134139709e-07, "loss": 0.00011213271500309929, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1527, "train_speed(iter/s)": 0.022591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 66.50000286102295, "completions/min_length": 20.75, "epoch": 3.0377264829982624, "grad_norm": 0.004298526752156533, "kl": 0.106689453125, "learning_rate": 7.98138608734378e-07, "loss": 0.00010664231376722455, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1528, "train_speed(iter/s)": 0.022592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 70.13541889190674, "completions/min_length": 30.25, "epoch": 3.0397120873665924, "grad_norm": 2.163072702180223, "kl": 0.31298828125, "learning_rate": 7.978852853594368e-07, "loss": -0.01896550878882408, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1529, "train_speed(iter/s)": 0.022593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.375, "completions/mean_length": 58.42708492279053, "completions/min_length": 19.375, "epoch": 3.041697691734922, "grad_norm": 0.007024291819665847, "kl": 0.1513671875, "learning_rate": 7.976318433900011e-07, "loss": 0.00015135837020352483, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1530, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 68.13541746139526, "completions/min_length": 31.625, "epoch": 3.0436832961032514, "grad_norm": 0.006125760556760419, "kl": 0.13555908203125, "learning_rate": 7.97378282926971e-07, "loss": 0.00013557649799622595, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1531, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 68.35416889190674, "completions/min_length": 29.625, "epoch": 3.045668900471581, "grad_norm": 0.006626752656693549, "kl": 0.13238525390625, "learning_rate": 7.971246040712949e-07, "loss": 0.00013254185614641756, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1532, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.125, "completions/mean_length": 61.30208492279053, "completions/min_length": 25.125, "epoch": 3.047654504839911, "grad_norm": 0.00742816743800988, "kl": 0.1488037109375, "learning_rate": 7.968708069239672e-07, "loss": 0.00014870602171868086, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1533, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 61.843750953674316, "completions/min_length": 25.875, "epoch": 3.0496401092082404, "grad_norm": 1.9699282701268699, "kl": 0.16021728515625, "learning_rate": 7.966168915860303e-07, "loss": 0.004624407738447189, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1534, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 64.4166693687439, "completions/min_length": 26.875, "epoch": 3.05162571357657, "grad_norm": 0.0074266227728457, "kl": 0.14752197265625, "learning_rate": 7.963628581585733e-07, "loss": 0.00014756032032892108, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1535, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 64.52083444595337, "completions/min_length": 22.5, "epoch": 3.0536113179448994, "grad_norm": 0.0057018590434599874, "kl": 0.1346435546875, "learning_rate": 7.961087067427323e-07, "loss": 0.00013436665176413953, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1536, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 62.84375190734863, "completions/min_length": 29.25, "epoch": 3.055596922313229, "grad_norm": 1.8346653634143228, "kl": 0.14892578125, "learning_rate": 7.958544374396905e-07, "loss": 0.01360081322491169, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1537, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.625, "completions/mean_length": 67.47916793823242, "completions/min_length": 25.125, "epoch": 3.057582526681559, "grad_norm": 0.897447843873728, "kl": 0.15631103515625, "learning_rate": 7.956000503506778e-07, "loss": 0.01820121705532074, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1538, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 65.25000190734863, "completions/min_length": 29.0, "epoch": 3.0595681310498883, "grad_norm": 0.9788267283128639, "kl": 0.15155029296875, "learning_rate": 7.953455455769711e-07, "loss": -0.00905262678861618, "memory(GiB)": 94.21, "reward": 1.5729166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1539, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 69.78125143051147, "completions/min_length": 27.0, "epoch": 3.061553735418218, "grad_norm": 0.005789859360527402, "kl": 0.17724609375, "learning_rate": 7.950909232198943e-07, "loss": 0.00017713612760417163, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1540, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 69.46875286102295, "completions/min_length": 27.125, "epoch": 3.0635393397865474, "grad_norm": 0.00676607615441637, "kl": 0.18475341796875, "learning_rate": 7.94836183380818e-07, "loss": 0.00018466528854332864, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1541, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 70.91666889190674, "completions/min_length": 27.25, "epoch": 3.0655249441548773, "grad_norm": 1.797685149306257, "kl": 0.172607421875, "learning_rate": 7.945813261611596e-07, "loss": -0.004025423899292946, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1542, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 65.23958539962769, "completions/min_length": 26.375, "epoch": 3.067510548523207, "grad_norm": 0.004778188011642266, "kl": 0.1251220703125, "learning_rate": 7.943263516623832e-07, "loss": 0.00012511663953773677, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1543, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 69.81250286102295, "completions/min_length": 28.75, "epoch": 3.0694961528915363, "grad_norm": 0.009174524159952393, "kl": 0.144287109375, "learning_rate": 7.940712599859994e-07, "loss": 0.00014421633386518806, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1544, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 75.1041693687439, "completions/min_length": 27.5, "epoch": 3.071481757259866, "grad_norm": 1.2381296100232917, "kl": 0.17138671875, "learning_rate": 7.938160512335658e-07, "loss": -0.011360193602740765, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1545, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 69.65625190734863, "completions/min_length": 25.5, "epoch": 3.073467361628196, "grad_norm": 1.4010088060761308, "kl": 0.160888671875, "learning_rate": 7.935607255066865e-07, "loss": 0.0196949765086174, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1546, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 70.89583492279053, "completions/min_length": 24.5, "epoch": 3.0754529659965253, "grad_norm": 0.004868994031868303, "kl": 0.12860107421875, "learning_rate": 7.93305282907012e-07, "loss": 0.00012856232933700085, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1547, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 62.156251430511475, "completions/min_length": 26.125, "epoch": 3.077438570364855, "grad_norm": 0.010414335426126249, "kl": 0.12957763671875, "learning_rate": 7.930497235362394e-07, "loss": 0.00012960430467501283, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1548, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 73.78125238418579, "completions/min_length": 25.25, "epoch": 3.0794241747331843, "grad_norm": 0.0042866194595961565, "kl": 0.131103515625, "learning_rate": 7.927940474961127e-07, "loss": 0.00013111383304931223, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1549, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 68.71875286102295, "completions/min_length": 25.875, "epoch": 3.081409779101514, "grad_norm": 0.00534739297847197, "kl": 0.11309814453125, "learning_rate": 7.925382548884216e-07, "loss": 0.00011304750660201535, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1550, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 75.62500143051147, "completions/min_length": 28.5, "epoch": 3.0833953834698438, "grad_norm": 0.9133512132839091, "kl": 0.13824462890625, "learning_rate": 7.922823458150029e-07, "loss": 0.00013828588998876512, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1551, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 76.62500286102295, "completions/min_length": 33.25, "epoch": 3.0853809878381733, "grad_norm": 1.5811772141221476, "kl": 0.17047119140625, "learning_rate": 7.920263203777391e-07, "loss": 0.0076699345372617245, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1552, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 70.63541793823242, "completions/min_length": 30.75, "epoch": 3.087366592206503, "grad_norm": 0.007425176945217928, "kl": 0.1614990234375, "learning_rate": 7.917701786785598e-07, "loss": 0.00016143161337822676, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1553, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 80.958336353302, "completions/min_length": 27.125, "epoch": 3.0893521965748323, "grad_norm": 0.9908209038784358, "kl": 0.1368408203125, "learning_rate": 7.915139208194404e-07, "loss": -0.003357556415721774, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1554, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 77.60416984558105, "completions/min_length": 27.125, "epoch": 3.0913378009431622, "grad_norm": 0.0049730104096962725, "kl": 0.1412353515625, "learning_rate": 7.912575469024022e-07, "loss": 0.00014133579679764807, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1555, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 68.58333539962769, "completions/min_length": 23.75, "epoch": 3.0933234053114917, "grad_norm": 0.004789400367678571, "kl": 0.148681640625, "learning_rate": 7.910010570295136e-07, "loss": 0.00014869125152472407, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1556, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 67.92708492279053, "completions/min_length": 32.875, "epoch": 3.0953090096798213, "grad_norm": 0.02444720777992413, "kl": 0.15936279296875, "learning_rate": 7.907444513028887e-07, "loss": 0.00015949964290484786, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1557, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 69.1041693687439, "completions/min_length": 29.75, "epoch": 3.0972946140481508, "grad_norm": 0.004765885175389616, "kl": 0.1185302734375, "learning_rate": 7.904877298246874e-07, "loss": 0.00011849829752463847, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1558, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 70.8541693687439, "completions/min_length": 25.125, "epoch": 3.0992802184164807, "grad_norm": 0.007165502492879713, "kl": 0.13323974609375, "learning_rate": 7.902308926971164e-07, "loss": 0.00013328055501915514, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1559, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 88.37500381469727, "completions/min_length": 34.75, "epoch": 3.1012658227848102, "grad_norm": 0.006680680389758271, "kl": 0.1689453125, "learning_rate": 7.899739400224277e-07, "loss": 0.00016898289322853088, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1560, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 78.302086353302, "completions/min_length": 33.375, "epoch": 3.1032514271531397, "grad_norm": 0.0051663776460708, "kl": 0.13824462890625, "learning_rate": 7.897168719029197e-07, "loss": 0.00013828356168232858, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1561, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 92.56250286102295, "completions/min_length": 42.125, "epoch": 3.1052370315214692, "grad_norm": 0.004840308813397641, "kl": 0.14678955078125, "learning_rate": 7.894596884409368e-07, "loss": 0.00014671373355668038, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1562, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 69.36458587646484, "completions/min_length": 22.25, "epoch": 3.1072226358897987, "grad_norm": 0.004287770768888566, "kl": 0.12872314453125, "learning_rate": 7.892023897388694e-07, "loss": 0.00012859402340836823, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1563, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 75.82291889190674, "completions/min_length": 23.375, "epoch": 3.1092082402581287, "grad_norm": 1.1276802479244754, "kl": 0.166015625, "learning_rate": 7.889449758991533e-07, "loss": 0.0007246571476571262, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1564, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 77.86458587646484, "completions/min_length": 27.375, "epoch": 3.111193844626458, "grad_norm": 0.004584033784166918, "kl": 0.14996337890625, "learning_rate": 7.886874470242706e-07, "loss": 0.00014988250040914863, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1565, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 69.51041889190674, "completions/min_length": 23.0, "epoch": 3.1131794489947877, "grad_norm": 0.05598645754923216, "kl": 0.1702880859375, "learning_rate": 7.884298032167489e-07, "loss": 0.000170221523148939, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1566, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 77.26041793823242, "completions/min_length": 33.25, "epoch": 3.115165053363117, "grad_norm": 0.004858184001340611, "kl": 0.15155029296875, "learning_rate": 7.88172044579162e-07, "loss": 0.00015155557775869966, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1567, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 67.52083539962769, "completions/min_length": 17.375, "epoch": 3.117150657731447, "grad_norm": 0.004925319584826496, "kl": 0.12481689453125, "learning_rate": 7.879141712141288e-07, "loss": 0.00012457444972824305, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1568, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 74.46875190734863, "completions/min_length": 27.75, "epoch": 3.1191362620997767, "grad_norm": 0.004628264780416145, "kl": 0.12054443359375, "learning_rate": 7.876561832243143e-07, "loss": 0.00012040646834066138, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1569, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 70.37500238418579, "completions/min_length": 24.625, "epoch": 3.121121866468106, "grad_norm": 0.0059331130645311324, "kl": 0.11474609375, "learning_rate": 7.873980807124292e-07, "loss": 0.00011482219997560605, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1570, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 82.93750190734863, "completions/min_length": 36.0, "epoch": 3.1231074708364357, "grad_norm": 0.14866760214561833, "kl": 0.25732421875, "learning_rate": 7.871398637812294e-07, "loss": 0.00025705574080348015, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1571, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 74.42708587646484, "completions/min_length": 31.375, "epoch": 3.1250930752047656, "grad_norm": 0.007044968112130971, "kl": 0.1356201171875, "learning_rate": 7.868815325335168e-07, "loss": 0.0001355307176709175, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1572, "train_speed(iter/s)": 0.022599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 79.46875286102295, "completions/min_length": 26.5, "epoch": 3.127078679573095, "grad_norm": 0.9901700131135319, "kl": 0.1767578125, "learning_rate": 7.866230870721383e-07, "loss": 0.003998658154159784, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1573, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 63.32291841506958, "completions/min_length": 18.125, "epoch": 3.1290642839414247, "grad_norm": 0.005600105945306169, "kl": 0.118896484375, "learning_rate": 7.863645274999868e-07, "loss": 0.0001190089387819171, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1574, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 74.50000190734863, "completions/min_length": 20.75, "epoch": 3.131049888309754, "grad_norm": 0.005303313553400478, "kl": 0.13140869140625, "learning_rate": 7.861058539200003e-07, "loss": 0.00013143512478563935, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1575, "train_speed(iter/s)": 0.022599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 69.71875333786011, "completions/min_length": 19.375, "epoch": 3.1330354926780837, "grad_norm": 0.005189209603158734, "kl": 0.1573486328125, "learning_rate": 7.858470664351622e-07, "loss": 0.00015709316357970238, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1576, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 75.22916889190674, "completions/min_length": 28.5, "epoch": 3.1350210970464136, "grad_norm": 0.9197710343234746, "kl": 0.14508056640625, "learning_rate": 7.855881651485015e-07, "loss": 0.003865651786327362, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1577, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 78.63541984558105, "completions/min_length": 27.375, "epoch": 3.137006701414743, "grad_norm": 0.00627293701984189, "kl": 0.12786865234375, "learning_rate": 7.853291501630921e-07, "loss": 0.0001278404815820977, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1578, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 73.0729193687439, "completions/min_length": 26.0, "epoch": 3.1389923057830726, "grad_norm": 0.0064907021217699016, "kl": 0.15643310546875, "learning_rate": 7.850700215820536e-07, "loss": 0.00015644051018171012, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1579, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.625, "completions/mean_length": 80.37500286102295, "completions/min_length": 22.375, "epoch": 3.140977910151402, "grad_norm": 0.005542807300237455, "kl": 0.1475830078125, "learning_rate": 7.848107795085506e-07, "loss": 0.0001476538018323481, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1580, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 64.08333492279053, "completions/min_length": 25.25, "epoch": 3.142963514519732, "grad_norm": 1.2039381621943268, "kl": 0.11163330078125, "learning_rate": 7.845514240457928e-07, "loss": -0.0027569918893277645, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1581, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 68.22916889190674, "completions/min_length": 21.75, "epoch": 3.1449491188880616, "grad_norm": 0.0069590060510106655, "kl": 0.16790771484375, "learning_rate": 7.842919552970353e-07, "loss": 0.00016799391596578062, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1582, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 66.29166841506958, "completions/min_length": 19.25, "epoch": 3.146934723256391, "grad_norm": 0.006224080690219878, "kl": 0.13818359375, "learning_rate": 7.840323733655778e-07, "loss": 0.0001380950416205451, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1583, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 56.01041841506958, "completions/min_length": 16.25, "epoch": 3.1489203276247206, "grad_norm": 0.009383479954545693, "kl": 0.14349365234375, "learning_rate": 7.83772678354766e-07, "loss": 0.0001433076395187527, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1584, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 77.71875190734863, "completions/min_length": 25.25, "epoch": 3.1509059319930506, "grad_norm": 0.006408260173547487, "kl": 0.1298828125, "learning_rate": 7.835128703679895e-07, "loss": 0.00012998521560803056, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1585, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 79.94791984558105, "completions/min_length": 28.0, "epoch": 3.15289153636138, "grad_norm": 1.3341875464657413, "kl": 0.141845703125, "learning_rate": 7.832529495086837e-07, "loss": 0.015978978946805, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1586, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 68.17708492279053, "completions/min_length": 22.375, "epoch": 3.1548771407297096, "grad_norm": 0.9855642828401813, "kl": 0.125, "learning_rate": 7.829929158803285e-07, "loss": 0.0016770760994404554, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1587, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 68.34375238418579, "completions/min_length": 24.5, "epoch": 3.156862745098039, "grad_norm": 0.006972472354643738, "kl": 0.1658935546875, "learning_rate": 7.82732769586449e-07, "loss": 0.00016592885367572308, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1588, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.625, "completions/mean_length": 65.15625190734863, "completions/min_length": 18.75, "epoch": 3.1588483494663686, "grad_norm": 0.007042093284151057, "kl": 0.12786865234375, "learning_rate": 7.824725107306148e-07, "loss": 0.00012775440700352192, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1589, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 66.01041889190674, "completions/min_length": 21.75, "epoch": 3.1608339538346986, "grad_norm": 0.006367888551967647, "kl": 0.12127685546875, "learning_rate": 7.822121394164406e-07, "loss": 0.0001213420182466507, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1590, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 72.98958587646484, "completions/min_length": 31.125, "epoch": 3.162819558203028, "grad_norm": 0.0059194410346197, "kl": 0.13812255859375, "learning_rate": 7.819516557475858e-07, "loss": 0.00013797497376799583, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1591, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 66.64583587646484, "completions/min_length": 29.5, "epoch": 3.1648051625713576, "grad_norm": 0.004671946892964499, "kl": 0.1226806640625, "learning_rate": 7.816910598277545e-07, "loss": 0.0001227743923664093, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1592, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 63.90625047683716, "completions/min_length": 18.5, "epoch": 3.166790766939687, "grad_norm": 0.00857730893593178, "kl": 0.1259765625, "learning_rate": 7.814303517606955e-07, "loss": 0.00012605068332049996, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1593, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 61.40625190734863, "completions/min_length": 26.5, "epoch": 3.168776371308017, "grad_norm": 0.00711534361282793, "kl": 0.13616943359375, "learning_rate": 7.811695316502022e-07, "loss": 0.00013601114915218204, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1594, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 66.09375143051147, "completions/min_length": 29.25, "epoch": 3.1707619756763465, "grad_norm": 1.1142694759945453, "kl": 0.171875, "learning_rate": 7.809085996001129e-07, "loss": -0.006257231347262859, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1595, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.625, "completions/mean_length": 75.21875238418579, "completions/min_length": 21.75, "epoch": 3.172747580044676, "grad_norm": 0.00723908059239392, "kl": 0.16400146484375, "learning_rate": 7.8064755571431e-07, "loss": 0.00016430024697910994, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1596, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 61.718751430511475, "completions/min_length": 17.25, "epoch": 3.1747331844130056, "grad_norm": 0.008161048778028125, "kl": 0.12396240234375, "learning_rate": 7.803864000967204e-07, "loss": 0.00012396546662785113, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.45695383101701736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1597, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 74.11458539962769, "completions/min_length": 25.875, "epoch": 3.1767187887813355, "grad_norm": 0.00808004752736522, "kl": 0.16107177734375, "learning_rate": 7.801251328513163e-07, "loss": 0.0001611294865142554, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1598, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 63.31250238418579, "completions/min_length": 21.875, "epoch": 3.178704393149665, "grad_norm": 0.005077917861440402, "kl": 0.14080810546875, "learning_rate": 7.798637540821133e-07, "loss": 0.00014077738160267472, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1599, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 59.31250286102295, "completions/min_length": 25.375, "epoch": 3.1806899975179945, "grad_norm": 0.004822578514240816, "kl": 0.13470458984375, "learning_rate": 7.796022638931721e-07, "loss": 0.0001347649667877704, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1600, "train_speed(iter/s)": 0.022621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.125, "completions/mean_length": 68.46875286102295, "completions/min_length": 20.75, "epoch": 3.182675601886324, "grad_norm": 1.0845245994786377, "kl": 0.1588134765625, "learning_rate": 7.793406623885975e-07, "loss": -0.006992870010435581, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1601, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 70.57291889190674, "completions/min_length": 31.0, "epoch": 3.1846612062546535, "grad_norm": 1.1340500403690734, "kl": 0.16326904296875, "learning_rate": 7.790789496725387e-07, "loss": -0.0015656118048354983, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1602, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 78.55208539962769, "completions/min_length": 30.5, "epoch": 3.1866468106229835, "grad_norm": 0.004941984622011276, "kl": 0.1529541015625, "learning_rate": 7.78817125849189e-07, "loss": 0.00015300113591365516, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1603, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.125, "completions/mean_length": 70.11458587646484, "completions/min_length": 17.5, "epoch": 3.188632414991313, "grad_norm": 0.011448552207716163, "kl": 0.14312744140625, "learning_rate": 7.78555191022786e-07, "loss": 0.00014319675392471254, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1604, "train_speed(iter/s)": 0.022619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 66.45833492279053, "completions/min_length": 22.125, "epoch": 3.1906180193596425, "grad_norm": 0.8050589697827149, "kl": 0.13763427734375, "learning_rate": 7.782931452976118e-07, "loss": -0.002308471826836467, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1605, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 69.25000143051147, "completions/min_length": 26.75, "epoch": 3.192603623727972, "grad_norm": 0.03288996300852182, "kl": 0.1339111328125, "learning_rate": 7.780309887779921e-07, "loss": 0.00013382203178480268, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1606, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.52083492279053, "completions/min_length": 25.625, "epoch": 3.194589228096302, "grad_norm": 0.006532461641287733, "kl": 0.14849853515625, "learning_rate": 7.777687215682972e-07, "loss": 0.0001485254178987816, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1607, "train_speed(iter/s)": 0.022621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 70.11458492279053, "completions/min_length": 25.0, "epoch": 3.1965748324646315, "grad_norm": 0.005846996579979615, "kl": 0.13287353515625, "learning_rate": 7.775063437729413e-07, "loss": 0.00013302621664479375, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1608, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 64.44791793823242, "completions/min_length": 24.0, "epoch": 3.198560436832961, "grad_norm": 0.030037338411353125, "kl": 0.14862060546875, "learning_rate": 7.772438554963826e-07, "loss": 0.00014869551523588598, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1609, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 69.30208444595337, "completions/min_length": 24.25, "epoch": 3.2005460412012905, "grad_norm": 0.012863827599513405, "kl": 0.12738037109375, "learning_rate": 7.769812568431231e-07, "loss": 0.0001272565859835595, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1610, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 67.23958587646484, "completions/min_length": 24.25, "epoch": 3.2025316455696204, "grad_norm": 0.9533867712540602, "kl": 0.11541748046875, "learning_rate": 7.767185479177092e-07, "loss": -0.004328140988945961, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1611, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 74.2916693687439, "completions/min_length": 28.125, "epoch": 3.20451724993795, "grad_norm": 0.008510642382629566, "kl": 0.1422119140625, "learning_rate": 7.764557288247307e-07, "loss": 0.00014209389337338507, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1612, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 62.083335399627686, "completions/min_length": 23.125, "epoch": 3.2065028543062795, "grad_norm": 0.004026934549716962, "kl": 0.10986328125, "learning_rate": 7.761927996688217e-07, "loss": 0.00010998039215337485, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1613, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 65.22916889190674, "completions/min_length": 14.625, "epoch": 3.208488458674609, "grad_norm": 0.02000490640244023, "kl": 0.1162109375, "learning_rate": 7.759297605546596e-07, "loss": 0.00011624234321061522, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1614, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 65.00000333786011, "completions/min_length": 20.5, "epoch": 3.2104740630429385, "grad_norm": 0.005323828133178569, "kl": 0.1217041015625, "learning_rate": 7.756666115869664e-07, "loss": 0.00012178064207546413, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1615, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 73.08333492279053, "completions/min_length": 22.875, "epoch": 3.2124596674112684, "grad_norm": 0.9789591487573561, "kl": 0.15283203125, "learning_rate": 7.754033528705069e-07, "loss": -0.009583230130374432, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1616, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 72.00000143051147, "completions/min_length": 15.625, "epoch": 3.214445271779598, "grad_norm": 1.206109593050698, "kl": 0.1192626953125, "learning_rate": 7.751399845100899e-07, "loss": -0.009789164178073406, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1617, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 78.8229193687439, "completions/min_length": 28.375, "epoch": 3.2164308761479274, "grad_norm": 1.3171703713872511, "kl": 0.14453125, "learning_rate": 7.748765066105684e-07, "loss": 0.013453269377350807, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12089945748448372, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1618, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 68.83333539962769, "completions/min_length": 26.375, "epoch": 3.218416480516257, "grad_norm": 0.004575279815882035, "kl": 0.11480712890625, "learning_rate": 7.746129192768385e-07, "loss": 0.0001146999275078997, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1619, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 71.58333539962769, "completions/min_length": 29.625, "epoch": 3.220402084884587, "grad_norm": 0.004567171997031353, "kl": 0.10455322265625, "learning_rate": 7.743492226138397e-07, "loss": 0.00010440638288855553, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1620, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 74.48958444595337, "completions/min_length": 25.625, "epoch": 3.2223876892529164, "grad_norm": 0.8820630926357819, "kl": 0.15264892578125, "learning_rate": 7.740854167265556e-07, "loss": -0.016015177592635155, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1621, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 67.95833539962769, "completions/min_length": 18.375, "epoch": 3.224373293621246, "grad_norm": 0.0052832388436746735, "kl": 0.14251708984375, "learning_rate": 7.738215017200126e-07, "loss": 0.00014245050260797143, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1622, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 76.21875286102295, "completions/min_length": 27.125, "epoch": 3.2263588979895754, "grad_norm": 0.00569019222712594, "kl": 0.122802734375, "learning_rate": 7.735574776992812e-07, "loss": 0.00012273839092813432, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1623, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 82.76042079925537, "completions/min_length": 28.875, "epoch": 3.2283445023579054, "grad_norm": 0.89556832340245, "kl": 0.141357421875, "learning_rate": 7.732933447694748e-07, "loss": -0.004790339153259993, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1624, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 75.28125190734863, "completions/min_length": 28.75, "epoch": 3.230330106726235, "grad_norm": 0.0040374499945258924, "kl": 0.11529541015625, "learning_rate": 7.730291030357504e-07, "loss": 0.00011524982255650684, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1625, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 68.5104193687439, "completions/min_length": 23.875, "epoch": 3.2323157110945644, "grad_norm": 0.018725914039625435, "kl": 0.16650390625, "learning_rate": 7.727647526033083e-07, "loss": 0.0001662827271502465, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1626, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 83.29166984558105, "completions/min_length": 36.625, "epoch": 3.234301315462894, "grad_norm": 0.7779134959574959, "kl": 0.1260986328125, "learning_rate": 7.725002935773921e-07, "loss": 0.0023582628928124905, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1627, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 69.64583683013916, "completions/min_length": 27.5, "epoch": 3.2362869198312234, "grad_norm": 0.00375191172626384, "kl": 0.11846923828125, "learning_rate": 7.722357260632886e-07, "loss": 0.00011851306771859527, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1628, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 76.75000190734863, "completions/min_length": 29.125, "epoch": 3.2382725241995534, "grad_norm": 0.9177772428418636, "kl": 0.16412353515625, "learning_rate": 7.719710501663277e-07, "loss": 0.010430104099214077, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1629, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 82.70833539962769, "completions/min_length": 31.125, "epoch": 3.240258128567883, "grad_norm": 0.00582702891715206, "kl": 0.14862060546875, "learning_rate": 7.717062659918825e-07, "loss": 0.00014866201672703028, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1630, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 75.29166841506958, "completions/min_length": 28.25, "epoch": 3.2422437329362124, "grad_norm": 0.010167066215420423, "kl": 0.13433837890625, "learning_rate": 7.714413736453693e-07, "loss": 0.00013425570796243846, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1631, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.625, "completions/mean_length": 87.26041841506958, "completions/min_length": 28.625, "epoch": 3.244229337304542, "grad_norm": 0.003856639380792482, "kl": 0.12005615234375, "learning_rate": 7.711763732322475e-07, "loss": 0.00011997627734672278, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1632, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 70.29166889190674, "completions/min_length": 23.625, "epoch": 3.246214941672872, "grad_norm": 0.0047362573819903645, "kl": 0.10357666015625, "learning_rate": 7.709112648580194e-07, "loss": 0.00010354580444982275, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1633, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 74.01041889190674, "completions/min_length": 26.0, "epoch": 3.2482005460412013, "grad_norm": 0.13616774127687276, "kl": 0.1837158203125, "learning_rate": 7.7064604862823e-07, "loss": 0.0001835908042266965, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1634, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 77.78125190734863, "completions/min_length": 35.25, "epoch": 3.250186150409531, "grad_norm": 0.004290667829682497, "kl": 0.1402587890625, "learning_rate": 7.703807246484679e-07, "loss": 0.0001403296337230131, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1635, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 79.3854193687439, "completions/min_length": 27.75, "epoch": 3.2521717547778604, "grad_norm": 0.007670113679000537, "kl": 0.12652587890625, "learning_rate": 7.701152930243641e-07, "loss": 0.00012663830420933664, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1636, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 77.61458587646484, "completions/min_length": 31.5, "epoch": 3.2541573591461903, "grad_norm": 1.205690192312357, "kl": 0.1190185546875, "learning_rate": 7.698497538615927e-07, "loss": 0.014132445678114891, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1637, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 64.57291793823242, "completions/min_length": 25.5, "epoch": 3.25614296351452, "grad_norm": 0.003923149670308882, "kl": 0.1015625, "learning_rate": 7.695841072658702e-07, "loss": 0.00010168996959691867, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1638, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 83.75000286102295, "completions/min_length": 40.625, "epoch": 3.2581285678828493, "grad_norm": 0.004258045179516714, "kl": 0.1298828125, "learning_rate": 7.693183533429566e-07, "loss": 0.00012992811389267445, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1639, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 82.63541984558105, "completions/min_length": 33.5, "epoch": 3.260114172251179, "grad_norm": 0.003951889895466826, "kl": 0.10870361328125, "learning_rate": 7.690524921986541e-07, "loss": 0.00010872267739614472, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1640, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.125, "completions/mean_length": 63.781251430511475, "completions/min_length": 25.375, "epoch": 3.2620997766195083, "grad_norm": 1.0159283661393874, "kl": 0.561767578125, "learning_rate": 7.687865239388074e-07, "loss": -0.010935746133327484, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1641, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 72.52083587646484, "completions/min_length": 31.125, "epoch": 3.2640853809878383, "grad_norm": 0.004971517003292599, "kl": 0.114013671875, "learning_rate": 7.685204486693046e-07, "loss": 0.00011402781819924712, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1642, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 69.55208539962769, "completions/min_length": 28.0, "epoch": 3.266070985356168, "grad_norm": 0.014124415914013335, "kl": 0.1290283203125, "learning_rate": 7.682542664960756e-07, "loss": 0.00012876864639110863, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1643, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 82.364586353302, "completions/min_length": 36.375, "epoch": 3.2680565897244973, "grad_norm": 0.9671038369069495, "kl": 0.145263671875, "learning_rate": 7.679879775250933e-07, "loss": 0.00014527887105941772, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1644, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 79.09375238418579, "completions/min_length": 31.125, "epoch": 3.270042194092827, "grad_norm": 1.215322346308037, "kl": 0.12432861328125, "learning_rate": 7.67721581862373e-07, "loss": -0.0004590476746670902, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1645, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 72.18750095367432, "completions/min_length": 24.625, "epoch": 3.2720277984611568, "grad_norm": 0.004351051363441635, "kl": 0.1134033203125, "learning_rate": 7.674550796139728e-07, "loss": 0.00011344471568008885, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1646, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 79.81250286102295, "completions/min_length": 33.625, "epoch": 3.2740134028294863, "grad_norm": 1.4283401699234863, "kl": 0.12200927734375, "learning_rate": 7.671884708859926e-07, "loss": -0.00461210822686553, "memory(GiB)": 94.21, "reward": 1.7604166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.2934674955904484, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1647, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 77.61458539962769, "completions/min_length": 23.875, "epoch": 3.275999007197816, "grad_norm": 0.004063448915065144, "kl": 0.1136474609375, "learning_rate": 7.66921755784575e-07, "loss": 0.00011347376130288467, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1648, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 78.81250238418579, "completions/min_length": 32.25, "epoch": 3.2779846115661453, "grad_norm": 1.2102170022127292, "kl": 0.16961669921875, "learning_rate": 7.666549344159053e-07, "loss": 0.0011187988566234708, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.15001969039440155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1649, "train_speed(iter/s)": 0.022625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 82.43750286102295, "completions/min_length": 31.125, "epoch": 3.2799702159344752, "grad_norm": 0.004321021860241708, "kl": 0.13226318359375, "learning_rate": 7.663880068862105e-07, "loss": 0.00013228798343334347, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1650, "train_speed(iter/s)": 0.022626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 70.55208539962769, "completions/min_length": 32.75, "epoch": 3.2819558203028047, "grad_norm": 0.006157701966564095, "kl": 0.13311767578125, "learning_rate": 7.661209733017602e-07, "loss": 0.00013323294115252793, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1651, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 68.8229193687439, "completions/min_length": 28.625, "epoch": 3.2839414246711343, "grad_norm": 0.006003061476838553, "kl": 0.1304931640625, "learning_rate": 7.658538337688662e-07, "loss": 0.00013045336527284235, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1652, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 75.87500190734863, "completions/min_length": 32.0, "epoch": 3.2859270290394638, "grad_norm": 0.005259941679482467, "kl": 0.1142578125, "learning_rate": 7.655865883938825e-07, "loss": 0.00011412893945816904, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1653, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 77.15625190734863, "completions/min_length": 33.0, "epoch": 3.2879126334077933, "grad_norm": 0.005645225135338039, "kl": 0.12353515625, "learning_rate": 7.653192372832053e-07, "loss": 0.00012346345465630293, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1654, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 76.19791889190674, "completions/min_length": 36.875, "epoch": 3.2898982377761232, "grad_norm": 0.004232910251998929, "kl": 0.1103515625, "learning_rate": 7.650517805432723e-07, "loss": 0.00011038359662052244, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1655, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 75.91666889190674, "completions/min_length": 32.0, "epoch": 3.2918838421444527, "grad_norm": 0.005116356027105769, "kl": 0.1380615234375, "learning_rate": 7.647842182805644e-07, "loss": 0.00013812229735776782, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1656, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 73.55208539962769, "completions/min_length": 31.125, "epoch": 3.2938694465127822, "grad_norm": 0.004553823528121366, "kl": 0.12689208984375, "learning_rate": 7.645165506016034e-07, "loss": 0.00012686976697295904, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1657, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 77.16667032241821, "completions/min_length": 33.25, "epoch": 3.2958550508811117, "grad_norm": 0.005727086790194925, "kl": 0.15771484375, "learning_rate": 7.642487776129538e-07, "loss": 0.00015769053425174206, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1658, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 76.989586353302, "completions/min_length": 28.125, "epoch": 3.2978406552494417, "grad_norm": 0.55710548896836, "kl": 0.128814697265625, "learning_rate": 7.639808994212216e-07, "loss": -0.02060895785689354, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1659, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 75.63541793823242, "completions/min_length": 30.875, "epoch": 3.299826259617771, "grad_norm": 1.0852818349107676, "kl": 0.1715087890625, "learning_rate": 7.637129161330546e-07, "loss": -0.006823991425335407, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1660, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 74.04166889190674, "completions/min_length": 31.875, "epoch": 3.3018118639861007, "grad_norm": 0.7534462709329907, "kl": 0.121826171875, "learning_rate": 7.634448278551431e-07, "loss": 0.01573183760046959, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.39076167345046997, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1661, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 67.78125238418579, "completions/min_length": 27.0, "epoch": 3.3037974683544302, "grad_norm": 0.00765202887471509, "kl": 0.1141357421875, "learning_rate": 7.631766346942186e-07, "loss": 0.00011410520528443158, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1662, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 73.55208587646484, "completions/min_length": 24.5, "epoch": 3.30578307272276, "grad_norm": 0.005772935990358881, "kl": 0.129638671875, "learning_rate": 7.629083367570545e-07, "loss": 0.00012965813220944256, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1663, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 72.80208492279053, "completions/min_length": 34.625, "epoch": 3.3077686770910897, "grad_norm": 0.8868981699109365, "kl": 0.14312744140625, "learning_rate": 7.626399341504659e-07, "loss": -0.0033722962252795696, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1664, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 62.03125238418579, "completions/min_length": 29.625, "epoch": 3.309754281459419, "grad_norm": 0.004636103307137744, "kl": 0.10882568359375, "learning_rate": 7.623714269813097e-07, "loss": 0.00010876665328396484, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1665, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 78.18750286102295, "completions/min_length": 36.375, "epoch": 3.3117398858277487, "grad_norm": 0.004663018321275086, "kl": 0.12786865234375, "learning_rate": 7.621028153564842e-07, "loss": 0.0001278170821024105, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1666, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 77.05208444595337, "completions/min_length": 27.75, "epoch": 3.313725490196078, "grad_norm": 1.0943482974223269, "kl": 0.13677978515625, "learning_rate": 7.618340993829296e-07, "loss": -0.014341501519083977, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1667, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 67.51041841506958, "completions/min_length": 29.375, "epoch": 3.315711094564408, "grad_norm": 0.02768554531048945, "kl": 0.132568359375, "learning_rate": 7.615652791676275e-07, "loss": 0.00013254139048513025, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1668, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.375, "completions/mean_length": 63.06250190734863, "completions/min_length": 26.5, "epoch": 3.3176966989327377, "grad_norm": 0.005198287396105113, "kl": 0.11370849609375, "learning_rate": 7.612963548176006e-07, "loss": 0.00011383212404325604, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1669, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 79.40625190734863, "completions/min_length": 32.75, "epoch": 3.319682303301067, "grad_norm": 0.7140729524804544, "kl": 0.11859130859375, "learning_rate": 7.610273264399139e-07, "loss": -0.013541224412620068, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1670, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 72.802086353302, "completions/min_length": 25.875, "epoch": 3.3216679076693967, "grad_norm": 0.0054114449703056115, "kl": 0.12188720703125, "learning_rate": 7.60758194141673e-07, "loss": 0.00012187685206299648, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1671, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 72.68750238418579, "completions/min_length": 32.25, "epoch": 3.3236535120377266, "grad_norm": 0.005123265492942159, "kl": 0.12640380859375, "learning_rate": 7.604889580300253e-07, "loss": 0.00012642424553632736, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1672, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 61.69791793823242, "completions/min_length": 32.125, "epoch": 3.325639116406056, "grad_norm": 1.4226433850996956, "kl": 0.09539794921875, "learning_rate": 7.602196182121597e-07, "loss": 0.005553328897804022, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1673, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 67.583336353302, "completions/min_length": 32.875, "epoch": 3.3276247207743856, "grad_norm": 0.008505849012252384, "kl": 0.15631103515625, "learning_rate": 7.599501747953058e-07, "loss": 0.00015634068404324353, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1674, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.375, "completions/mean_length": 63.7916693687439, "completions/min_length": 28.75, "epoch": 3.329610325142715, "grad_norm": 0.014416546446628905, "kl": 0.14996337890625, "learning_rate": 7.596806278867349e-07, "loss": 0.00014982081484049559, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1675, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 70.05208492279053, "completions/min_length": 27.375, "epoch": 3.331595929511045, "grad_norm": 0.022812507937014836, "kl": 0.1673583984375, "learning_rate": 7.594109775937594e-07, "loss": 0.00016750273061916232, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1676, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.375, "completions/mean_length": 63.302085399627686, "completions/min_length": 31.375, "epoch": 3.3335815338793746, "grad_norm": 0.009978699406551094, "kl": 0.14642333984375, "learning_rate": 7.591412240237328e-07, "loss": 0.00014648112119175494, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1677, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 76.85416984558105, "completions/min_length": 34.75, "epoch": 3.335567138247704, "grad_norm": 0.01161245548076017, "kl": 0.15521240234375, "learning_rate": 7.588713672840499e-07, "loss": 0.0001550914894323796, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1678, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 75.87500286102295, "completions/min_length": 27.0, "epoch": 3.3375527426160336, "grad_norm": 0.006369140127020487, "kl": 0.148681640625, "learning_rate": 7.586014074821463e-07, "loss": 0.00014846259728074074, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1679, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.625, "completions/mean_length": 74.67708492279053, "completions/min_length": 32.125, "epoch": 3.339538346984363, "grad_norm": 0.01972339892496228, "kl": 0.1590576171875, "learning_rate": 7.583313447254986e-07, "loss": 0.00015887395420577377, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1680, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 67.86458539962769, "completions/min_length": 28.875, "epoch": 3.341523951352693, "grad_norm": 1.6950877150456949, "kl": 0.14044189453125, "learning_rate": 7.580611791216249e-07, "loss": 0.006854488514363766, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1681, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 72.06250238418579, "completions/min_length": 32.75, "epoch": 3.3435095557210226, "grad_norm": 0.01185761618753757, "kl": 0.145263671875, "learning_rate": 7.577909107780836e-07, "loss": 0.00014527476741932333, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1682, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 70.91666793823242, "completions/min_length": 36.125, "epoch": 3.345495160089352, "grad_norm": 0.006486787342394677, "kl": 0.12518310546875, "learning_rate": 7.575205398024747e-07, "loss": 0.00012520681775640696, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1683, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 73.54166984558105, "completions/min_length": 31.625, "epoch": 3.347480764457682, "grad_norm": 0.006158420987590548, "kl": 0.14495849609375, "learning_rate": 7.572500663024382e-07, "loss": 0.00014497833035420626, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1684, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.375, "completions/mean_length": 81.91666793823242, "completions/min_length": 33.5, "epoch": 3.3494663688260116, "grad_norm": 1.1276844325681221, "kl": 0.1416015625, "learning_rate": 7.569794903856554e-07, "loss": 0.006271720863878727, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1685, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 74.71875286102295, "completions/min_length": 34.375, "epoch": 3.351451973194341, "grad_norm": 0.008847755641461988, "kl": 0.16290283203125, "learning_rate": 7.567088121598489e-07, "loss": 0.0001630194892641157, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1686, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 65.23958492279053, "completions/min_length": 30.375, "epoch": 3.3534375775626706, "grad_norm": 0.009989974382605888, "kl": 0.14715576171875, "learning_rate": 7.564380317327809e-07, "loss": 0.00014691035903524607, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1687, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 67.927086353302, "completions/min_length": 29.5, "epoch": 3.355423181931, "grad_norm": 0.006753273323245216, "kl": 0.12701416015625, "learning_rate": 7.561671492122551e-07, "loss": 0.00012727115245070308, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1688, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 62.07291889190674, "completions/min_length": 29.125, "epoch": 3.35740878629933, "grad_norm": 0.007011487067309397, "kl": 0.11737060546875, "learning_rate": 7.558961647061155e-07, "loss": 0.000117397794383578, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1689, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 65.3541693687439, "completions/min_length": 28.875, "epoch": 3.3593943906676595, "grad_norm": 0.006668422601808058, "kl": 0.14642333984375, "learning_rate": 7.55625078322247e-07, "loss": 0.0001464033266529441, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1690, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 74.96875238418579, "completions/min_length": 25.75, "epoch": 3.361379995035989, "grad_norm": 0.9052303731246248, "kl": 0.14935302734375, "learning_rate": 7.553538901685749e-07, "loss": -0.011964105069637299, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1691, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 65.71875238418579, "completions/min_length": 27.5, "epoch": 3.3633655994043186, "grad_norm": 0.07706817762589453, "kl": 0.2091064453125, "learning_rate": 7.550826003530648e-07, "loss": 0.00020899111405014992, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1692, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 76.58333587646484, "completions/min_length": 35.375, "epoch": 3.365351203772648, "grad_norm": 0.00727086030957464, "kl": 0.15716552734375, "learning_rate": 7.54811208983723e-07, "loss": 0.0001571819157106802, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1693, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 72.19791841506958, "completions/min_length": 28.5, "epoch": 3.367336808140978, "grad_norm": 0.7756703641063131, "kl": 0.140625, "learning_rate": 7.545397161685965e-07, "loss": -0.003865304170176387, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1694, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 68.79166889190674, "completions/min_length": 27.25, "epoch": 3.3693224125093075, "grad_norm": 0.007018035201710085, "kl": 0.1446533203125, "learning_rate": 7.542681220157719e-07, "loss": 0.0001447978720534593, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1695, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 76.21875190734863, "completions/min_length": 32.375, "epoch": 3.371308016877637, "grad_norm": 0.007976663372422239, "kl": 0.14532470703125, "learning_rate": 7.539964266333769e-07, "loss": 0.00014518079115077853, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1696, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 65.63541889190674, "completions/min_length": 31.75, "epoch": 3.373293621245967, "grad_norm": 0.009945207154231856, "kl": 0.16845703125, "learning_rate": 7.537246301295792e-07, "loss": 0.00016843291814439, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1697, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 73.67708587646484, "completions/min_length": 36.625, "epoch": 3.3752792256142965, "grad_norm": 0.7434967715816888, "kl": 0.1787109375, "learning_rate": 7.534527326125871e-07, "loss": 0.007135696243494749, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166669771075, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1698, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 69.08333587646484, "completions/min_length": 29.0, "epoch": 3.377264829982626, "grad_norm": 0.010068434496510612, "kl": 0.17083740234375, "learning_rate": 7.531807341906482e-07, "loss": 0.00017061618564184755, "memory(GiB)": 94.21, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1699, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.125, "completions/mean_length": 63.46875238418579, "completions/min_length": 31.625, "epoch": 3.3792504343509555, "grad_norm": 0.00976635779637887, "kl": 0.1580810546875, "learning_rate": 7.529086349720513e-07, "loss": 0.0001582516561029479, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1700, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 65.12500238418579, "completions/min_length": 29.0, "epoch": 3.381236038719285, "grad_norm": 0.9611885138389086, "kl": 0.168701171875, "learning_rate": 7.526364350651248e-07, "loss": -0.003184668719768524, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1701, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 69.35416841506958, "completions/min_length": 30.375, "epoch": 3.383221643087615, "grad_norm": 0.009766277868600749, "kl": 0.15380859375, "learning_rate": 7.523641345782373e-07, "loss": 0.00015370766050182283, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1702, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 76.97916984558105, "completions/min_length": 35.5, "epoch": 3.3852072474559445, "grad_norm": 0.7288127808365923, "kl": 0.184814453125, "learning_rate": 7.520917336197976e-07, "loss": -0.0007909226114861667, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1703, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 67.35416841506958, "completions/min_length": 28.0, "epoch": 3.387192851824274, "grad_norm": 0.010006117930618948, "kl": 0.160400390625, "learning_rate": 7.51819232298254e-07, "loss": 0.00016042486822698265, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1704, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 72.68750238418579, "completions/min_length": 24.875, "epoch": 3.3891784561926035, "grad_norm": 1.3397785533930207, "kl": 0.1732177734375, "learning_rate": 7.515466307220954e-07, "loss": 0.003318143542855978, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1705, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 69.50000190734863, "completions/min_length": 21.625, "epoch": 3.391164060560933, "grad_norm": 0.01128887071312138, "kl": 0.20703125, "learning_rate": 7.512739289998502e-07, "loss": 0.00020709529053419828, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1706, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 70.69791889190674, "completions/min_length": 26.25, "epoch": 3.393149664929263, "grad_norm": 0.009535280218155568, "kl": 0.16607666015625, "learning_rate": 7.510011272400867e-07, "loss": 0.00016635702922940254, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1707, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 67.37500143051147, "completions/min_length": 27.875, "epoch": 3.3951352692975925, "grad_norm": 0.009928533263206538, "kl": 0.1678466796875, "learning_rate": 7.507282255514132e-07, "loss": 0.00016785907791927457, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1708, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 65.41666889190674, "completions/min_length": 25.125, "epoch": 3.397120873665922, "grad_norm": 0.01129909746147632, "kl": 0.16998291015625, "learning_rate": 7.504552240424777e-07, "loss": 0.00016990506264846772, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1709, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 64.21875095367432, "completions/min_length": 29.0, "epoch": 3.399106478034252, "grad_norm": 0.010326836098592045, "kl": 0.1895751953125, "learning_rate": 7.501821228219681e-07, "loss": 0.00018965858907904476, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1710, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 78.89583587646484, "completions/min_length": 32.75, "epoch": 3.4010920824025814, "grad_norm": 0.007930106955842066, "kl": 0.156982421875, "learning_rate": 7.499089219986114e-07, "loss": 0.00015706241538282484, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1711, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 74.04167032241821, "completions/min_length": 21.0, "epoch": 3.403077686770911, "grad_norm": 0.00881103987580228, "kl": 0.16668701171875, "learning_rate": 7.496356216811749e-07, "loss": 0.00016673810023348778, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1712, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 69.06250095367432, "completions/min_length": 33.625, "epoch": 3.4050632911392404, "grad_norm": 0.5746561055634027, "kl": 0.14215087890625, "learning_rate": 7.493622219784654e-07, "loss": -0.008945518173277378, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1713, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 71.34375238418579, "completions/min_length": 37.875, "epoch": 3.40704889550757, "grad_norm": 0.008620110561251806, "kl": 0.179443359375, "learning_rate": 7.490887229993291e-07, "loss": 0.00017924243002198637, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1714, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 71.85416841506958, "completions/min_length": 31.125, "epoch": 3.4090344998759, "grad_norm": 0.008395071812018153, "kl": 0.13922119140625, "learning_rate": 7.488151248526518e-07, "loss": 0.00013900024350732565, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1715, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 75.56250143051147, "completions/min_length": 32.625, "epoch": 3.4110201042442294, "grad_norm": 0.008315892709862402, "kl": 0.1593017578125, "learning_rate": 7.485414276473586e-07, "loss": 0.0001594174245838076, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1716, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 70.0729193687439, "completions/min_length": 28.5, "epoch": 3.413005708612559, "grad_norm": 0.00783300529868059, "kl": 0.1651611328125, "learning_rate": 7.482676314924143e-07, "loss": 0.00016503839287906885, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1717, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 61.135419845581055, "completions/min_length": 24.625, "epoch": 3.4149913129808884, "grad_norm": 1.9546241395304556, "kl": 0.18292236328125, "learning_rate": 7.479937364968232e-07, "loss": -0.00027079074061475694, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1718, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 78.18750190734863, "completions/min_length": 35.5, "epoch": 3.416976917349218, "grad_norm": 1.7623421600901559, "kl": 2.26300048828125, "learning_rate": 7.477197427696284e-07, "loss": -0.0010346894850954413, "memory(GiB)": 94.21, "reward": 1.7291666865348816, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.18837061524391174, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1719, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 78.32291889190674, "completions/min_length": 28.625, "epoch": 3.418962521717548, "grad_norm": 0.006645385845243952, "kl": 0.16180419921875, "learning_rate": 7.47445650419913e-07, "loss": 0.0001617235830053687, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1720, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 73.79166793823242, "completions/min_length": 33.5, "epoch": 3.4209481260858774, "grad_norm": 0.007464780670110095, "kl": 0.169921875, "learning_rate": 7.471714595567987e-07, "loss": 0.00016992166638374329, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1721, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 78.35416793823242, "completions/min_length": 30.375, "epoch": 3.422933730454207, "grad_norm": 0.0065839816936756465, "kl": 0.15899658203125, "learning_rate": 7.468971702894469e-07, "loss": 0.00015909734065644443, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1722, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 74.26041889190674, "completions/min_length": 28.75, "epoch": 3.424919334822537, "grad_norm": 1.6644447947001004, "kl": 0.1729736328125, "learning_rate": 7.466227827270583e-07, "loss": 0.013186678290367126, "memory(GiB)": 94.21, "reward": 1.8229166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.2281883768737316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1723, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 69.17708492279053, "completions/min_length": 27.125, "epoch": 3.4269049391908664, "grad_norm": 0.006002825201215312, "kl": 0.14239501953125, "learning_rate": 7.463482969788718e-07, "loss": 0.00014237160212360322, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1724, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 68.88541889190674, "completions/min_length": 28.5, "epoch": 3.428890543559196, "grad_norm": 1.8490101057166934, "kl": 0.22674560546875, "learning_rate": 7.460737131541665e-07, "loss": -0.001330991624854505, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.06846532225608826, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.30890411138534546, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1725, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 67.45833539962769, "completions/min_length": 22.0, "epoch": 3.4308761479275254, "grad_norm": 0.006188197772008088, "kl": 0.135498046875, "learning_rate": 7.457990313622601e-07, "loss": 0.00013564078835770488, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1726, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 75.18750286102295, "completions/min_length": 27.25, "epoch": 3.432861752295855, "grad_norm": 0.007752230806755084, "kl": 0.1744384765625, "learning_rate": 7.455242517125091e-07, "loss": 0.00017422600649297237, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1727, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 63.75000190734863, "completions/min_length": 27.0, "epoch": 3.434847356664185, "grad_norm": 0.009166380740336441, "kl": 0.157470703125, "learning_rate": 7.452493743143091e-07, "loss": 0.0001575587666593492, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1728, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 66.64583396911621, "completions/min_length": 30.25, "epoch": 3.4368329610325143, "grad_norm": 0.00628188816198669, "kl": 0.12420654296875, "learning_rate": 7.449743992770949e-07, "loss": 0.0001242095313500613, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1729, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 74.05208539962769, "completions/min_length": 29.375, "epoch": 3.438818565400844, "grad_norm": 0.006433618028306444, "kl": 0.13214111328125, "learning_rate": 7.446993267103399e-07, "loss": 0.00013204696006141603, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1730, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 78.08333587646484, "completions/min_length": 34.25, "epoch": 3.4408041697691734, "grad_norm": 0.006373883099692789, "kl": 0.1527099609375, "learning_rate": 7.444241567235561e-07, "loss": 0.00015269606956280768, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1731, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 74.70833587646484, "completions/min_length": 29.125, "epoch": 3.442789774137503, "grad_norm": 1.138698122894617, "kl": 0.13763427734375, "learning_rate": 7.441488894262948e-07, "loss": -0.006746276281774044, "memory(GiB)": 94.21, "reward": 1.6979166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.35874661430716515, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1732, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 82.48958587646484, "completions/min_length": 31.875, "epoch": 3.444775378505833, "grad_norm": 0.008164486759855389, "kl": 0.13238525390625, "learning_rate": 7.438735249281459e-07, "loss": 0.0001324699114775285, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1733, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 69.66666793823242, "completions/min_length": 24.625, "epoch": 3.4467609828741623, "grad_norm": 0.01014397796764063, "kl": 0.1566162109375, "learning_rate": 7.435980633387374e-07, "loss": 0.0001566542632644996, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1734, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 75.95833492279053, "completions/min_length": 29.5, "epoch": 3.448746587242492, "grad_norm": 0.007700556726749011, "kl": 0.171142578125, "learning_rate": 7.433225047677368e-07, "loss": 0.00017101904086302966, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1735, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 75.16666984558105, "completions/min_length": 35.0, "epoch": 3.450732191610822, "grad_norm": 0.0078091523468408, "kl": 0.1402587890625, "learning_rate": 7.4304684932485e-07, "loss": 0.00014021956303622574, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1736, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 73.11458444595337, "completions/min_length": 21.75, "epoch": 3.4527177959791513, "grad_norm": 0.008060159842349679, "kl": 0.1611328125, "learning_rate": 7.42771097119821e-07, "loss": 0.0001611356856301427, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1737, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.625, "completions/mean_length": 71.78125190734863, "completions/min_length": 30.75, "epoch": 3.454703400347481, "grad_norm": 0.005258868446796736, "kl": 0.1329345703125, "learning_rate": 7.424952482624327e-07, "loss": 0.00013275850506033748, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1738, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 70.76041889190674, "completions/min_length": 28.5, "epoch": 3.4566890047158103, "grad_norm": 0.019238751690534312, "kl": 0.16473388671875, "learning_rate": 7.422193028625065e-07, "loss": 0.00016477250028401613, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1739, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 79.45833683013916, "completions/min_length": 29.25, "epoch": 3.45867460908414, "grad_norm": 0.019995114685998377, "kl": 0.1566162109375, "learning_rate": 7.419432610299023e-07, "loss": 0.0001565864949952811, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1740, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 78.71875333786011, "completions/min_length": 32.25, "epoch": 3.4606602134524698, "grad_norm": 0.004951524678397244, "kl": 0.12481689453125, "learning_rate": 7.416671228745181e-07, "loss": 0.00012488094216678292, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1741, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 74.26041793823242, "completions/min_length": 36.0, "epoch": 3.4626458178207993, "grad_norm": 0.005712938335980011, "kl": 0.15191650390625, "learning_rate": 7.413908885062902e-07, "loss": 0.00015190729754976928, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1742, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 76.083336353302, "completions/min_length": 25.875, "epoch": 3.464631422189129, "grad_norm": 0.004767199588825017, "kl": 0.11920166015625, "learning_rate": 7.411145580351938e-07, "loss": 0.0001191339033539407, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1743, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 79.87500190734863, "completions/min_length": 38.0, "epoch": 3.4666170265574583, "grad_norm": 0.005086541043455125, "kl": 0.16180419921875, "learning_rate": 7.408381315712416e-07, "loss": 0.00016174567281268537, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1744, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 74.55208587646484, "completions/min_length": 30.125, "epoch": 3.468602630925788, "grad_norm": 0.010422260363298103, "kl": 0.13922119140625, "learning_rate": 7.405616092244849e-07, "loss": 0.00013916482566855848, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1745, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 70.4166693687439, "completions/min_length": 29.625, "epoch": 3.4705882352941178, "grad_norm": 0.005632430520019546, "kl": 0.1383056640625, "learning_rate": 7.402849911050136e-07, "loss": 0.00013838404265698045, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1746, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 78.7291693687439, "completions/min_length": 32.0, "epoch": 3.4725738396624473, "grad_norm": 1.195313390646512, "kl": 0.15374755859375, "learning_rate": 7.400082773229549e-07, "loss": 0.00626950990408659, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1747, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 75.95833683013916, "completions/min_length": 29.625, "epoch": 3.4745594440307768, "grad_norm": 1.471272619877359, "kl": 0.15460205078125, "learning_rate": 7.397314679884745e-07, "loss": -0.018831651657819748, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.18335824459791183, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1748, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 72.63541889190674, "completions/min_length": 28.75, "epoch": 3.4765450483991067, "grad_norm": 0.006510104769034702, "kl": 0.13385009765625, "learning_rate": 7.394545632117761e-07, "loss": 0.00013389332161750644, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1749, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 72.09375190734863, "completions/min_length": 31.875, "epoch": 3.4785306527674362, "grad_norm": 0.9269615218887043, "kl": 0.15655517578125, "learning_rate": 7.391775631031015e-07, "loss": 0.01420507114380598, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1750, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 73.11458587646484, "completions/min_length": 27.375, "epoch": 3.4805162571357657, "grad_norm": 1.2281621569462045, "kl": 0.16961669921875, "learning_rate": 7.389004677727304e-07, "loss": -0.00881937239319086, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166669771075, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1751, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 68.4479193687439, "completions/min_length": 26.5, "epoch": 3.4825018615040952, "grad_norm": 0.004998159829211148, "kl": 0.1248779296875, "learning_rate": 7.386232773309801e-07, "loss": 0.00012489521759562194, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1752, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 76.05208492279053, "completions/min_length": 32.25, "epoch": 3.4844874658724247, "grad_norm": 0.0053036963477892714, "kl": 0.14691162109375, "learning_rate": 7.383459918882063e-07, "loss": 0.0001469059643568471, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1753, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 83.45833492279053, "completions/min_length": 37.5, "epoch": 3.4864730702407547, "grad_norm": 1.052878812999578, "kl": 0.14801025390625, "learning_rate": 7.380686115548023e-07, "loss": 0.002062710002064705, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1754, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 76.20833539962769, "completions/min_length": 30.375, "epoch": 3.488458674609084, "grad_norm": 0.004662863880316448, "kl": 0.1090087890625, "learning_rate": 7.377911364411988e-07, "loss": 0.00010899835615418851, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1755, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 73.04166889190674, "completions/min_length": 27.5, "epoch": 3.4904442789774137, "grad_norm": 0.008554424656485449, "kl": 0.1456298828125, "learning_rate": 7.375135666578649e-07, "loss": 0.00014553712389897555, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1756, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 69.98958587646484, "completions/min_length": 28.375, "epoch": 3.4924298833457432, "grad_norm": 0.7139281385286269, "kl": 0.13531494140625, "learning_rate": 7.37235902315307e-07, "loss": 0.005020014476031065, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1757, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 75.62500190734863, "completions/min_length": 28.75, "epoch": 3.4944154877140727, "grad_norm": 0.004439238411343683, "kl": 0.11932373046875, "learning_rate": 7.36958143524069e-07, "loss": 0.00011935785732930526, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1758, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 81.85416841506958, "completions/min_length": 25.875, "epoch": 3.4964010920824027, "grad_norm": 0.006904535457039458, "kl": 0.163818359375, "learning_rate": 7.366802903947329e-07, "loss": 0.00016380642773583531, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1759, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 78.34375238418579, "completions/min_length": 23.5, "epoch": 3.498386696450732, "grad_norm": 0.007442303936144033, "kl": 0.16259765625, "learning_rate": 7.364023430379177e-07, "loss": 0.0001626228477107361, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1760, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 74.927086353302, "completions/min_length": 32.625, "epoch": 3.5003723008190617, "grad_norm": 0.006725138916354058, "kl": 0.14306640625, "learning_rate": 7.361243015642804e-07, "loss": 0.0001429548137821257, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1761, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 83.68750286102295, "completions/min_length": 30.75, "epoch": 3.5023579051873917, "grad_norm": 0.006402114686299546, "kl": 0.141845703125, "learning_rate": 7.35846166084515e-07, "loss": 0.00014180198195390403, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1762, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 74.11458587646484, "completions/min_length": 32.0, "epoch": 3.504343509555721, "grad_norm": 1.5242345691940025, "kl": 0.15777587890625, "learning_rate": 7.355679367093535e-07, "loss": 0.005624070763587952, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1763, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 82.16666984558105, "completions/min_length": 24.25, "epoch": 3.5063291139240507, "grad_norm": 0.005008337493259989, "kl": 0.139892578125, "learning_rate": 7.352896135495648e-07, "loss": 0.00013970279542263597, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1764, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 79.03125095367432, "completions/min_length": 34.125, "epoch": 3.50831471829238, "grad_norm": 0.0063231165661246464, "kl": 0.128662109375, "learning_rate": 7.350111967159551e-07, "loss": 0.00012871438229922205, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1765, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 78.36458492279053, "completions/min_length": 33.125, "epoch": 3.5103003226607097, "grad_norm": 0.9796185565579775, "kl": 0.1348876953125, "learning_rate": 7.347326863193683e-07, "loss": 0.01389553677290678, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1766, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 74.03125333786011, "completions/min_length": 27.0, "epoch": 3.5122859270290396, "grad_norm": 0.012969665645352702, "kl": 0.1329345703125, "learning_rate": 7.344540824706854e-07, "loss": 0.00013288123591337353, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1767, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 74.65625238418579, "completions/min_length": 31.375, "epoch": 3.514271531397369, "grad_norm": 0.7792473241226281, "kl": 0.13555908203125, "learning_rate": 7.341753852808243e-07, "loss": -0.008246229030191898, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1768, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 71.57291793823242, "completions/min_length": 23.125, "epoch": 3.5162571357656986, "grad_norm": 0.006008880970299535, "kl": 0.1143798828125, "learning_rate": 7.338965948607405e-07, "loss": 0.00011432982137193903, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1769, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 73.43750286102295, "completions/min_length": 24.5, "epoch": 3.518242740134028, "grad_norm": 0.0045265401935526105, "kl": 0.14056396484375, "learning_rate": 7.336177113214264e-07, "loss": 0.00014058224041946232, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1770, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 72.92708587646484, "completions/min_length": 26.875, "epoch": 3.5202283445023577, "grad_norm": 0.008165285444302825, "kl": 0.12371826171875, "learning_rate": 7.333387347739116e-07, "loss": 0.00012376814265735447, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1771, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 69.07291793823242, "completions/min_length": 26.25, "epoch": 3.5222139488706876, "grad_norm": 0.004496528961753019, "kl": 0.1346435546875, "learning_rate": 7.330596653292624e-07, "loss": 0.00013465905794873834, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1772, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 70.33333587646484, "completions/min_length": 26.75, "epoch": 3.524199553239017, "grad_norm": 0.005157170136741095, "kl": 0.123809814453125, "learning_rate": 7.327805030985821e-07, "loss": 0.00012389587936922908, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1773, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 75.28125190734863, "completions/min_length": 33.25, "epoch": 3.5261851576073466, "grad_norm": 1.0780627597589751, "kl": 0.132080078125, "learning_rate": 7.325012481930119e-07, "loss": -0.006494271568953991, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1774, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 73.0729193687439, "completions/min_length": 31.5, "epoch": 3.5281707619756766, "grad_norm": 0.005637240376047731, "kl": 0.1092529296875, "learning_rate": 7.322219007237284e-07, "loss": 0.00010925737296929583, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1775, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 70.18750190734863, "completions/min_length": 32.625, "epoch": 3.530156366344006, "grad_norm": 0.0054442983551818164, "kl": 0.14739990234375, "learning_rate": 7.319424608019462e-07, "loss": 0.0001474183809477836, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1776, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 80.94791984558105, "completions/min_length": 32.625, "epoch": 3.5321419707123356, "grad_norm": 0.006323462722378896, "kl": 0.13037109375, "learning_rate": 7.31662928538916e-07, "loss": 0.0001303914177697152, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1777, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 73.26041841506958, "completions/min_length": 28.125, "epoch": 3.534127575080665, "grad_norm": 2.063083501267316, "kl": 0.14923095703125, "learning_rate": 7.31383304045926e-07, "loss": -0.005754535552114248, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.2231760062277317, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1778, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 70.98958539962769, "completions/min_length": 30.75, "epoch": 3.5361131794489946, "grad_norm": 0.00569979668839109, "kl": 0.13128662109375, "learning_rate": 7.311035874343003e-07, "loss": 0.00013118793140165508, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1779, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 68.1354193687439, "completions/min_length": 18.5, "epoch": 3.5380987838173246, "grad_norm": 0.005362944183722852, "kl": 0.10382080078125, "learning_rate": 7.308237788154001e-07, "loss": 0.00010373725672252476, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1780, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 78.64583492279053, "completions/min_length": 37.75, "epoch": 3.540084388185654, "grad_norm": 0.005507703822651866, "kl": 0.13519287109375, "learning_rate": 7.305438783006235e-07, "loss": 0.00013502707588486373, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1781, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 69.90625238418579, "completions/min_length": 28.75, "epoch": 3.5420699925539836, "grad_norm": 0.005272995335092888, "kl": 0.12017822265625, "learning_rate": 7.302638860014045e-07, "loss": 0.00012019756104564294, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1782, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 67.47916889190674, "completions/min_length": 26.25, "epoch": 3.544055596922313, "grad_norm": 0.005618939014267104, "kl": 0.1375732421875, "learning_rate": 7.299838020292142e-07, "loss": 0.00013759899593424052, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1783, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 78.7916693687439, "completions/min_length": 30.75, "epoch": 3.5460412012906426, "grad_norm": 0.006450190090770658, "kl": 0.15087890625, "learning_rate": 7.297036264955598e-07, "loss": 0.00015095654816832393, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1784, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 79.89583683013916, "completions/min_length": 28.625, "epoch": 3.5480268056589725, "grad_norm": 0.006078465170606672, "kl": 0.12969970703125, "learning_rate": 7.294233595119856e-07, "loss": 0.00012971264368388802, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1785, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 68.27083444595337, "completions/min_length": 25.625, "epoch": 3.550012410027302, "grad_norm": 1.060742909163463, "kl": 0.103515625, "learning_rate": 7.291430011900714e-07, "loss": 0.002888138871639967, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1786, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 74.19791984558105, "completions/min_length": 30.5, "epoch": 3.5519980143956316, "grad_norm": 0.005159957355823121, "kl": 0.122314453125, "learning_rate": 7.288625516414341e-07, "loss": 0.00012228739797137678, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1787, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 67.57291841506958, "completions/min_length": 26.625, "epoch": 3.5539836187639615, "grad_norm": 0.005048837208848599, "kl": 0.09710693359375, "learning_rate": 7.285820109777267e-07, "loss": 9.700824739411473e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1788, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 81.55208492279053, "completions/min_length": 35.625, "epoch": 3.555969223132291, "grad_norm": 2.802655587411937, "kl": 0.13677978515625, "learning_rate": 7.283013793106383e-07, "loss": -0.0034123463556170464, "memory(GiB)": 94.21, "reward": 1.8437500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8437500074505806, "rewards/CineAccuracyORM/std": 0.1783013790845871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1789, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 63.66666841506958, "completions/min_length": 22.0, "epoch": 3.5579548275006205, "grad_norm": 0.005373974337805658, "kl": 0.11224365234375, "learning_rate": 7.280206567518942e-07, "loss": 0.00011214089317945763, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1790, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 77.76041984558105, "completions/min_length": 35.0, "epoch": 3.55994043186895, "grad_norm": 0.7314390909669484, "kl": 0.1492919921875, "learning_rate": 7.277398434132564e-07, "loss": 0.010314702056348324, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1791, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 76.07291889190674, "completions/min_length": 24.0, "epoch": 3.5619260362372795, "grad_norm": 0.005081806749917839, "kl": 0.1201171875, "learning_rate": 7.274589394065227e-07, "loss": 0.00012014494132017717, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1792, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 70.18750190734863, "completions/min_length": 32.375, "epoch": 3.5639116406056095, "grad_norm": 2.4177105628840043, "kl": 0.1680908203125, "learning_rate": 7.271779448435265e-07, "loss": 0.007997551001608372, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1793, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 73.53125286102295, "completions/min_length": 29.875, "epoch": 3.565897244973939, "grad_norm": 0.8596422892425496, "kl": 0.170166015625, "learning_rate": 7.268968598361381e-07, "loss": 0.0001700421271380037, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1794, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 67.48958492279053, "completions/min_length": 25.875, "epoch": 3.5678828493422685, "grad_norm": 0.004895542140499346, "kl": 0.09979248046875, "learning_rate": 7.266156844962637e-07, "loss": 9.970397513825446e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1795, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 75.10416984558105, "completions/min_length": 35.0, "epoch": 3.569868453710598, "grad_norm": 0.005409499908882387, "kl": 0.12469482421875, "learning_rate": 7.263344189358446e-07, "loss": 0.00012467967462725937, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1796, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 76.42708539962769, "completions/min_length": 30.5, "epoch": 3.5718540580789275, "grad_norm": 0.9104884220848838, "kl": 0.1405029296875, "learning_rate": 7.260530632668589e-07, "loss": 0.01337971817702055, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1797, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 72.15625190734863, "completions/min_length": 29.25, "epoch": 3.5738396624472575, "grad_norm": 0.004686041944902814, "kl": 0.112548828125, "learning_rate": 7.257716176013204e-07, "loss": 0.00011262830230407417, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1798, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 72.72916889190674, "completions/min_length": 27.5, "epoch": 3.575825266815587, "grad_norm": 0.005643537075254303, "kl": 0.14208984375, "learning_rate": 7.254900820512788e-07, "loss": 0.00014193866809364408, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1799, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 66.52083444595337, "completions/min_length": 28.0, "epoch": 3.5778108711839165, "grad_norm": 0.0048415769370699245, "kl": 0.09820556640625, "learning_rate": 7.252084567288188e-07, "loss": 9.824969311011955e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1800, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 63.302085876464844, "completions/min_length": 23.0, "epoch": 3.5797964755522464, "grad_norm": 1.7110392222200153, "kl": 0.89947509765625, "learning_rate": 7.24926741746062e-07, "loss": -0.0016936659812927246, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1801, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.125, "completions/mean_length": 61.156251430511475, "completions/min_length": 15.375, "epoch": 3.581782079920576, "grad_norm": 0.1543262279104331, "kl": 0.21392822265625, "learning_rate": 7.246449372151651e-07, "loss": 0.00021442461002152413, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1802, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 61.343750953674316, "completions/min_length": 25.75, "epoch": 3.5837676842889055, "grad_norm": 0.006875509190911133, "kl": 0.1259765625, "learning_rate": 7.243630432483203e-07, "loss": 0.0001259248674614355, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1803, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 72.00000286102295, "completions/min_length": 37.25, "epoch": 3.585753288657235, "grad_norm": 0.0072108871217124145, "kl": 0.1336669921875, "learning_rate": 7.240810599577557e-07, "loss": 0.00013358003343455493, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1804, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 69.25000286102295, "completions/min_length": 32.625, "epoch": 3.5877388930255645, "grad_norm": 0.007652425373720988, "kl": 0.123046875, "learning_rate": 7.237989874557348e-07, "loss": 0.00012306266580708325, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1805, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 68.67708444595337, "completions/min_length": 29.375, "epoch": 3.5897244973938944, "grad_norm": 0.005980489894497306, "kl": 0.1192626953125, "learning_rate": 7.235168258545569e-07, "loss": 0.00011916876246687025, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1806, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 68.59375333786011, "completions/min_length": 26.375, "epoch": 3.591710101762224, "grad_norm": 0.007014690623918719, "kl": 0.1312255859375, "learning_rate": 7.232345752665563e-07, "loss": 0.0001311766100116074, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1807, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 62.38541793823242, "completions/min_length": 25.625, "epoch": 3.5936957061305534, "grad_norm": 0.006197969225767213, "kl": 0.1141357421875, "learning_rate": 7.229522358041032e-07, "loss": 0.00011410910519771278, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1808, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 65.30208539962769, "completions/min_length": 29.75, "epoch": 3.595681310498883, "grad_norm": 0.00851083362483851, "kl": 0.13275146484375, "learning_rate": 7.226698075796028e-07, "loss": 0.00013260229025036097, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1809, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 69.68750190734863, "completions/min_length": 34.625, "epoch": 3.5976669148672125, "grad_norm": 0.005047180379247043, "kl": 0.12200927734375, "learning_rate": 7.223872907054959e-07, "loss": 0.00012217086623422801, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1810, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 73.14583539962769, "completions/min_length": 27.5, "epoch": 3.5996525192355424, "grad_norm": 0.006522959598551174, "kl": 0.13055419921875, "learning_rate": 7.221046852942585e-07, "loss": 0.00013057971955277026, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1811, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 67.66666841506958, "completions/min_length": 29.0, "epoch": 3.601638123603872, "grad_norm": 1.3865765564894799, "kl": 0.12506103515625, "learning_rate": 7.218219914584018e-07, "loss": -0.005225293338298798, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1812, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 65.77083587646484, "completions/min_length": 29.875, "epoch": 3.6036237279722014, "grad_norm": 0.006628537842286538, "kl": 0.12939453125, "learning_rate": 7.215392093104724e-07, "loss": 0.00012932605750393122, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1813, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 63.03125190734863, "completions/min_length": 27.875, "epoch": 3.6056093323405314, "grad_norm": 0.7043513720869784, "kl": 0.12054443359375, "learning_rate": 7.212563389630516e-07, "loss": 0.0026327527593821287, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1814, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 64.69791984558105, "completions/min_length": 30.375, "epoch": 3.607594936708861, "grad_norm": 1.879648578374954, "kl": 0.1417236328125, "learning_rate": 7.209733805287566e-07, "loss": 0.018731407821178436, "memory(GiB)": 94.21, "reward": 1.8854166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12591182813048363, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1815, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 74.06250143051147, "completions/min_length": 32.375, "epoch": 3.6095805410771904, "grad_norm": 0.007056660868362295, "kl": 0.14337158203125, "learning_rate": 7.206903341202388e-07, "loss": 0.00014341103087645024, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1816, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 67.75000095367432, "completions/min_length": 32.625, "epoch": 3.61156614544552, "grad_norm": 0.006371641351019976, "kl": 0.13885498046875, "learning_rate": 7.204071998501851e-07, "loss": 0.00013888889225199819, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1817, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 68.11458492279053, "completions/min_length": 29.125, "epoch": 3.6135517498138494, "grad_norm": 0.008241844511920065, "kl": 0.12786865234375, "learning_rate": 7.201239778313172e-07, "loss": 0.00012802897254005075, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1818, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 64.80208492279053, "completions/min_length": 29.0, "epoch": 3.6155373541821794, "grad_norm": 0.006215019744812914, "kl": 0.13055419921875, "learning_rate": 7.198406681763924e-07, "loss": 0.0001305764017160982, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1819, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 55.61458492279053, "completions/min_length": 23.25, "epoch": 3.617522958550509, "grad_norm": 0.00827985827536905, "kl": 0.11395263671875, "learning_rate": 7.195572709982017e-07, "loss": 0.00011397053458495066, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1820, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 64.2604193687439, "completions/min_length": 24.75, "epoch": 3.6195085629188384, "grad_norm": 0.006110202754089011, "kl": 0.14373779296875, "learning_rate": 7.192737864095717e-07, "loss": 0.00014358545013237745, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1821, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.875, "completions/mean_length": 57.56250190734863, "completions/min_length": 24.125, "epoch": 3.621494167287168, "grad_norm": 0.009407831162796688, "kl": 0.11248779296875, "learning_rate": 7.189902145233639e-07, "loss": 0.0001125200287788175, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1822, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 71.43750190734863, "completions/min_length": 29.125, "epoch": 3.6234797716554974, "grad_norm": 0.0065155727812167, "kl": 0.14495849609375, "learning_rate": 7.18706555452474e-07, "loss": 0.00014499214012175798, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1823, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 60.83333492279053, "completions/min_length": 19.375, "epoch": 3.6254653760238273, "grad_norm": 2.731760216377746, "kl": 0.1307373046875, "learning_rate": 7.184228093098331e-07, "loss": 0.020896030589938164, "memory(GiB)": 94.21, "reward": 1.8437500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8437500074505806, "rewards/CineAccuracyORM/std": 0.08474057167768478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1824, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.375, "completions/mean_length": 63.562500953674316, "completions/min_length": 27.75, "epoch": 3.627450980392157, "grad_norm": 0.007092418579656643, "kl": 0.1331787109375, "learning_rate": 7.181389762084062e-07, "loss": 0.00013313154340721667, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1825, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 62.55208444595337, "completions/min_length": 27.5, "epoch": 3.6294365847604864, "grad_norm": 0.006724095345345822, "kl": 0.09979248046875, "learning_rate": 7.178550562611937e-07, "loss": 9.976244473364204e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1826, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 56.54166841506958, "completions/min_length": 28.125, "epoch": 3.6314221891288163, "grad_norm": 0.008591325526163072, "kl": 0.13714599609375, "learning_rate": 7.175710495812299e-07, "loss": 0.00013697068789042532, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1827, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.875, "completions/mean_length": 58.02083396911621, "completions/min_length": 27.875, "epoch": 3.633407793497146, "grad_norm": 2.0772600373004755, "kl": 0.12646484375, "learning_rate": 7.172869562815841e-07, "loss": 0.013000253587961197, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1828, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 66.60416889190674, "completions/min_length": 23.5, "epoch": 3.6353933978654753, "grad_norm": 0.006807599656466557, "kl": 0.13201904296875, "learning_rate": 7.1700277647536e-07, "loss": 0.0001318681170232594, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1829, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 66.94791793823242, "completions/min_length": 28.125, "epoch": 3.637379002233805, "grad_norm": 0.006777744219711361, "kl": 0.10772705078125, "learning_rate": 7.167185102756954e-07, "loss": 0.00010777149873320013, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1830, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 55.35416841506958, "completions/min_length": 27.625, "epoch": 3.6393646066021343, "grad_norm": 0.006369809038275092, "kl": 0.1226806640625, "learning_rate": 7.164341577957628e-07, "loss": 0.0001226781605510041, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1831, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 57.145835399627686, "completions/min_length": 24.625, "epoch": 3.6413502109704643, "grad_norm": 0.004898125680672852, "kl": 0.09814453125, "learning_rate": 7.161497191487692e-07, "loss": 9.809241601033136e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1832, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 62.14583492279053, "completions/min_length": 25.25, "epoch": 3.643335815338794, "grad_norm": 0.004841641715958726, "kl": 0.099365234375, "learning_rate": 7.158651944479554e-07, "loss": 9.944696648744866e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1833, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 67.0104193687439, "completions/min_length": 29.25, "epoch": 3.6453214197071233, "grad_norm": 0.005631832695056003, "kl": 0.109375, "learning_rate": 7.155805838065971e-07, "loss": 0.00010937352635664865, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1834, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 65.58333492279053, "completions/min_length": 27.25, "epoch": 3.647307024075453, "grad_norm": 2.0516988789726516, "kl": 0.121826171875, "learning_rate": 7.152958873380036e-07, "loss": 0.0021626483649015427, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1835, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 59.156251430511475, "completions/min_length": 28.5, "epoch": 3.6492926284437823, "grad_norm": 0.00792809365393854, "kl": 0.104736328125, "learning_rate": 7.150111051555187e-07, "loss": 0.00010478984040673822, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1836, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 65.06250143051147, "completions/min_length": 27.125, "epoch": 3.6512782328121123, "grad_norm": 0.006272151654520285, "kl": 0.12457275390625, "learning_rate": 7.147262373725203e-07, "loss": 0.00012455848627723753, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1837, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 64.45833587646484, "completions/min_length": 29.25, "epoch": 3.653263837180442, "grad_norm": 0.010138752430041276, "kl": 0.11981201171875, "learning_rate": 7.144412841024203e-07, "loss": 0.0001199308899231255, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1838, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.625, "completions/mean_length": 63.64583492279053, "completions/min_length": 30.0, "epoch": 3.6552494415487713, "grad_norm": 1.959911313887933, "kl": 0.13482666015625, "learning_rate": 7.141562454586649e-07, "loss": 0.0025736771058291197, "memory(GiB)": 94.21, "reward": 1.739583358168602, "reward_std": 0.0765465535223484, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3323722183704376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1839, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 61.312500953674316, "completions/min_length": 28.125, "epoch": 3.6572350459171012, "grad_norm": 0.005725444141515463, "kl": 0.11456298828125, "learning_rate": 7.13871121554734e-07, "loss": 0.0001146845388575457, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1840, "train_speed(iter/s)": 0.022672 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 69.6666693687439, "completions/min_length": 29.5, "epoch": 3.6592206502854308, "grad_norm": 1.1932159932838553, "kl": 0.15057373046875, "learning_rate": 7.135859125041413e-07, "loss": -0.008493129163980484, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1841, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.125, "completions/mean_length": 58.406251430511475, "completions/min_length": 22.125, "epoch": 3.6612062546537603, "grad_norm": 2.7533264143821095, "kl": 0.140380859375, "learning_rate": 7.133006184204346e-07, "loss": 0.009647785685956478, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1842, "train_speed(iter/s)": 0.022675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 68.00000190734863, "completions/min_length": 26.875, "epoch": 3.6631918590220898, "grad_norm": 0.008914227966030367, "kl": 0.1353759765625, "learning_rate": 7.130152394171958e-07, "loss": 0.00013552504242397845, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1843, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 70.57291984558105, "completions/min_length": 30.125, "epoch": 3.6651774633904193, "grad_norm": 0.004903080778798898, "kl": 0.116058349609375, "learning_rate": 7.127297756080404e-07, "loss": 0.0001160261090262793, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1844, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 64.48958492279053, "completions/min_length": 24.75, "epoch": 3.6671630677587492, "grad_norm": 0.009379265161374049, "kl": 0.10693359375, "learning_rate": 7.124442271066174e-07, "loss": 0.00010711261711549014, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1845, "train_speed(iter/s)": 0.022675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 65.09375190734863, "completions/min_length": 24.0, "epoch": 3.6691486721270787, "grad_norm": 0.013110222449817103, "kl": 0.11053466796875, "learning_rate": 7.121585940266098e-07, "loss": 0.00011059310054406524, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1846, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 68.28125143051147, "completions/min_length": 29.25, "epoch": 3.6711342764954082, "grad_norm": 0.7796965033919522, "kl": 0.16357421875, "learning_rate": 7.118728764817344e-07, "loss": 0.0032601915299892426, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1847, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 58.86458444595337, "completions/min_length": 24.875, "epoch": 3.6731198808637378, "grad_norm": 0.005150863181530681, "kl": 0.1390380859375, "learning_rate": 7.115870745857415e-07, "loss": 0.0001388014352414757, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1848, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 59.76041841506958, "completions/min_length": 24.375, "epoch": 3.6751054852320673, "grad_norm": 1.407358173851238, "kl": 0.12493896484375, "learning_rate": 7.113011884524147e-07, "loss": -0.01851654425263405, "memory(GiB)": 94.21, "reward": 1.8229166865348816, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.2281883768737316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1849, "train_speed(iter/s)": 0.022675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.375, "completions/mean_length": 55.69791793823242, "completions/min_length": 22.375, "epoch": 3.677091089600397, "grad_norm": 0.007230555246015449, "kl": 0.11529541015625, "learning_rate": 7.110152181955715e-07, "loss": 0.00011520447878865525, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1850, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 66.18750190734863, "completions/min_length": 26.5, "epoch": 3.6790766939687267, "grad_norm": 0.005970788714428602, "kl": 0.1524658203125, "learning_rate": 7.107291639290626e-07, "loss": 0.0001524357357993722, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1851, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 66.76041793823242, "completions/min_length": 21.875, "epoch": 3.6810622983370562, "grad_norm": 0.0058751415618637035, "kl": 0.092041015625, "learning_rate": 7.104430257667729e-07, "loss": 9.202898945659399e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1852, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 67.36458444595337, "completions/min_length": 28.25, "epoch": 3.683047902705386, "grad_norm": 0.005798619096735513, "kl": 0.12322998046875, "learning_rate": 7.101568038226193e-07, "loss": 0.00012326159048825502, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1853, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 66.61458539962769, "completions/min_length": 29.0, "epoch": 3.6850335070737157, "grad_norm": 0.005238016725894942, "kl": 0.10540771484375, "learning_rate": 7.098704982105533e-07, "loss": 0.00010534387547522783, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1854, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 66.72916841506958, "completions/min_length": 27.25, "epoch": 3.687019111442045, "grad_norm": 0.00717185441201321, "kl": 0.10296630859375, "learning_rate": 7.095841090445593e-07, "loss": 0.00010290753562003374, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1855, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.625, "completions/mean_length": 62.51041793823242, "completions/min_length": 25.75, "epoch": 3.6890047158103747, "grad_norm": 0.007378353955862464, "kl": 0.11468505859375, "learning_rate": 7.092976364386549e-07, "loss": 0.00011478550732135773, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1856, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 65.7604193687439, "completions/min_length": 27.625, "epoch": 3.690990320178704, "grad_norm": 1.588375474639574, "kl": 0.13885498046875, "learning_rate": 7.09011080506891e-07, "loss": -0.0022888171952217817, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1857, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.375, "completions/mean_length": 57.500000953674316, "completions/min_length": 23.125, "epoch": 3.692975924547034, "grad_norm": 0.004777811878242808, "kl": 0.0960693359375, "learning_rate": 7.087244413633515e-07, "loss": 9.60480174398981e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1858, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 69.85416889190674, "completions/min_length": 28.25, "epoch": 3.6949615289153637, "grad_norm": 1.323252843270289, "kl": 0.2032470703125, "learning_rate": 7.084377191221537e-07, "loss": 0.005240024998784065, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.05103103816509247, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.23100870847702026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1859, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 68.81250238418579, "completions/min_length": 28.75, "epoch": 3.696947133283693, "grad_norm": 0.006342188120975697, "kl": 0.12506103515625, "learning_rate": 7.081509138974476e-07, "loss": 0.00012491666711866856, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1860, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 64.53125190734863, "completions/min_length": 20.625, "epoch": 3.6989327376520227, "grad_norm": 0.007387791806852369, "kl": 0.12518310546875, "learning_rate": 7.078640258034169e-07, "loss": 0.0001251039357157424, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1861, "train_speed(iter/s)": 0.022679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 61.34375286102295, "completions/min_length": 28.875, "epoch": 3.700918342020352, "grad_norm": 0.009776880280304325, "kl": 0.10504150390625, "learning_rate": 7.075770549542776e-07, "loss": 0.00010499759810045362, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1862, "train_speed(iter/s)": 0.022679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 73.62500190734863, "completions/min_length": 31.5, "epoch": 3.702903946388682, "grad_norm": 0.006774700881786028, "kl": 0.1256103515625, "learning_rate": 7.072900014642788e-07, "loss": 0.00012560535105876625, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1863, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 77.3229193687439, "completions/min_length": 30.5, "epoch": 3.7048895507570117, "grad_norm": 0.6222512603968614, "kl": 0.13238525390625, "learning_rate": 7.070028654477031e-07, "loss": -0.00737034622579813, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1864, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 64.88541746139526, "completions/min_length": 21.875, "epoch": 3.706875155125341, "grad_norm": 0.006040225226120234, "kl": 0.09716796875, "learning_rate": 7.06715647018865e-07, "loss": 9.721890091896057e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1865, "train_speed(iter/s)": 0.022679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 69.19791889190674, "completions/min_length": 25.625, "epoch": 3.708860759493671, "grad_norm": 0.005582538170644881, "kl": 0.105712890625, "learning_rate": 7.064283462921124e-07, "loss": 0.00010561906674411148, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1866, "train_speed(iter/s)": 0.022679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.875, "completions/mean_length": 60.51041793823242, "completions/min_length": 24.375, "epoch": 3.7108463638620006, "grad_norm": 0.006752502014591171, "kl": 0.1104736328125, "learning_rate": 7.061409633818261e-07, "loss": 0.00011047690350096673, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1867, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 68.12500333786011, "completions/min_length": 36.375, "epoch": 3.71283196823033, "grad_norm": 0.007721242730341458, "kl": 0.1253662109375, "learning_rate": 7.058534984024192e-07, "loss": 0.00012530006642919034, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1868, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 60.875000953674316, "completions/min_length": 21.5, "epoch": 3.7148175725986596, "grad_norm": 0.006035502647825041, "kl": 0.0963134765625, "learning_rate": 7.055659514683376e-07, "loss": 9.623346704756841e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1869, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.875, "completions/mean_length": 59.989585876464844, "completions/min_length": 23.625, "epoch": 3.716803176966989, "grad_norm": 0.011465434708352068, "kl": 0.142578125, "learning_rate": 7.052783226940598e-07, "loss": 0.00014271473628468812, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1870, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 68.02083539962769, "completions/min_length": 28.375, "epoch": 3.718788781335319, "grad_norm": 0.006816685458152101, "kl": 0.129638671875, "learning_rate": 7.049906121940972e-07, "loss": 0.00012967245129402727, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1871, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 65.37500095367432, "completions/min_length": 25.625, "epoch": 3.7207743857036486, "grad_norm": 1.99842936831698, "kl": 0.1126708984375, "learning_rate": 7.047028200829936e-07, "loss": 0.0154643002897501, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1872, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 67.80208492279053, "completions/min_length": 30.25, "epoch": 3.722759990071978, "grad_norm": 0.00850229765740659, "kl": 0.14208984375, "learning_rate": 7.044149464753247e-07, "loss": 0.00014212813402991742, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1873, "train_speed(iter/s)": 0.022685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 71.73958539962769, "completions/min_length": 31.5, "epoch": 3.7247455944403076, "grad_norm": 1.4640433437717335, "kl": 0.16326904296875, "learning_rate": 7.041269914856995e-07, "loss": 0.0028760037384927273, "memory(GiB)": 94.21, "reward": 1.7604166865348816, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.16290925815701485, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1874, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 71.2916669845581, "completions/min_length": 32.5, "epoch": 3.726731198808637, "grad_norm": 0.006477994837750161, "kl": 0.159027099609375, "learning_rate": 7.038389552287589e-07, "loss": 0.0001590569590916857, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1875, "train_speed(iter/s)": 0.022685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 66.87500190734863, "completions/min_length": 21.875, "epoch": 3.728716803176967, "grad_norm": 0.008271216754882607, "kl": 0.1041259765625, "learning_rate": 7.035508378191765e-07, "loss": 0.00010413752170279622, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1876, "train_speed(iter/s)": 0.022686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 66.78125095367432, "completions/min_length": 23.375, "epoch": 3.7307024075452966, "grad_norm": 0.00664322864823127, "kl": 0.116455078125, "learning_rate": 7.032626393716576e-07, "loss": 0.0001165727007901296, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1877, "train_speed(iter/s)": 0.022686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 56.052085876464844, "completions/min_length": 28.25, "epoch": 3.732688011913626, "grad_norm": 0.007829866383206988, "kl": 0.1219482421875, "learning_rate": 7.029743600009405e-07, "loss": 0.00012191152200102806, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1878, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 71.43750095367432, "completions/min_length": 32.875, "epoch": 3.734673616281956, "grad_norm": 0.006424328093703674, "kl": 0.107666015625, "learning_rate": 7.026859998217952e-07, "loss": 0.00010763494356069714, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1879, "train_speed(iter/s)": 0.02269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 77.89583587646484, "completions/min_length": 33.75, "epoch": 3.7366592206502856, "grad_norm": 0.006521821053455773, "kl": 0.13043212890625, "learning_rate": 7.02397558949024e-07, "loss": 0.00013040761405136436, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1880, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 74.87500286102295, "completions/min_length": 23.25, "epoch": 3.738644825018615, "grad_norm": 0.006884919195593551, "kl": 0.13214111328125, "learning_rate": 7.021090374974617e-07, "loss": 0.0001320820301771164, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1881, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 71.82291841506958, "completions/min_length": 26.375, "epoch": 3.7406304293869446, "grad_norm": 0.18718118936398945, "kl": 0.20416259765625, "learning_rate": 7.018204355819745e-07, "loss": 0.00020377016335260123, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1882, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 68.23958539962769, "completions/min_length": 24.625, "epoch": 3.742616033755274, "grad_norm": 1.173162333055251, "kl": 0.1240234375, "learning_rate": 7.015317533174611e-07, "loss": 0.00340383592993021, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1883, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 71.33333539962769, "completions/min_length": 26.25, "epoch": 3.744601638123604, "grad_norm": 0.9205295205375594, "kl": 0.12945556640625, "learning_rate": 7.012429908188522e-07, "loss": -0.02063288539648056, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1884, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 66.30208539962769, "completions/min_length": 26.25, "epoch": 3.7465872424919335, "grad_norm": 0.005034871181005451, "kl": 0.12896728515625, "learning_rate": 7.009541482011101e-07, "loss": 0.00012909471115563065, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1885, "train_speed(iter/s)": 0.02269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 63.09375190734863, "completions/min_length": 26.0, "epoch": 3.748572846860263, "grad_norm": 0.011845975495309176, "kl": 0.13043212890625, "learning_rate": 7.006652255792293e-07, "loss": 0.00013028444664087147, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1886, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 65.88541841506958, "completions/min_length": 29.125, "epoch": 3.7505584512285925, "grad_norm": 0.007137531001676096, "kl": 0.11895751953125, "learning_rate": 7.00376223068236e-07, "loss": 0.00011895185161847621, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1887, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 62.72916889190674, "completions/min_length": 26.625, "epoch": 3.752544055596922, "grad_norm": 0.0067629483915526405, "kl": 0.11480712890625, "learning_rate": 7.000871407831885e-07, "loss": 0.00011476640065666288, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1888, "train_speed(iter/s)": 0.022693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 61.59375190734863, "completions/min_length": 26.5, "epoch": 3.754529659965252, "grad_norm": 0.007762670190126869, "kl": 0.131103515625, "learning_rate": 6.997979788391765e-07, "loss": 0.00013126680278219283, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1889, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 68.61458539962769, "completions/min_length": 28.375, "epoch": 3.7565152643335815, "grad_norm": 0.005160033397521778, "kl": 0.10675048828125, "learning_rate": 6.995087373513214e-07, "loss": 0.00010677635145839304, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1890, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 74.87500190734863, "completions/min_length": 26.75, "epoch": 3.758500868701911, "grad_norm": 0.005537344400095234, "kl": 0.11224365234375, "learning_rate": 6.992194164347766e-07, "loss": 0.00011223134060855955, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1891, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 61.28125238418579, "completions/min_length": 28.0, "epoch": 3.760486473070241, "grad_norm": 0.005390244078857738, "kl": 0.1016845703125, "learning_rate": 6.989300162047272e-07, "loss": 0.0001016532041830942, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1892, "train_speed(iter/s)": 0.022694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 66.68750143051147, "completions/min_length": 27.625, "epoch": 3.7624720774385705, "grad_norm": 0.005636414187853037, "kl": 0.107177734375, "learning_rate": 6.98640536776389e-07, "loss": 0.00010705619206419215, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1893, "train_speed(iter/s)": 0.022694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 76.05208587646484, "completions/min_length": 35.375, "epoch": 3.7644576818069, "grad_norm": 0.005081606145340272, "kl": 0.12762451171875, "learning_rate": 6.983509782650102e-07, "loss": 0.00012753944611176848, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1894, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 71.42708587646484, "completions/min_length": 27.75, "epoch": 3.7664432861752295, "grad_norm": 0.005999624547732231, "kl": 0.12530517578125, "learning_rate": 6.980613407858703e-07, "loss": 0.00012525395140983164, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1895, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 78.71875286102295, "completions/min_length": 33.0, "epoch": 3.768428890543559, "grad_norm": 0.005217533850369434, "kl": 0.11328125, "learning_rate": 6.977716244542804e-07, "loss": 0.00011322525097057223, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1896, "train_speed(iter/s)": 0.022696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 62.750001430511475, "completions/min_length": 26.5, "epoch": 3.770414494911889, "grad_norm": 0.005345821615258043, "kl": 0.10003662109375, "learning_rate": 6.974818293855822e-07, "loss": 9.994323772843927e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1897, "train_speed(iter/s)": 0.022694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 67.48958539962769, "completions/min_length": 27.625, "epoch": 3.7724000992802185, "grad_norm": 0.004984296940265329, "kl": 0.12548828125, "learning_rate": 6.971919556951497e-07, "loss": 0.00012557375885080546, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1898, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 69.72916889190674, "completions/min_length": 28.625, "epoch": 3.774385703648548, "grad_norm": 0.007322977817807963, "kl": 0.1207275390625, "learning_rate": 6.969020034983876e-07, "loss": 0.00012055723345838487, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1899, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 69.31250047683716, "completions/min_length": 27.0, "epoch": 3.7763713080168775, "grad_norm": 0.004901615243586582, "kl": 0.13311767578125, "learning_rate": 6.966119729107325e-07, "loss": 0.00013311261136550456, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1900, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 66.39583539962769, "completions/min_length": 22.875, "epoch": 3.778356912385207, "grad_norm": 0.004334965617467214, "kl": 0.11871337890625, "learning_rate": 6.963218640476511e-07, "loss": 0.00011863360123243183, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1901, "train_speed(iter/s)": 0.022696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 70.208336353302, "completions/min_length": 29.0, "epoch": 3.780342516753537, "grad_norm": 0.004725200400548933, "kl": 0.10064697265625, "learning_rate": 6.960316770246426e-07, "loss": 0.00010059717897092924, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1902, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 71.52083587646484, "completions/min_length": 30.125, "epoch": 3.7823281211218664, "grad_norm": 0.0041194173903015534, "kl": 0.1268310546875, "learning_rate": 6.957414119572361e-07, "loss": 0.00012679636711254716, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1903, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 69.06250190734863, "completions/min_length": 27.625, "epoch": 3.784313725490196, "grad_norm": 3.176268279264717, "kl": 0.1370849609375, "learning_rate": 6.954510689609927e-07, "loss": -0.009499862790107727, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1904, "train_speed(iter/s)": 0.022696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 76.770836353302, "completions/min_length": 33.75, "epoch": 3.786299329858526, "grad_norm": 0.007278422250295857, "kl": 0.1461181640625, "learning_rate": 6.95160648151504e-07, "loss": 0.00014601660950575024, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1905, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.375, "completions/mean_length": 60.2291693687439, "completions/min_length": 26.25, "epoch": 3.7882849342268554, "grad_norm": 0.014600334124028485, "kl": 0.1004638671875, "learning_rate": 6.948701496443926e-07, "loss": 0.000100525445304811, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1906, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 64.84375190734863, "completions/min_length": 25.875, "epoch": 3.790270538595185, "grad_norm": 2.228812484942028, "kl": 0.16302490234375, "learning_rate": 6.945795735553123e-07, "loss": -0.002496750559657812, "memory(GiB)": 94.21, "reward": 1.8750000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8750000037252903, "rewards/CineAccuracyORM/std": 0.12309149652719498, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1907, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 65.84375095367432, "completions/min_length": 28.25, "epoch": 3.7922561429635144, "grad_norm": 0.024051866287186398, "kl": 0.1282958984375, "learning_rate": 6.942889199999479e-07, "loss": 0.00012829070328734815, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1908, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 70.72916841506958, "completions/min_length": 29.625, "epoch": 3.794241747331844, "grad_norm": 0.10977170695701262, "kl": 0.20635986328125, "learning_rate": 6.939981890940143e-07, "loss": 0.00020652561215683818, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1909, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 63.84375238418579, "completions/min_length": 31.0, "epoch": 3.796227351700174, "grad_norm": 1.544374184989343, "kl": 0.11077880859375, "learning_rate": 6.93707380953258e-07, "loss": -0.00868302769958973, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1910, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.625, "completions/mean_length": 66.50000095367432, "completions/min_length": 30.625, "epoch": 3.7982129560685034, "grad_norm": 0.014868270103500735, "kl": 0.1168212890625, "learning_rate": 6.934164956934557e-07, "loss": 0.0001170191535493359, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1911, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 68.61458492279053, "completions/min_length": 30.75, "epoch": 3.800198560436833, "grad_norm": 0.004727159406425762, "kl": 0.09637451171875, "learning_rate": 6.931255334304154e-07, "loss": 9.636702452553436e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1912, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 63.46875333786011, "completions/min_length": 28.375, "epoch": 3.8021841648051624, "grad_norm": 0.00885914029598628, "kl": 0.10003662109375, "learning_rate": 6.928344942799751e-07, "loss": 0.00010010460391640663, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1913, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 71.36458539962769, "completions/min_length": 29.0, "epoch": 3.804169769173492, "grad_norm": 0.006270580432592501, "kl": 0.14453125, "learning_rate": 6.925433783580037e-07, "loss": 0.0001445884263375774, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1914, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 60.48958492279053, "completions/min_length": 21.375, "epoch": 3.806155373541822, "grad_norm": 0.007046441434647712, "kl": 0.105224609375, "learning_rate": 6.922521857804008e-07, "loss": 0.00010524334356887266, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1915, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.875, "completions/mean_length": 78.45833683013916, "completions/min_length": 28.625, "epoch": 3.8081409779101514, "grad_norm": 0.0052942223035268915, "kl": 0.1007080078125, "learning_rate": 6.919609166630965e-07, "loss": 0.00010063032095786184, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1916, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 74.47916793823242, "completions/min_length": 25.25, "epoch": 3.810126582278481, "grad_norm": 0.854848926341564, "kl": 0.111572265625, "learning_rate": 6.916695711220508e-07, "loss": -0.0038129142485558987, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1917, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 76.61458683013916, "completions/min_length": 29.75, "epoch": 3.812112186646811, "grad_norm": 0.005031210589315135, "kl": 0.10882568359375, "learning_rate": 6.913781492732548e-07, "loss": 0.00010881990601774305, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1918, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 74.54166889190674, "completions/min_length": 29.125, "epoch": 3.8140977910151403, "grad_norm": 0.006844058088828535, "kl": 0.13311767578125, "learning_rate": 6.910866512327301e-07, "loss": 0.00013300779392011464, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1919, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 69.78125238418579, "completions/min_length": 24.5, "epoch": 3.81608339538347, "grad_norm": 0.005166042245422494, "kl": 0.11859130859375, "learning_rate": 6.907950771165281e-07, "loss": 0.00011861752136610448, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1920, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 61.15625190734863, "completions/min_length": 21.5, "epoch": 3.8180689997517994, "grad_norm": 0.00485129154851239, "kl": 0.11279296875, "learning_rate": 6.905034270407304e-07, "loss": 0.00011265276407357305, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1921, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 66.14583539962769, "completions/min_length": 26.125, "epoch": 3.820054604120129, "grad_norm": 0.004765973189027413, "kl": 0.12005615234375, "learning_rate": 6.902117011214495e-07, "loss": 0.00011994189117103815, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1922, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 75.35416746139526, "completions/min_length": 30.0, "epoch": 3.822040208488459, "grad_norm": 0.004603408952741002, "kl": 0.1141357421875, "learning_rate": 6.899198994748273e-07, "loss": 0.00011391112639103085, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1923, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 65.43750143051147, "completions/min_length": 28.625, "epoch": 3.8240258128567883, "grad_norm": 0.006335350848244709, "kl": 0.12322998046875, "learning_rate": 6.896280222170368e-07, "loss": 0.00012319302186369896, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1924, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 73.75000190734863, "completions/min_length": 30.375, "epoch": 3.826011417225118, "grad_norm": 0.004939332662941249, "kl": 0.10321044921875, "learning_rate": 6.8933606946428e-07, "loss": 0.00010315005056327209, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1925, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 79.44792079925537, "completions/min_length": 33.625, "epoch": 3.8279970215934473, "grad_norm": 0.004856906748949679, "kl": 0.11199951171875, "learning_rate": 6.8904404133279e-07, "loss": 0.00011200741573702544, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1926, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 68.70833539962769, "completions/min_length": 24.875, "epoch": 3.829982625961777, "grad_norm": 0.006834150376968235, "kl": 0.13092041015625, "learning_rate": 6.887519379388293e-07, "loss": 0.00013078290794510394, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1927, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 72.75000095367432, "completions/min_length": 27.0, "epoch": 3.831968230330107, "grad_norm": 1.1439780393790333, "kl": 0.1322021484375, "learning_rate": 6.884597593986905e-07, "loss": -0.00607278710231185, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1928, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 67.8541693687439, "completions/min_length": 25.0, "epoch": 3.8339538346984363, "grad_norm": 0.0055525868749895695, "kl": 0.1134033203125, "learning_rate": 6.88167505828696e-07, "loss": 0.00011340210767230019, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1929, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 80.38541984558105, "completions/min_length": 33.375, "epoch": 3.835939439066766, "grad_norm": 0.004803210972103647, "kl": 0.1268310546875, "learning_rate": 6.878751773451982e-07, "loss": 0.00012681195221375674, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1930, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 67.31250143051147, "completions/min_length": 24.625, "epoch": 3.8379250434350958, "grad_norm": 0.004959017452902894, "kl": 0.13775634765625, "learning_rate": 6.875827740645795e-07, "loss": 0.00013765256153419614, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1931, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 74.75000190734863, "completions/min_length": 32.5, "epoch": 3.8399106478034253, "grad_norm": 0.004585640330844273, "kl": 0.11090087890625, "learning_rate": 6.872902961032516e-07, "loss": 0.00011086107406299561, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1932, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 68.94791841506958, "completions/min_length": 29.25, "epoch": 3.841896252171755, "grad_norm": 1.4734246620443077, "kl": 0.114990234375, "learning_rate": 6.869977435776565e-07, "loss": -0.016336556524038315, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1933, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 67.35416889190674, "completions/min_length": 29.0, "epoch": 3.8438818565400843, "grad_norm": 0.00621383867890989, "kl": 0.10723876953125, "learning_rate": 6.867051166042655e-07, "loss": 0.00010706111061153933, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1934, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 64.63541889190674, "completions/min_length": 25.75, "epoch": 3.845867460908414, "grad_norm": 0.12179852725245788, "kl": 0.19085693359375, "learning_rate": 6.864124152995796e-07, "loss": 0.00019111763685941696, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1935, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 72.552086353302, "completions/min_length": 30.25, "epoch": 3.8478530652767438, "grad_norm": 0.0059436806618933845, "kl": 0.11016845703125, "learning_rate": 6.861196397801297e-07, "loss": 0.0001100352019420825, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1936, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 63.31250190734863, "completions/min_length": 26.375, "epoch": 3.8498386696450733, "grad_norm": 0.8467452472158886, "kl": 0.10107421875, "learning_rate": 6.858267901624756e-07, "loss": -0.01629771664738655, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1937, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 73.34375238418579, "completions/min_length": 28.0, "epoch": 3.8518242740134028, "grad_norm": 0.004944954355679515, "kl": 0.117767333984375, "learning_rate": 6.85533866563207e-07, "loss": 0.00011779210035456344, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1938, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 63.67708492279053, "completions/min_length": 32.5, "epoch": 3.8538098783817323, "grad_norm": 0.3669326533371451, "kl": 0.4107666015625, "learning_rate": 6.852408690989434e-07, "loss": 0.00040961726335808635, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1939, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 68.73958587646484, "completions/min_length": 26.0, "epoch": 3.855795482750062, "grad_norm": 1.5836457630688137, "kl": 0.10760498046875, "learning_rate": 6.849477978863333e-07, "loss": -0.010588235221803188, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1940, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 63.82291889190674, "completions/min_length": 28.625, "epoch": 3.8577810871183917, "grad_norm": 0.025299780110206237, "kl": 0.1402587890625, "learning_rate": 6.846546530420543e-07, "loss": 0.00014026850112713873, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1941, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 61.562501430511475, "completions/min_length": 21.25, "epoch": 3.8597666914867212, "grad_norm": 1.4719966550579282, "kl": 0.12982177734375, "learning_rate": 6.843614346828137e-07, "loss": -0.00681114848703146, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.0765465535223484, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3615669459104538, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1942, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 79.36458587646484, "completions/min_length": 29.125, "epoch": 3.8617522958550508, "grad_norm": 0.014381200129125837, "kl": 0.1251220703125, "learning_rate": 6.840681429253482e-07, "loss": 0.00012504885671660304, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1943, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 68.02083539962769, "completions/min_length": 29.0, "epoch": 3.8637379002233807, "grad_norm": 0.9467279928595111, "kl": 0.14398193359375, "learning_rate": 6.837747778864235e-07, "loss": 0.003352126805111766, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1944, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 64.18750143051147, "completions/min_length": 26.625, "epoch": 3.86572350459171, "grad_norm": 0.028181675532151108, "kl": 0.134033203125, "learning_rate": 6.834813396828343e-07, "loss": 0.00013379205483943224, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1945, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 66.19791889190674, "completions/min_length": 30.375, "epoch": 3.8677091089600397, "grad_norm": 0.86803973594992, "kl": 0.15765380859375, "learning_rate": 6.831878284314045e-07, "loss": 0.005411760415881872, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1946, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 66.53125143051147, "completions/min_length": 30.5, "epoch": 3.8696947133283692, "grad_norm": 0.031029352449010936, "kl": 0.13739013671875, "learning_rate": 6.828942442489877e-07, "loss": 0.00013740621216129512, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1947, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 60.8229193687439, "completions/min_length": 30.75, "epoch": 3.8716803176966987, "grad_norm": 0.029363139256240227, "kl": 0.1807861328125, "learning_rate": 6.826005872524656e-07, "loss": 0.00018048289348371327, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1948, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.625, "completions/mean_length": 69.70833492279053, "completions/min_length": 30.75, "epoch": 3.8736659220650287, "grad_norm": 1.494218997773182, "kl": 0.1422119140625, "learning_rate": 6.823068575587495e-07, "loss": 0.0057297926396131516, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1949, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 60.08333492279053, "completions/min_length": 23.875, "epoch": 3.875651526433358, "grad_norm": 1.247487800196632, "kl": 0.19287109375, "learning_rate": 6.820130552847794e-07, "loss": -0.012802074663341045, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1950, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.125, "completions/mean_length": 61.083335399627686, "completions/min_length": 30.875, "epoch": 3.8776371308016877, "grad_norm": 1.229905762425562, "kl": 0.1690673828125, "learning_rate": 6.817191805475243e-07, "loss": 0.007171073462814093, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1951, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 69.7291693687439, "completions/min_length": 30.375, "epoch": 3.879622735170017, "grad_norm": 0.019876752549019244, "kl": 0.13299560546875, "learning_rate": 6.81425233463982e-07, "loss": 0.00013292356743477285, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1952, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 73.61458539962769, "completions/min_length": 31.875, "epoch": 3.8816083395383467, "grad_norm": 2.073612769823052, "kl": 0.14691162109375, "learning_rate": 6.81131214151179e-07, "loss": 0.00530334934592247, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1953, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 67.71875095367432, "completions/min_length": 35.75, "epoch": 3.8835939439066767, "grad_norm": 0.02050286212793071, "kl": 0.1431884765625, "learning_rate": 6.808371227261709e-07, "loss": 0.00014325630036182702, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1954, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 71.34375238418579, "completions/min_length": 29.75, "epoch": 3.885579548275006, "grad_norm": 0.7280848519261982, "kl": 0.13128662109375, "learning_rate": 6.805429593060415e-07, "loss": 0.00013115754700265825, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1955, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 69.89583492279053, "completions/min_length": 33.875, "epoch": 3.8875651526433357, "grad_norm": 0.9744350584480629, "kl": 0.13262939453125, "learning_rate": 6.802487240079039e-07, "loss": 0.006188714876770973, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 1956, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.125, "completions/mean_length": 68.43750143051147, "completions/min_length": 33.25, "epoch": 3.8895507570116656, "grad_norm": 0.03647582824718364, "kl": 0.181396484375, "learning_rate": 6.799544169488991e-07, "loss": 0.00018141789769288152, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1957, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 65.38541889190674, "completions/min_length": 31.5, "epoch": 3.891536361379995, "grad_norm": 0.02551893699699209, "kl": 0.14971923828125, "learning_rate": 6.796600382461972e-07, "loss": 0.0001495483156759292, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1958, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 74.12500238418579, "completions/min_length": 33.375, "epoch": 3.8935219657483247, "grad_norm": 1.568937123012861, "kl": 0.16259765625, "learning_rate": 6.793655880169966e-07, "loss": -0.008005126379430294, "memory(GiB)": 94.21, "reward": 1.5833333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.583333333954215, "rewards/CineAccuracyORM/std": 0.375051774084568, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1959, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 72.50000333786011, "completions/min_length": 34.0, "epoch": 3.895507570116654, "grad_norm": 0.012139293898265117, "kl": 0.13787841796875, "learning_rate": 6.790710663785244e-07, "loss": 0.0001378914894303307, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1960, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 61.55208492279053, "completions/min_length": 30.5, "epoch": 3.8974931744849837, "grad_norm": 0.022091588572302666, "kl": 0.13909912109375, "learning_rate": 6.787764734480357e-07, "loss": 0.00013918429613113403, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1961, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 66.02083539962769, "completions/min_length": 31.875, "epoch": 3.8994787788533136, "grad_norm": 1.2804247007575393, "kl": 0.13671875, "learning_rate": 6.784818093428143e-07, "loss": 0.015222916379570961, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1962, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.625, "completions/mean_length": 68.39583587646484, "completions/min_length": 32.875, "epoch": 3.901464383221643, "grad_norm": 0.006602535393418251, "kl": 0.134033203125, "learning_rate": 6.781870741801723e-07, "loss": 0.00013385264901444316, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1963, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 66.87500238418579, "completions/min_length": 32.625, "epoch": 3.9034499875899726, "grad_norm": 0.006767917896116481, "kl": 0.12548828125, "learning_rate": 6.778922680774502e-07, "loss": 0.00012554106069728732, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1964, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 59.96875286102295, "completions/min_length": 28.5, "epoch": 3.905435591958302, "grad_norm": 0.006813314583956773, "kl": 0.11474609375, "learning_rate": 6.775973911520164e-07, "loss": 0.00011462499242043123, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1965, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.375, "completions/mean_length": 60.250001430511475, "completions/min_length": 30.0, "epoch": 3.9074211963266317, "grad_norm": 0.0718876500951966, "kl": 0.2137451171875, "learning_rate": 6.773024435212677e-07, "loss": 0.00021401792764663696, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1966, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 60.73958492279053, "completions/min_length": 30.75, "epoch": 3.9094068006949616, "grad_norm": 0.007183778965311937, "kl": 0.1380615234375, "learning_rate": 6.770074253026293e-07, "loss": 0.0001381241308990866, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1967, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 75.19791793823242, "completions/min_length": 33.125, "epoch": 3.911392405063291, "grad_norm": 0.006426563821911658, "kl": 0.11773681640625, "learning_rate": 6.767123366135541e-07, "loss": 0.00011759914923459291, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1968, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.625, "completions/mean_length": 61.7291693687439, "completions/min_length": 27.875, "epoch": 3.9133780094316206, "grad_norm": 0.005667167382687648, "kl": 0.1005859375, "learning_rate": 6.764171775715232e-07, "loss": 0.00010057847248390317, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1969, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 67.01041889190674, "completions/min_length": 31.625, "epoch": 3.9153636137999506, "grad_norm": 0.0066141996552372675, "kl": 0.13897705078125, "learning_rate": 6.761219482940457e-07, "loss": 0.00013895424490328878, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1970, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 67.52083587646484, "completions/min_length": 31.75, "epoch": 3.91734921816828, "grad_norm": 1.6495596582335152, "kl": 0.11236572265625, "learning_rate": 6.758266488986586e-07, "loss": -0.006600073538720608, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1971, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 70.41666984558105, "completions/min_length": 32.5, "epoch": 3.9193348225366096, "grad_norm": 0.007284866109490994, "kl": 0.10821533203125, "learning_rate": 6.755312795029271e-07, "loss": 0.00010831169493030757, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1972, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.125, "completions/mean_length": 62.54166889190674, "completions/min_length": 33.625, "epoch": 3.921320426904939, "grad_norm": 0.032575234447123964, "kl": 0.14178466796875, "learning_rate": 6.75235840224444e-07, "loss": 0.00014187510532792658, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1973, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 66.32291984558105, "completions/min_length": 37.0, "epoch": 3.9233060312732686, "grad_norm": 0.008293509430724343, "kl": 0.14508056640625, "learning_rate": 6.7494033118083e-07, "loss": 0.0001450619602110237, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1974, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.875, "completions/mean_length": 62.156250953674316, "completions/min_length": 30.625, "epoch": 3.9252916356415986, "grad_norm": 0.006937031637228478, "kl": 0.1177978515625, "learning_rate": 6.746447524897334e-07, "loss": 0.00011770258424803615, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1975, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.125, "completions/mean_length": 63.34375333786011, "completions/min_length": 30.875, "epoch": 3.927277240009928, "grad_norm": 0.008577611593958562, "kl": 0.1484375, "learning_rate": 6.743491042688306e-07, "loss": 0.00014840041694696993, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1976, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 72.98958492279053, "completions/min_length": 28.875, "epoch": 3.9292628443782576, "grad_norm": 0.0063579125709845065, "kl": 0.11407470703125, "learning_rate": 6.740533866358252e-07, "loss": 0.00011416269990149885, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1977, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 71.30208492279053, "completions/min_length": 30.75, "epoch": 3.931248448746587, "grad_norm": 1.4761917726996892, "kl": 0.15081787109375, "learning_rate": 6.737575997084491e-07, "loss": -0.013859925791621208, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.11807912588119507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1978, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 63.60416841506958, "completions/min_length": 31.5, "epoch": 3.9332340531149166, "grad_norm": 0.007616244468245575, "kl": 0.1258544921875, "learning_rate": 6.73461743604461e-07, "loss": 0.00012578511086758226, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1979, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 70.8854193687439, "completions/min_length": 35.375, "epoch": 3.9352196574832465, "grad_norm": 0.006625010156448254, "kl": 0.113037109375, "learning_rate": 6.731658184416479e-07, "loss": 0.00011308002285659313, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1980, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 74.54166793823242, "completions/min_length": 35.125, "epoch": 3.937205261851576, "grad_norm": 0.006120438705529417, "kl": 0.1190185546875, "learning_rate": 6.728698243378236e-07, "loss": 0.0001191550400108099, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1981, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.875, "completions/mean_length": 60.45833444595337, "completions/min_length": 31.25, "epoch": 3.9391908662199056, "grad_norm": 0.0071934876881588556, "kl": 0.125244140625, "learning_rate": 6.725737614108299e-07, "loss": 0.00012520799646154046, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1982, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 71.23958587646484, "completions/min_length": 29.625, "epoch": 3.9411764705882355, "grad_norm": 0.00590422020723048, "kl": 0.135498046875, "learning_rate": 6.722776297785356e-07, "loss": 0.00013533519813790917, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1983, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 62.531251430511475, "completions/min_length": 32.625, "epoch": 3.943162074956565, "grad_norm": 0.007514490586814119, "kl": 0.1483154296875, "learning_rate": 6.719814295588371e-07, "loss": 0.00014824900426901877, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1984, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 64.15625095367432, "completions/min_length": 30.0, "epoch": 3.9451476793248945, "grad_norm": 0.008563349020987557, "kl": 0.12451171875, "learning_rate": 6.716851608696582e-07, "loss": 0.00012440930004231632, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1985, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 71.3541693687439, "completions/min_length": 32.875, "epoch": 3.947133283693224, "grad_norm": 0.00844346407017783, "kl": 0.13494873046875, "learning_rate": 6.713888238289496e-07, "loss": 0.00013479527842719108, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1986, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 66.62500143051147, "completions/min_length": 33.0, "epoch": 3.9491188880615535, "grad_norm": 2.6213789328551678, "kl": 0.142333984375, "learning_rate": 6.710924185546893e-07, "loss": 0.003287344006821513, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1987, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.625, "completions/mean_length": 64.16666889190674, "completions/min_length": 34.5, "epoch": 3.9511044924298835, "grad_norm": 0.9934585794107743, "kl": 0.124267578125, "learning_rate": 6.707959451648829e-07, "loss": -0.003844011574983597, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166669771075, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1988, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 67.927086353302, "completions/min_length": 33.0, "epoch": 3.953090096798213, "grad_norm": 0.006736003654127351, "kl": 0.11407470703125, "learning_rate": 6.704994037775626e-07, "loss": 0.00011398106289561838, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1989, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 62.843750953674316, "completions/min_length": 31.375, "epoch": 3.9550757011665425, "grad_norm": 0.008397633567474767, "kl": 0.11236572265625, "learning_rate": 6.702027945107879e-07, "loss": 0.00011219953739782795, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1990, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 69.09375190734863, "completions/min_length": 35.25, "epoch": 3.9570613055348725, "grad_norm": 0.0062549754019340245, "kl": 0.12615966796875, "learning_rate": 6.699061174826452e-07, "loss": 0.00012613250873982906, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1991, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 70.72916841506958, "completions/min_length": 32.875, "epoch": 3.9590469099032015, "grad_norm": 0.8474983537110822, "kl": 0.111968994140625, "learning_rate": 6.696093728112479e-07, "loss": -0.007378804497420788, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1992, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 72.25000190734863, "completions/min_length": 34.25, "epoch": 3.9610325142715315, "grad_norm": 0.006842777116988327, "kl": 0.12701416015625, "learning_rate": 6.693125606147368e-07, "loss": 0.00012694911856669933, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1993, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 70.01041793823242, "completions/min_length": 30.25, "epoch": 3.963018118639861, "grad_norm": 0.007910718969016838, "kl": 0.1258544921875, "learning_rate": 6.690156810112786e-07, "loss": 0.0001257880503544584, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1994, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 66.88541889190674, "completions/min_length": 35.25, "epoch": 3.9650037230081905, "grad_norm": 0.007175878029710901, "kl": 0.10247802734375, "learning_rate": 6.687187341190679e-07, "loss": 0.00010259408736601472, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1995, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 66.40625190734863, "completions/min_length": 30.5, "epoch": 3.9669893273765204, "grad_norm": 0.0067688850015800465, "kl": 0.11151123046875, "learning_rate": 6.684217200563252e-07, "loss": 0.00011153890955029055, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1996, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.375, "completions/mean_length": 68.03125190734863, "completions/min_length": 39.5, "epoch": 3.96897493174485, "grad_norm": 0.007878138096834917, "kl": 0.1180419921875, "learning_rate": 6.681246389412985e-07, "loss": 0.0001179118626168929, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1997, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 72.67708587646484, "completions/min_length": 30.375, "epoch": 3.9709605361131795, "grad_norm": 0.00870718565431842, "kl": 0.14044189453125, "learning_rate": 6.678274908922619e-07, "loss": 0.00014022283721715212, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1998, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 65.70833587646484, "completions/min_length": 35.625, "epoch": 3.972946140481509, "grad_norm": 0.007485116102384131, "kl": 0.127197265625, "learning_rate": 6.675302760275166e-07, "loss": 0.0001272835215786472, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1999, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 75.1979193687439, "completions/min_length": 35.25, "epoch": 3.9749317448498385, "grad_norm": 0.006362508917186569, "kl": 0.11773681640625, "learning_rate": 6.6723299446539e-07, "loss": 0.00011758983600884676, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2000, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 67.9479193687439, "completions/min_length": 30.5, "epoch": 3.9769173492181684, "grad_norm": 0.9363975663111594, "kl": 0.11956787109375, "learning_rate": 6.669356463242361e-07, "loss": 0.013932823203504086, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.3879413418471813, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2001, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.375, "completions/mean_length": 70.65625333786011, "completions/min_length": 32.625, "epoch": 3.978902953586498, "grad_norm": 0.007441439324369319, "kl": 0.13214111328125, "learning_rate": 6.66638231722436e-07, "loss": 0.00013221651897765696, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2002, "train_speed(iter/s)": 0.022723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 71.22916793823242, "completions/min_length": 36.75, "epoch": 3.9808885579548274, "grad_norm": 0.008255645582687036, "kl": 0.11822509765625, "learning_rate": 6.663407507783964e-07, "loss": 0.00011813984019681811, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2003, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 61.812501430511475, "completions/min_length": 28.25, "epoch": 3.9828741623231574, "grad_norm": 0.00657424478774642, "kl": 0.10955810546875, "learning_rate": 6.66043203610551e-07, "loss": 0.00010963801469188184, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2004, "train_speed(iter/s)": 0.022723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 75.08333587646484, "completions/min_length": 35.125, "epoch": 3.9848597666914864, "grad_norm": 0.009475988794735851, "kl": 0.13995361328125, "learning_rate": 6.657455903373596e-07, "loss": 0.00013996614143252373, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2005, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 67.92708539962769, "completions/min_length": 30.75, "epoch": 3.9868453710598164, "grad_norm": 0.005353517132734897, "kl": 0.10235595703125, "learning_rate": 6.654479110773083e-07, "loss": 0.00010232740896753967, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2006, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 64.23958683013916, "completions/min_length": 32.75, "epoch": 3.988830975428146, "grad_norm": 0.005695187441115863, "kl": 0.1026611328125, "learning_rate": 6.6515016594891e-07, "loss": 0.00010261538409395143, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2007, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 69.270836353302, "completions/min_length": 35.125, "epoch": 3.9908165797964754, "grad_norm": 1.0797091209633252, "kl": 0.13232421875, "learning_rate": 6.648523550707028e-07, "loss": -0.0008881315588951111, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2008, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 64.26041889190674, "completions/min_length": 36.125, "epoch": 3.9928021841648054, "grad_norm": 0.0058442717427213235, "kl": 0.1116943359375, "learning_rate": 6.645544785612523e-07, "loss": 0.00011167748016305268, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2009, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 69.07291841506958, "completions/min_length": 33.375, "epoch": 3.994787788533135, "grad_norm": 0.005048500283070655, "kl": 0.10980224609375, "learning_rate": 6.642565365391488e-07, "loss": 0.00010970650328090414, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2010, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.875, "completions/mean_length": 60.59375190734863, "completions/min_length": 28.625, "epoch": 3.9967733929014644, "grad_norm": 0.010755256469881605, "kl": 0.12109375, "learning_rate": 6.639585291230097e-07, "loss": 0.00012112577678635716, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2011, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 68.14583539962769, "completions/min_length": 31.125, "epoch": 3.998758997269794, "grad_norm": 0.005064750581841125, "kl": 0.1336669921875, "learning_rate": 6.636604564314781e-07, "loss": 0.0001337287249043584, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2012, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 71.18750381469727, "completions/min_length": 32.375, "epoch": 4.00198560436833, "grad_norm": 0.018869894006653228, "kl": 0.11322021484375, "learning_rate": 6.633623185832231e-07, "loss": 0.00011309284309390932, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2013, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 68.38541889190674, "completions/min_length": 31.875, "epoch": 4.003971208736659, "grad_norm": 0.00565829619644366, "kl": 0.12139892578125, "learning_rate": 6.630641156969397e-07, "loss": 0.000121168268378824, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2014, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 68.88541984558105, "completions/min_length": 32.375, "epoch": 4.005956813104989, "grad_norm": 0.010764540924807033, "kl": 0.11993408203125, "learning_rate": 6.627658478913488e-07, "loss": 0.00011988793266937137, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2015, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 68.16666841506958, "completions/min_length": 31.625, "epoch": 4.007942417473318, "grad_norm": 0.005544413440212136, "kl": 0.1365966796875, "learning_rate": 6.624675152851974e-07, "loss": 0.00013645910075865686, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2016, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.125, "completions/mean_length": 59.10416793823242, "completions/min_length": 29.0, "epoch": 4.009928021841648, "grad_norm": 0.005310373150290691, "kl": 0.10260009765625, "learning_rate": 6.621691179972579e-07, "loss": 0.00010257892427034676, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2017, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 62.489585399627686, "completions/min_length": 31.5, "epoch": 4.011913626209978, "grad_norm": 0.005474587717592868, "kl": 0.11834716796875, "learning_rate": 6.618706561463287e-07, "loss": 0.00011841466039186344, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2018, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 68.68750190734863, "completions/min_length": 31.375, "epoch": 4.013899230578307, "grad_norm": 0.005991308310092773, "kl": 0.11474609375, "learning_rate": 6.615721298512337e-07, "loss": 0.00011478038504719734, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2019, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 75.13541793823242, "completions/min_length": 36.0, "epoch": 4.015884834946637, "grad_norm": 0.005616389886586344, "kl": 0.13177490234375, "learning_rate": 6.612735392308227e-07, "loss": 0.00013186628348194063, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2020, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.5, "completions/mean_length": 64.56250238418579, "completions/min_length": 31.375, "epoch": 4.017870439314967, "grad_norm": 0.006213665224953286, "kl": 0.095947265625, "learning_rate": 6.609748844039711e-07, "loss": 9.60233955993317e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2021, "train_speed(iter/s)": 0.022729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 65.84375190734863, "completions/min_length": 33.125, "epoch": 4.019856043683296, "grad_norm": 0.005088416557353746, "kl": 0.1044921875, "learning_rate": 6.606761654895797e-07, "loss": 0.00010443732026033103, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2022, "train_speed(iter/s)": 0.02273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 73.50000238418579, "completions/min_length": 31.75, "epoch": 4.021841648051626, "grad_norm": 0.005350222865723179, "kl": 0.10540771484375, "learning_rate": 6.603773826065749e-07, "loss": 0.0001054336316883564, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2023, "train_speed(iter/s)": 0.022731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 70.64583444595337, "completions/min_length": 34.25, "epoch": 4.023827252419955, "grad_norm": 0.00909442194662836, "kl": 0.1087646484375, "learning_rate": 6.600785358739083e-07, "loss": 0.00010878332977881655, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2024, "train_speed(iter/s)": 0.022731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 68.114586353302, "completions/min_length": 35.625, "epoch": 4.025812856788285, "grad_norm": 0.008400199158934446, "kl": 0.1041259765625, "learning_rate": 6.597796254105575e-07, "loss": 0.00010408522211946547, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2025, "train_speed(iter/s)": 0.022733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 68.82292032241821, "completions/min_length": 30.875, "epoch": 4.027798461156615, "grad_norm": 0.6613634201844689, "kl": 0.1153564453125, "learning_rate": 6.594806513355251e-07, "loss": -0.008995014242827892, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2026, "train_speed(iter/s)": 0.022733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 66.64583683013916, "completions/min_length": 34.25, "epoch": 4.029784065524944, "grad_norm": 1.6642902751946476, "kl": 0.13616943359375, "learning_rate": 6.591816137678387e-07, "loss": -0.005860649049282074, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2027, "train_speed(iter/s)": 0.022735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 64.02083539962769, "completions/min_length": 31.0, "epoch": 4.031769669893274, "grad_norm": 2.373622370428783, "kl": 0.1221923828125, "learning_rate": 6.58882512826552e-07, "loss": -0.0016249145846813917, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2028, "train_speed(iter/s)": 0.022735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 63.989585399627686, "completions/min_length": 32.375, "epoch": 4.033755274261603, "grad_norm": 0.006024388389464885, "kl": 0.09661865234375, "learning_rate": 6.585833486307434e-07, "loss": 9.664696699474007e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2029, "train_speed(iter/s)": 0.022736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 63.656251430511475, "completions/min_length": 31.0, "epoch": 4.035740878629933, "grad_norm": 0.006330535347817947, "kl": 0.10064697265625, "learning_rate": 6.582841212995164e-07, "loss": 0.0001006251186481677, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2030, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 68.06250238418579, "completions/min_length": 32.625, "epoch": 4.037726482998263, "grad_norm": 0.007502050417841412, "kl": 0.12774658203125, "learning_rate": 6.579848309519997e-07, "loss": 0.0001276147668249905, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2031, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 64.35416841506958, "completions/min_length": 32.375, "epoch": 4.039712087366592, "grad_norm": 1.6598181666727851, "kl": 0.10150146484375, "learning_rate": 6.576854777073473e-07, "loss": 0.007703796029090881, "memory(GiB)": 94.21, "reward": 1.6979166865348816, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.2281883768737316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2032, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 67.15625238418579, "completions/min_length": 34.25, "epoch": 4.041697691734922, "grad_norm": 1.8902506396159635, "kl": 0.10443115234375, "learning_rate": 6.573860616847385e-07, "loss": -0.006393782794475555, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2033, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 67.63541841506958, "completions/min_length": 34.875, "epoch": 4.043683296103252, "grad_norm": 0.007403135043457375, "kl": 0.12353515625, "learning_rate": 6.570865830033764e-07, "loss": 0.00012345501454547048, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2034, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 68.55208492279053, "completions/min_length": 30.25, "epoch": 4.045668900471581, "grad_norm": 0.007562275871408161, "kl": 0.12005615234375, "learning_rate": 6.567870417824904e-07, "loss": 0.00012012640218017623, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2035, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 61.50000190734863, "completions/min_length": 33.625, "epoch": 4.047654504839911, "grad_norm": 0.006990191592929426, "kl": 0.11993408203125, "learning_rate": 6.564874381413344e-07, "loss": 0.00011990381608484313, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2036, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 66.00000190734863, "completions/min_length": 35.125, "epoch": 4.04964010920824, "grad_norm": 0.005657248848228836, "kl": 0.10577392578125, "learning_rate": 6.561877721991866e-07, "loss": 0.0001058382767951116, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2037, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 67.98958539962769, "completions/min_length": 34.25, "epoch": 4.05162571357657, "grad_norm": 0.005489670958569793, "kl": 0.095947265625, "learning_rate": 6.558880440753507e-07, "loss": 9.60080506047234e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2038, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.875, "completions/mean_length": 65.50000190734863, "completions/min_length": 31.75, "epoch": 4.0536113179449, "grad_norm": 0.007025838097572775, "kl": 0.10809326171875, "learning_rate": 6.555882538891546e-07, "loss": 0.00010811498941620812, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2039, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 67.302086353302, "completions/min_length": 34.25, "epoch": 4.055596922313229, "grad_norm": 0.007774702212682246, "kl": 0.121826171875, "learning_rate": 6.552884017599516e-07, "loss": 0.00012183383660158142, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2040, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 69.41666841506958, "completions/min_length": 34.5, "epoch": 4.057582526681559, "grad_norm": 0.005903042534519346, "kl": 0.1195068359375, "learning_rate": 6.549884878071189e-07, "loss": 0.00011955788795603439, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2041, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.875, "completions/mean_length": 66.7604193687439, "completions/min_length": 32.25, "epoch": 4.059568131049888, "grad_norm": 0.006313828209238338, "kl": 0.09527587890625, "learning_rate": 6.546885121500584e-07, "loss": 9.523648623144254e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2042, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 69.76041889190674, "completions/min_length": 31.625, "epoch": 4.061553735418218, "grad_norm": 0.007556741937032334, "kl": 0.12371826171875, "learning_rate": 6.543884749081975e-07, "loss": 0.00012373802019283175, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2043, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 67.15625190734863, "completions/min_length": 32.25, "epoch": 4.063539339786548, "grad_norm": 0.0053709199638572805, "kl": 0.1041259765625, "learning_rate": 6.54088376200987e-07, "loss": 0.00010402753105154261, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2044, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 63.59375238418579, "completions/min_length": 36.25, "epoch": 4.065524944154877, "grad_norm": 0.006130519045175084, "kl": 0.0970458984375, "learning_rate": 6.537882161479027e-07, "loss": 9.713557665236294e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2045, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 71.57291889190674, "completions/min_length": 35.625, "epoch": 4.067510548523207, "grad_norm": 0.005291047146873017, "kl": 0.10546875, "learning_rate": 6.534879948684446e-07, "loss": 0.00010560001828707755, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2046, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 74.302086353302, "completions/min_length": 33.625, "epoch": 4.069496152891537, "grad_norm": 0.00536066426363881, "kl": 0.11505126953125, "learning_rate": 6.531877124821375e-07, "loss": 0.00011505494330776855, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2047, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 60.20833444595337, "completions/min_length": 31.25, "epoch": 4.071481757259866, "grad_norm": 0.006665023104218962, "kl": 0.1234130859375, "learning_rate": 6.5288736910853e-07, "loss": 0.0001234076771652326, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2048, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 71.73958444595337, "completions/min_length": 35.5, "epoch": 4.073467361628196, "grad_norm": 0.006252620345059078, "kl": 0.11041259765625, "learning_rate": 6.525869648671951e-07, "loss": 0.00011042873666156083, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2049, "train_speed(iter/s)": 0.02275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 69.13541841506958, "completions/min_length": 36.125, "epoch": 4.075452965996525, "grad_norm": 0.24347129207963739, "kl": 0.478759765625, "learning_rate": 6.522864998777304e-07, "loss": 0.00047699594870209694, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2050, "train_speed(iter/s)": 0.022751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 63.395835876464844, "completions/min_length": 34.25, "epoch": 4.077438570364855, "grad_norm": 0.005737709950479365, "kl": 0.1356201171875, "learning_rate": 6.519859742597573e-07, "loss": 0.00013556258636526763, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2051, "train_speed(iter/s)": 0.022751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 63.66666793823242, "completions/min_length": 33.75, "epoch": 4.079424174733185, "grad_norm": 2.258360350546377, "kl": 0.12432861328125, "learning_rate": 6.516853881329214e-07, "loss": 0.001749946502968669, "memory(GiB)": 94.21, "reward": 1.59375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.59375, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2052, "train_speed(iter/s)": 0.022752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 64.364586353302, "completions/min_length": 31.125, "epoch": 4.081409779101514, "grad_norm": 0.03400161286007858, "kl": 0.1368408203125, "learning_rate": 6.513847416168929e-07, "loss": 0.00013668896281160414, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2053, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 64.13541793823242, "completions/min_length": 28.625, "epoch": 4.083395383469844, "grad_norm": 0.006116490161371725, "kl": 0.10125732421875, "learning_rate": 6.51084034831365e-07, "loss": 0.0001011538552120328, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2054, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 70.57291889190674, "completions/min_length": 31.875, "epoch": 4.085380987838173, "grad_norm": 0.10092509064326098, "kl": 0.2481689453125, "learning_rate": 6.507832678960559e-07, "loss": 0.00024841591948643327, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2055, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 69.46875143051147, "completions/min_length": 36.25, "epoch": 4.087366592206503, "grad_norm": 0.007406912868890276, "kl": 0.11578369140625, "learning_rate": 6.504824409307069e-07, "loss": 0.00011570375500014052, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2056, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 68.64583587646484, "completions/min_length": 34.375, "epoch": 4.089352196574833, "grad_norm": 0.02998255989960638, "kl": 0.14678955078125, "learning_rate": 6.501815540550843e-07, "loss": 0.00014662364264950156, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2057, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 68.78125190734863, "completions/min_length": 33.625, "epoch": 4.091337800943162, "grad_norm": 0.006547836350884739, "kl": 0.10009765625, "learning_rate": 6.49880607388977e-07, "loss": 0.00010011550330091268, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2058, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 63.07291793823242, "completions/min_length": 30.375, "epoch": 4.093323405311492, "grad_norm": 0.00809876703403715, "kl": 0.1104736328125, "learning_rate": 6.495796010521985e-07, "loss": 0.0001104526745621115, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2059, "train_speed(iter/s)": 0.022758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 67.8541693687439, "completions/min_length": 34.625, "epoch": 4.095309009679822, "grad_norm": 0.006693559366330109, "kl": 0.109619140625, "learning_rate": 6.492785351645859e-07, "loss": 0.00010952491720672697, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2060, "train_speed(iter/s)": 0.022758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 71.75000143051147, "completions/min_length": 33.875, "epoch": 4.097294614048151, "grad_norm": 0.03956217964159395, "kl": 0.15301513671875, "learning_rate": 6.489774098460002e-07, "loss": 0.00015298080688808113, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2061, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 72.47916984558105, "completions/min_length": 37.125, "epoch": 4.099280218416481, "grad_norm": 1.0697147886646554, "kl": 0.10809326171875, "learning_rate": 6.486762252163254e-07, "loss": 0.003566889790818095, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2062, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 77.37500190734863, "completions/min_length": 33.375, "epoch": 4.10126582278481, "grad_norm": 0.009292234084509054, "kl": 0.1224365234375, "learning_rate": 6.483749813954694e-07, "loss": 0.00012236254406161606, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2063, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 68.1979193687439, "completions/min_length": 33.25, "epoch": 4.10325142715314, "grad_norm": 0.006989477705086491, "kl": 0.104248046875, "learning_rate": 6.480736785033644e-07, "loss": 0.00010422086052130908, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2064, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.875, "completions/mean_length": 82.42708587646484, "completions/min_length": 39.5, "epoch": 4.10523703152147, "grad_norm": 0.005914921173196594, "kl": 0.1207275390625, "learning_rate": 6.477723166599651e-07, "loss": 0.00012081613385817036, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2065, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 67.48958539962769, "completions/min_length": 33.125, "epoch": 4.107222635889799, "grad_norm": 0.009639885902223732, "kl": 0.12384033203125, "learning_rate": 6.474708959852503e-07, "loss": 0.00012394244549795985, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2066, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 69.18750143051147, "completions/min_length": 34.125, "epoch": 4.109208240258129, "grad_norm": 0.008215214185244112, "kl": 0.1318359375, "learning_rate": 6.471694165992219e-07, "loss": 0.0001318525173701346, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2067, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 62.09375190734863, "completions/min_length": 34.875, "epoch": 4.111193844626458, "grad_norm": 0.008315337984825706, "kl": 0.1231689453125, "learning_rate": 6.468678786219052e-07, "loss": 0.0001231917121913284, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2068, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 68.46875190734863, "completions/min_length": 31.375, "epoch": 4.113179448994788, "grad_norm": 0.007692090798060133, "kl": 0.11822509765625, "learning_rate": 6.46566282173349e-07, "loss": 0.00011806873953901231, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2069, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 81.7291693687439, "completions/min_length": 39.5, "epoch": 4.115165053363118, "grad_norm": 1.790176083852296, "kl": 0.1051025390625, "learning_rate": 6.462646273736254e-07, "loss": -0.000978361233137548, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.2407601661980152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2070, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 75.32291889190674, "completions/min_length": 37.25, "epoch": 4.117150657731447, "grad_norm": 0.007760996761536065, "kl": 0.12493896484375, "learning_rate": 6.459629143428294e-07, "loss": 0.00012478661665227264, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2071, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 69.29166889190674, "completions/min_length": 35.5, "epoch": 4.119136262099777, "grad_norm": 0.007313916390480204, "kl": 0.12762451171875, "learning_rate": 6.456611432010795e-07, "loss": 0.0001277018163818866, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2072, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 80.48958492279053, "completions/min_length": 33.75, "epoch": 4.121121866468107, "grad_norm": 0.007348825257992149, "kl": 0.12713623046875, "learning_rate": 6.453593140685171e-07, "loss": 0.00012722087558358908, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2073, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 70.1666693687439, "completions/min_length": 33.0, "epoch": 4.123107470836436, "grad_norm": 0.006566896844079208, "kl": 0.1209716796875, "learning_rate": 6.450574270653072e-07, "loss": 0.00012109326780773699, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2074, "train_speed(iter/s)": 0.022762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 71.04166984558105, "completions/min_length": 31.75, "epoch": 4.125093075204766, "grad_norm": 0.7637919243343001, "kl": 0.10986328125, "learning_rate": 6.447554823116371e-07, "loss": 0.006727161817252636, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2075, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 69.90625238418579, "completions/min_length": 37.0, "epoch": 4.127078679573095, "grad_norm": 0.007444117658055701, "kl": 0.1278076171875, "learning_rate": 6.444534799277177e-07, "loss": 0.0001277729170396924, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2076, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.125, "completions/mean_length": 73.96875286102295, "completions/min_length": 33.875, "epoch": 4.129064283941425, "grad_norm": 0.0064112611068168545, "kl": 0.10089111328125, "learning_rate": 6.441514200337823e-07, "loss": 0.00010088998533319682, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2077, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 65.87500286102295, "completions/min_length": 34.25, "epoch": 4.131049888309755, "grad_norm": 0.007220385721150977, "kl": 0.1048583984375, "learning_rate": 6.438493027500878e-07, "loss": 0.00010480167111381888, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2078, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 78.63541889190674, "completions/min_length": 32.875, "epoch": 4.133035492678084, "grad_norm": 0.12356985252536719, "kl": 0.19171142578125, "learning_rate": 6.435471281969132e-07, "loss": 0.0001919107453431934, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2079, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 81.72916793823242, "completions/min_length": 39.5, "epoch": 4.135021097046414, "grad_norm": 0.0072358693611688845, "kl": 0.1356201171875, "learning_rate": 6.432448964945607e-07, "loss": 0.00013537434278987348, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2080, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 67.88541793823242, "completions/min_length": 30.875, "epoch": 4.137006701414743, "grad_norm": 0.006059138893072708, "kl": 0.09832763671875, "learning_rate": 6.429426077633555e-07, "loss": 9.828020120039582e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2081, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.375, "completions/mean_length": 71.81250333786011, "completions/min_length": 30.125, "epoch": 4.138992305783073, "grad_norm": 0.007048251625977571, "kl": 0.1160888671875, "learning_rate": 6.426402621236448e-07, "loss": 0.00011619397264439613, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2082, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 68.93750190734863, "completions/min_length": 37.375, "epoch": 4.140977910151403, "grad_norm": 0.005108235245407083, "kl": 0.0955810546875, "learning_rate": 6.423378596957989e-07, "loss": 9.567459346726537e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2083, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 70.54166984558105, "completions/min_length": 32.5, "epoch": 4.142963514519732, "grad_norm": 0.006484121392104565, "kl": 0.1065673828125, "learning_rate": 6.42035400600211e-07, "loss": 0.00010657946404535323, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2084, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 71.17708396911621, "completions/min_length": 27.375, "epoch": 4.144949118888062, "grad_norm": 0.007882251499710408, "kl": 0.12213134765625, "learning_rate": 6.417328849572963e-07, "loss": 0.00012198302283650264, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2085, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 78.39583587646484, "completions/min_length": 32.875, "epoch": 4.146934723256392, "grad_norm": 0.004703872238594433, "kl": 0.11083984375, "learning_rate": 6.414303128874927e-07, "loss": 0.00011083879508078098, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2086, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.375, "completions/mean_length": 66.98958587646484, "completions/min_length": 36.75, "epoch": 4.148920327624721, "grad_norm": 0.005130927648626159, "kl": 0.10040283203125, "learning_rate": 6.411276845112607e-07, "loss": 0.00010044153896160424, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2087, "train_speed(iter/s)": 0.02276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.125, "completions/mean_length": 79.52083778381348, "completions/min_length": 32.125, "epoch": 4.150905931993051, "grad_norm": 0.8583976607757324, "kl": 0.11871337890625, "learning_rate": 6.40824999949083e-07, "loss": -0.0034922566264867783, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2088, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 84.86458587646484, "completions/min_length": 40.875, "epoch": 4.15289153636138, "grad_norm": 0.09933518555005108, "kl": 0.1343994140625, "learning_rate": 6.40522259321465e-07, "loss": 0.00013415730791166425, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2089, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 81.69791889190674, "completions/min_length": 41.625, "epoch": 4.15487714072971, "grad_norm": 0.01409942750044266, "kl": 0.15057373046875, "learning_rate": 6.402194627489339e-07, "loss": 0.00015057779091876, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2090, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 72.94791841506958, "completions/min_length": 34.5, "epoch": 4.1568627450980395, "grad_norm": 0.0054982524493101224, "kl": 0.110595703125, "learning_rate": 6.399166103520397e-07, "loss": 0.00011063828424084932, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2091, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 74.13541889190674, "completions/min_length": 33.375, "epoch": 4.158848349466369, "grad_norm": 0.005859043458859241, "kl": 0.1171875, "learning_rate": 6.396137022513545e-07, "loss": 0.00011718708265107125, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2092, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 66.63541889190674, "completions/min_length": 32.75, "epoch": 4.160833953834699, "grad_norm": 0.0057757172806900845, "kl": 0.096923828125, "learning_rate": 6.393107385674723e-07, "loss": 9.697902714833617e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2093, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 79.84375286102295, "completions/min_length": 39.375, "epoch": 4.162819558203028, "grad_norm": 1.339601419508341, "kl": 0.13958740234375, "learning_rate": 6.390077194210093e-07, "loss": 0.0008928714087232947, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2094, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 64.55208396911621, "completions/min_length": 29.75, "epoch": 4.164805162571358, "grad_norm": 1.4715376401178155, "kl": 0.100830078125, "learning_rate": 6.387046449326044e-07, "loss": -0.0012706003617495298, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2095, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 70.32292032241821, "completions/min_length": 34.5, "epoch": 4.1667907669396875, "grad_norm": 0.004903255111950009, "kl": 0.1004638671875, "learning_rate": 6.384015152229174e-07, "loss": 0.00010027983080362901, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2096, "train_speed(iter/s)": 0.022758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 73.55208587646484, "completions/min_length": 32.375, "epoch": 4.168776371308017, "grad_norm": 0.004786192227970846, "kl": 0.106689453125, "learning_rate": 6.380983304126312e-07, "loss": 0.00010673022916307673, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2097, "train_speed(iter/s)": 0.022758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 68.15625095367432, "completions/min_length": 33.875, "epoch": 4.1707619756763465, "grad_norm": 0.006162062992126851, "kl": 0.10003662109375, "learning_rate": 6.377950906224498e-07, "loss": 9.988941019400954e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2098, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 71.94791841506958, "completions/min_length": 36.625, "epoch": 4.1727475800446765, "grad_norm": 0.004269110568429055, "kl": 0.11187744140625, "learning_rate": 6.374917959730996e-07, "loss": 0.00011188999633304775, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2099, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 67.3229169845581, "completions/min_length": 32.5, "epoch": 4.174733184413006, "grad_norm": 1.147202615199326, "kl": 0.09332275390625, "learning_rate": 6.371884465853288e-07, "loss": 0.0021340053062886, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2100, "train_speed(iter/s)": 0.022759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 57.89583492279053, "completions/min_length": 28.25, "epoch": 4.1767187887813355, "grad_norm": 0.004930987189095135, "kl": 0.09088134765625, "learning_rate": 6.368850425799071e-07, "loss": 9.076326387003064e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2101, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 69.10416889190674, "completions/min_length": 34.875, "epoch": 4.178704393149665, "grad_norm": 0.0072407518494027765, "kl": 0.0975341796875, "learning_rate": 6.36581584077626e-07, "loss": 9.744841372594237e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2102, "train_speed(iter/s)": 0.022762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 70.28125190734863, "completions/min_length": 32.125, "epoch": 4.1806899975179945, "grad_norm": 0.031942284214138535, "kl": 0.1275634765625, "learning_rate": 6.36278071199299e-07, "loss": 0.0001275965478271246, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2103, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 72.92708444595337, "completions/min_length": 33.875, "epoch": 4.1826756018863245, "grad_norm": 0.017988018169071023, "kl": 0.11181640625, "learning_rate": 6.359745040657611e-07, "loss": 0.00011177727719768882, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2104, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 73.20833492279053, "completions/min_length": 34.0, "epoch": 4.1846612062546535, "grad_norm": 0.004784269293028821, "kl": 0.1123046875, "learning_rate": 6.356708827978688e-07, "loss": 0.00011230337258893996, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2105, "train_speed(iter/s)": 0.022762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 72.92708492279053, "completions/min_length": 30.875, "epoch": 4.1866468106229835, "grad_norm": 1.6088016454590102, "kl": 0.2540283203125, "learning_rate": 6.353672075165002e-07, "loss": -0.0019080055644735694, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2106, "train_speed(iter/s)": 0.022761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 239.125, "completions/mean_length": 79.47916889190674, "completions/min_length": 31.125, "epoch": 4.188632414991313, "grad_norm": 1.910045322763083, "kl": 0.10528564453125, "learning_rate": 6.350634783425548e-07, "loss": 0.030049694702029228, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.0852636992931366, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2107, "train_speed(iter/s)": 0.022757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 72.37500333786011, "completions/min_length": 34.875, "epoch": 4.1906180193596425, "grad_norm": 0.009727818090587384, "kl": 0.10888671875, "learning_rate": 6.347596953969538e-07, "loss": 0.00010897692118305713, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2108, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 68.63541841506958, "completions/min_length": 33.375, "epoch": 4.1926036237279725, "grad_norm": 0.006830828685938915, "kl": 0.09857177734375, "learning_rate": 6.344558588006397e-07, "loss": 9.849101479630917e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2109, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 71.30208492279053, "completions/min_length": 34.75, "epoch": 4.1945892280963015, "grad_norm": 1.7226840248697695, "kl": 0.111724853515625, "learning_rate": 6.341519686745764e-07, "loss": -0.0008382114465348423, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2110, "train_speed(iter/s)": 0.022756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 77.86458396911621, "completions/min_length": 35.5, "epoch": 4.1965748324646315, "grad_norm": 0.005362547596227209, "kl": 0.0897216796875, "learning_rate": 6.338480251397488e-07, "loss": 8.972088107839227e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2111, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 68.02083539962769, "completions/min_length": 31.375, "epoch": 4.198560436832961, "grad_norm": 1.1520637314147772, "kl": 0.54217529296875, "learning_rate": 6.335440283171635e-07, "loss": 0.0005427386495284736, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2112, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 75.22916793823242, "completions/min_length": 33.75, "epoch": 4.2005460412012905, "grad_norm": 0.815616580644487, "kl": 0.104278564453125, "learning_rate": 6.332399783278481e-07, "loss": -0.0004986375570297241, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2113, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.625, "completions/mean_length": 62.04166889190674, "completions/min_length": 33.875, "epoch": 4.2025316455696204, "grad_norm": 0.004005737931186196, "kl": 0.09130859375, "learning_rate": 6.329358752928515e-07, "loss": 9.119157766690478e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2114, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 73.10416793823242, "completions/min_length": 35.5, "epoch": 4.2045172499379495, "grad_norm": 0.004237827244992204, "kl": 0.1151123046875, "learning_rate": 6.326317193332434e-07, "loss": 0.00011505176371429116, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2115, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 77.17708492279053, "completions/min_length": 32.5, "epoch": 4.2065028543062795, "grad_norm": 0.003736387319663302, "kl": 0.09588623046875, "learning_rate": 6.323275105701149e-07, "loss": 9.586976375430822e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2116, "train_speed(iter/s)": 0.022755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 78.302086353302, "completions/min_length": 36.25, "epoch": 4.208488458674609, "grad_norm": 0.004441502566949554, "kl": 0.09747314453125, "learning_rate": 6.32023249124578e-07, "loss": 9.750405297381803e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2117, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 74.89583587646484, "completions/min_length": 33.5, "epoch": 4.2104740630429385, "grad_norm": 1.046161051555905, "kl": 0.1474609375, "learning_rate": 6.317189351177656e-07, "loss": -0.012441083788871765, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2118, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 73.59375143051147, "completions/min_length": 36.75, "epoch": 4.212459667411268, "grad_norm": 1.3710098052319393, "kl": 0.10467529296875, "learning_rate": 6.314145686708318e-07, "loss": -0.005860991310328245, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2119, "train_speed(iter/s)": 0.022754 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 70.53125190734863, "completions/min_length": 32.25, "epoch": 4.2144452717795975, "grad_norm": 0.7564927188599331, "kl": 0.08367919921875, "learning_rate": 6.311101499049511e-07, "loss": -0.007466999813914299, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2120, "train_speed(iter/s)": 0.022753 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 358.375, "completions/mean_length": 98.90625333786011, "completions/min_length": 39.125, "epoch": 4.216430876147927, "grad_norm": 0.4515841407103624, "kl": 0.1043701171875, "learning_rate": 6.308056789413194e-07, "loss": 0.019756514579057693, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2121, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 70.86458539962769, "completions/min_length": 34.75, "epoch": 4.218416480516257, "grad_norm": 0.005363501536762136, "kl": 0.095947265625, "learning_rate": 6.305011559011531e-07, "loss": 9.590342233423144e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2122, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 72.97916984558105, "completions/min_length": 33.5, "epoch": 4.2204020848845865, "grad_norm": 0.005511976413349534, "kl": 0.108642578125, "learning_rate": 6.301965809056889e-07, "loss": 0.00010873382416320965, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2123, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 81.78125190734863, "completions/min_length": 43.125, "epoch": 4.222387689252916, "grad_norm": 0.0060005434509784, "kl": 0.11236572265625, "learning_rate": 6.298919540761851e-07, "loss": 0.00011240018648095429, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2124, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 87.02083683013916, "completions/min_length": 41.125, "epoch": 4.224373293621246, "grad_norm": 0.00540217399546723, "kl": 0.1251220703125, "learning_rate": 6.2958727553392e-07, "loss": 0.0001250765926670283, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2125, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.125, "completions/mean_length": 85.3854193687439, "completions/min_length": 41.125, "epoch": 4.226358897989575, "grad_norm": 0.004633497194136154, "kl": 0.1041259765625, "learning_rate": 6.292825454001924e-07, "loss": 0.00010407729860162362, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2126, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 71.31250286102295, "completions/min_length": 37.25, "epoch": 4.228344502357905, "grad_norm": 0.019864548172534455, "kl": 0.11541748046875, "learning_rate": 6.289777637963222e-07, "loss": 0.00011548617476364598, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2127, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.875, "completions/mean_length": 77.45833539962769, "completions/min_length": 35.125, "epoch": 4.230330106726234, "grad_norm": 0.0038901213178737966, "kl": 0.09002685546875, "learning_rate": 6.286729308436491e-07, "loss": 8.997264376375824e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2128, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 76.08333587646484, "completions/min_length": 37.0, "epoch": 4.232315711094564, "grad_norm": 0.00409681251213564, "kl": 0.095458984375, "learning_rate": 6.283680466635342e-07, "loss": 9.557482553645968e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2129, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 75.16666889190674, "completions/min_length": 31.375, "epoch": 4.234301315462894, "grad_norm": 0.004317427547692185, "kl": 0.09796142578125, "learning_rate": 6.280631113773579e-07, "loss": 9.802542626857758e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2130, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 75.645836353302, "completions/min_length": 35.875, "epoch": 4.236286919831223, "grad_norm": 0.0050924570654843375, "kl": 0.10748291015625, "learning_rate": 6.277581251065216e-07, "loss": 0.00010743318125605583, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2131, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 80.19792032241821, "completions/min_length": 33.625, "epoch": 4.238272524199553, "grad_norm": 0.005420583518167263, "kl": 0.093780517578125, "learning_rate": 6.274530879724467e-07, "loss": 9.378982940688729e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2132, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 81.78125238418579, "completions/min_length": 36.0, "epoch": 4.240258128567882, "grad_norm": 0.02458503872892915, "kl": 0.14434814453125, "learning_rate": 6.271480000965753e-07, "loss": 0.0001443479413865134, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2133, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 71.72916841506958, "completions/min_length": 36.125, "epoch": 4.242243732936212, "grad_norm": 0.007250340908846005, "kl": 0.11737060546875, "learning_rate": 6.268428616003692e-07, "loss": 0.00011734977306332439, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2134, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.625, "completions/mean_length": 70.00000238418579, "completions/min_length": 34.875, "epoch": 4.244229337304542, "grad_norm": 0.005096121448354268, "kl": 0.11724853515625, "learning_rate": 6.265376726053106e-07, "loss": 0.00011712061677826568, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2135, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 77.54166889190674, "completions/min_length": 33.375, "epoch": 4.246214941672871, "grad_norm": 1.0925840630045296, "kl": 0.1119384765625, "learning_rate": 6.262324332329017e-07, "loss": 0.010600554756820202, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2136, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 69.0416693687439, "completions/min_length": 36.875, "epoch": 4.248200546041201, "grad_norm": 0.02270641123003356, "kl": 0.12701416015625, "learning_rate": 6.25927143604665e-07, "loss": 0.00012672273442149162, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2137, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 71.14583587646484, "completions/min_length": 30.375, "epoch": 4.250186150409531, "grad_norm": 0.0073331646430859435, "kl": 0.12554931640625, "learning_rate": 6.256218038421427e-07, "loss": 0.00012549271923489869, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2138, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 73.56250190734863, "completions/min_length": 32.125, "epoch": 4.25217175477786, "grad_norm": 1.214939233500986, "kl": 0.12042236328125, "learning_rate": 6.253164140668969e-07, "loss": -0.010047522373497486, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2139, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 71.86458539962769, "completions/min_length": 32.625, "epoch": 4.25415735914619, "grad_norm": 0.006206648688445081, "kl": 0.11810302734375, "learning_rate": 6.250109744005099e-07, "loss": 0.00011817819904536009, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2140, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 73.63541841506958, "completions/min_length": 35.625, "epoch": 4.256142963514519, "grad_norm": 0.006382881391492027, "kl": 0.1397705078125, "learning_rate": 6.247054849645841e-07, "loss": 0.0001397307205479592, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2141, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 79.93750286102295, "completions/min_length": 34.75, "epoch": 4.258128567882849, "grad_norm": 0.0051710882284947335, "kl": 0.10797119140625, "learning_rate": 6.24399945880741e-07, "loss": 0.00010794639092637226, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2142, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 75.43750095367432, "completions/min_length": 36.75, "epoch": 4.260114172251179, "grad_norm": 0.005328129405991408, "kl": 0.1298828125, "learning_rate": 6.240943572706222e-07, "loss": 0.0001298969582421705, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2143, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 72.34375190734863, "completions/min_length": 32.125, "epoch": 4.262099776619508, "grad_norm": 0.011909797426491574, "kl": 0.123291015625, "learning_rate": 6.237887192558893e-07, "loss": 0.00012328616867307574, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2144, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 81.79166984558105, "completions/min_length": 37.875, "epoch": 4.264085380987838, "grad_norm": 0.8580151172022481, "kl": 0.11370849609375, "learning_rate": 6.234830319582232e-07, "loss": 0.010421425104141235, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2145, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.875, "completions/mean_length": 79.67708492279053, "completions/min_length": 40.125, "epoch": 4.266070985356167, "grad_norm": 0.0067696176604222, "kl": 0.1102294921875, "learning_rate": 6.231772954993244e-07, "loss": 0.00011016390635631979, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2146, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 77.5416693687439, "completions/min_length": 39.125, "epoch": 4.268056589724497, "grad_norm": 0.007469727195422004, "kl": 0.1107177734375, "learning_rate": 6.228715100009134e-07, "loss": 0.00011065860599046573, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2147, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 76.95833492279053, "completions/min_length": 37.0, "epoch": 4.270042194092827, "grad_norm": 1.4679923823335637, "kl": 0.113037109375, "learning_rate": 6.225656755847297e-07, "loss": 0.00011307001113891602, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2148, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 77.08333587646484, "completions/min_length": 34.75, "epoch": 4.272027798461156, "grad_norm": 0.3427710252940932, "kl": 0.26995849609375, "learning_rate": 6.222597923725326e-07, "loss": 0.0002700219047255814, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2149, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 86.68750286102295, "completions/min_length": 38.0, "epoch": 4.274013402829486, "grad_norm": 0.006710652229371557, "kl": 0.1202392578125, "learning_rate": 6.219538604861008e-07, "loss": 0.00012040885485475883, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2150, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 80.28125286102295, "completions/min_length": 35.25, "epoch": 4.275999007197816, "grad_norm": 0.006281563695797398, "kl": 0.09869384765625, "learning_rate": 6.216478800472323e-07, "loss": 9.877184493234381e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2151, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 70.72916889190674, "completions/min_length": 39.875, "epoch": 4.277984611566145, "grad_norm": 0.007604098408230099, "kl": 0.11285400390625, "learning_rate": 6.213418511777444e-07, "loss": 0.00011290128895780072, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2152, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 73.31250095367432, "completions/min_length": 32.375, "epoch": 4.279970215934475, "grad_norm": 0.00611997089867395, "kl": 0.09368896484375, "learning_rate": 6.210357739994736e-07, "loss": 9.357830276712775e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2153, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 83.48958587646484, "completions/min_length": 37.875, "epoch": 4.281955820302804, "grad_norm": 0.005505165867505372, "kl": 0.1214599609375, "learning_rate": 6.207296486342762e-07, "loss": 0.00012154154683230445, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2154, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 80.89583492279053, "completions/min_length": 33.75, "epoch": 4.283941424671134, "grad_norm": 0.005343126484922576, "kl": 0.11297607421875, "learning_rate": 6.204234752040267e-07, "loss": 0.00011293106945231557, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2155, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 70.26041889190674, "completions/min_length": 35.5, "epoch": 4.285927029039464, "grad_norm": 0.005165625719855671, "kl": 0.10089111328125, "learning_rate": 6.201172538306197e-07, "loss": 0.00010086174006573856, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2156, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 84.13541841506958, "completions/min_length": 39.625, "epoch": 4.287912633407793, "grad_norm": 0.005232846227203835, "kl": 0.11505126953125, "learning_rate": 6.198109846359681e-07, "loss": 0.00011489923053886741, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2157, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 84.35416984558105, "completions/min_length": 32.625, "epoch": 4.289898237776123, "grad_norm": 1.2383161245242642, "kl": 0.12127685546875, "learning_rate": 6.195046677420046e-07, "loss": 0.009707589633762836, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2158, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 72.54166889190674, "completions/min_length": 34.625, "epoch": 4.291883842144452, "grad_norm": 0.004348617019701289, "kl": 0.0947265625, "learning_rate": 6.191983032706802e-07, "loss": 9.466991468798369e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2159, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 75.84375190734863, "completions/min_length": 29.875, "epoch": 4.293869446512782, "grad_norm": 0.004859036025950963, "kl": 0.115081787109375, "learning_rate": 6.188918913439654e-07, "loss": 0.00011508363240864128, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2160, "train_speed(iter/s)": 0.022749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 74.77083492279053, "completions/min_length": 35.625, "epoch": 4.295855050881112, "grad_norm": 0.020921215540191326, "kl": 0.108001708984375, "learning_rate": 6.18585432083849e-07, "loss": 0.00010808964725583792, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2161, "train_speed(iter/s)": 0.022748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 83.4479193687439, "completions/min_length": 39.125, "epoch": 4.297840655249441, "grad_norm": 1.266218071061667, "kl": 0.102294921875, "learning_rate": 6.182789256123392e-07, "loss": -0.00463305227458477, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2162, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 82.32291889190674, "completions/min_length": 38.0, "epoch": 4.299826259617771, "grad_norm": 0.007097711139553494, "kl": 0.13116455078125, "learning_rate": 6.179723720514628e-07, "loss": 0.00013098123599775136, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2163, "train_speed(iter/s)": 0.022747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 83.63541984558105, "completions/min_length": 41.5, "epoch": 4.301811863986101, "grad_norm": 2.1056242944638575, "kl": 0.1376953125, "learning_rate": 6.176657715232653e-07, "loss": 0.0019861895125359297, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2164, "train_speed(iter/s)": 0.022746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 87.53125190734863, "completions/min_length": 38.75, "epoch": 4.30379746835443, "grad_norm": 0.004936754792153554, "kl": 0.1077880859375, "learning_rate": 6.173591241498108e-07, "loss": 0.00010781047603813931, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2165, "train_speed(iter/s)": 0.022745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 81.70833683013916, "completions/min_length": 36.0, "epoch": 4.30578307272276, "grad_norm": 1.9775669128117377, "kl": 0.10980224609375, "learning_rate": 6.170524300531822e-07, "loss": -0.022212138399481773, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.057790376245975494, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2166, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 69.27083539962769, "completions/min_length": 32.0, "epoch": 4.307768677091089, "grad_norm": 0.0065041533823454786, "kl": 0.10552978515625, "learning_rate": 6.167456893554811e-07, "loss": 0.0001056869950843975, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2167, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 73.82291793823242, "completions/min_length": 29.75, "epoch": 4.309754281459419, "grad_norm": 0.005296944146499002, "kl": 0.09259033203125, "learning_rate": 6.164389021788274e-07, "loss": 9.261623199563473e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2168, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 76.63541984558105, "completions/min_length": 35.25, "epoch": 4.311739885827749, "grad_norm": 0.8327864291520056, "kl": 0.13287353515625, "learning_rate": 6.161320686453597e-07, "loss": -0.006725304760038853, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2169, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 64.06250095367432, "completions/min_length": 30.5, "epoch": 4.313725490196078, "grad_norm": 1.4972505990412106, "kl": 0.106475830078125, "learning_rate": 6.158251888772349e-07, "loss": 0.0009859844576567411, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2170, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 66.55208587646484, "completions/min_length": 34.0, "epoch": 4.315711094564408, "grad_norm": 0.009045672936514327, "kl": 0.0880126953125, "learning_rate": 6.155182629966284e-07, "loss": 8.795078611001372e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2171, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 71.94791984558105, "completions/min_length": 34.125, "epoch": 4.317696698932737, "grad_norm": 0.19352634896198373, "kl": 0.29296875, "learning_rate": 6.152112911257341e-07, "loss": 0.0002925730077549815, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2172, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 73.36458539962769, "completions/min_length": 34.375, "epoch": 4.319682303301067, "grad_norm": 0.004952008794087204, "kl": 0.094696044921875, "learning_rate": 6.149042733867638e-07, "loss": 9.47610751609318e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2173, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 72.16666889190674, "completions/min_length": 32.375, "epoch": 4.321667907669397, "grad_norm": 0.010858264860695637, "kl": 0.1270751953125, "learning_rate": 6.145972099019482e-07, "loss": 0.00012701982632279396, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2174, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 79.95833587646484, "completions/min_length": 37.25, "epoch": 4.323653512037726, "grad_norm": 0.004800592752618598, "kl": 0.11627197265625, "learning_rate": 6.142901007935354e-07, "loss": 0.00011625223851297051, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2175, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 74.98958492279053, "completions/min_length": 39.875, "epoch": 4.325639116406056, "grad_norm": 0.009943999205471144, "kl": 0.132171630859375, "learning_rate": 6.139829461837923e-07, "loss": 0.00013222289271652699, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2176, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 235.875, "completions/mean_length": 79.98958587646484, "completions/min_length": 35.75, "epoch": 4.327624720774386, "grad_norm": 0.5031584637934956, "kl": 0.10137939453125, "learning_rate": 6.136757461950038e-07, "loss": 0.018761513754725456, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2177, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 69.56250238418579, "completions/min_length": 35.625, "epoch": 4.329610325142715, "grad_norm": 0.005095083675958716, "kl": 0.13433837890625, "learning_rate": 6.133685009494727e-07, "loss": 0.00013436871813610196, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2178, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 70.40625190734863, "completions/min_length": 33.125, "epoch": 4.331595929511045, "grad_norm": 0.005527743852791901, "kl": 0.089111328125, "learning_rate": 6.130612105695198e-07, "loss": 8.904706191970035e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2179, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 72.85416793823242, "completions/min_length": 35.125, "epoch": 4.333581533879374, "grad_norm": 0.015451175902975043, "kl": 0.14697265625, "learning_rate": 6.127538751774838e-07, "loss": 0.00014691613614559174, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2180, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 76.89583396911621, "completions/min_length": 29.5, "epoch": 4.335567138247704, "grad_norm": 0.005606252675714691, "kl": 0.09564208984375, "learning_rate": 6.12446494895722e-07, "loss": 9.575676813255996e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2181, "train_speed(iter/s)": 0.022737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 63.67708492279053, "completions/min_length": 31.75, "epoch": 4.337552742616034, "grad_norm": 0.005841399969538675, "kl": 0.09710693359375, "learning_rate": 6.12139069846609e-07, "loss": 9.709967707749456e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2182, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 70.81250143051147, "completions/min_length": 31.375, "epoch": 4.339538346984363, "grad_norm": 0.0052388760696236505, "kl": 0.10931396484375, "learning_rate": 6.118316001525367e-07, "loss": 0.0001094454200938344, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2183, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 73.40625190734863, "completions/min_length": 35.0, "epoch": 4.341523951352693, "grad_norm": 0.007631662816604879, "kl": 0.12664794921875, "learning_rate": 6.115240859359158e-07, "loss": 0.00012666269321925938, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2184, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 70.22916793823242, "completions/min_length": 35.25, "epoch": 4.343509555721022, "grad_norm": 2.353035239427212, "kl": 0.1259765625, "learning_rate": 6.112165273191743e-07, "loss": 0.01087275892496109, "memory(GiB)": 94.21, "reward": 1.8854166865348816, "reward_std": 0.0900652389973402, "rewards/CineAccuracyORM/mean": 0.885416679084301, "rewards/CineAccuracyORM/std": 0.17456800863146782, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2185, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 75.35416841506958, "completions/min_length": 35.875, "epoch": 4.345495160089352, "grad_norm": 0.0068809409178183486, "kl": 0.12139892578125, "learning_rate": 6.109089244247576e-07, "loss": 0.00012130946561228484, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2186, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 74.15625286102295, "completions/min_length": 35.375, "epoch": 4.347480764457682, "grad_norm": 1.5008723837449958, "kl": 0.13409423828125, "learning_rate": 6.106012773751292e-07, "loss": 0.0020405042450875044, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2187, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 67.92708587646484, "completions/min_length": 32.5, "epoch": 4.349466368826011, "grad_norm": 0.006215444967002574, "kl": 0.10614013671875, "learning_rate": 6.102935862927699e-07, "loss": 0.00010628569725668058, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2188, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 63.21875238418579, "completions/min_length": 28.375, "epoch": 4.351451973194341, "grad_norm": 0.005864865024531859, "kl": 0.08837890625, "learning_rate": 6.099858513001781e-07, "loss": 8.834658365231007e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2189, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.375, "completions/mean_length": 78.22916841506958, "completions/min_length": 36.625, "epoch": 4.353437577562671, "grad_norm": 0.011949924863843985, "kl": 0.12762451171875, "learning_rate": 6.096780725198696e-07, "loss": 0.0001275732065550983, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2190, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 73.64583539962769, "completions/min_length": 35.5, "epoch": 4.355423181931, "grad_norm": 0.7898535902942363, "kl": 0.11663818359375, "learning_rate": 6.093702500743777e-07, "loss": -0.008968975394964218, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2191, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 77.37500238418579, "completions/min_length": 34.875, "epoch": 4.35740878629933, "grad_norm": 0.005929511737035633, "kl": 0.113525390625, "learning_rate": 6.090623840862532e-07, "loss": 0.00011332183930790052, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2192, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 70.40625143051147, "completions/min_length": 38.75, "epoch": 4.359394390667659, "grad_norm": 0.006608199745306608, "kl": 0.09600830078125, "learning_rate": 6.087544746780642e-07, "loss": 9.602106729289517e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2193, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 64.60416793823242, "completions/min_length": 33.625, "epoch": 4.361379995035989, "grad_norm": 0.0075261420942403555, "kl": 0.09661865234375, "learning_rate": 6.084465219723958e-07, "loss": 9.656776092015207e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2194, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 73.09375286102295, "completions/min_length": 33.5, "epoch": 4.363365599404319, "grad_norm": 0.007139100938433291, "kl": 0.1239013671875, "learning_rate": 6.081385260918506e-07, "loss": 0.00012366512964945287, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2195, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 76.56250286102295, "completions/min_length": 33.0, "epoch": 4.365351203772648, "grad_norm": 0.007515732848141562, "kl": 0.14251708984375, "learning_rate": 6.078304871590484e-07, "loss": 0.00014258341980166733, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2196, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 63.343751430511475, "completions/min_length": 28.875, "epoch": 4.367336808140978, "grad_norm": 0.012154051616345378, "kl": 0.1004638671875, "learning_rate": 6.07522405296626e-07, "loss": 0.00010032587306341156, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2197, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 78.87500238418579, "completions/min_length": 36.25, "epoch": 4.369322412509307, "grad_norm": 0.006292582384205913, "kl": 0.1156005859375, "learning_rate": 6.072142806272375e-07, "loss": 0.00011555859236977994, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2198, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 74.62500286102295, "completions/min_length": 34.75, "epoch": 4.371308016877637, "grad_norm": 1.0412706249333372, "kl": 0.11749267578125, "learning_rate": 6.069061132735539e-07, "loss": 0.012513134628534317, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2199, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 71.35416889190674, "completions/min_length": 30.375, "epoch": 4.373293621245967, "grad_norm": 2.327396643946038, "kl": 0.1337890625, "learning_rate": 6.065979033582631e-07, "loss": 0.007141719572246075, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166669771075, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2200, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 74.32291793823242, "completions/min_length": 34.375, "epoch": 4.375279225614296, "grad_norm": 0.057880779564860686, "kl": 0.21368408203125, "learning_rate": 6.0628965100407e-07, "loss": 0.00021348870359361172, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2201, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 70.50000333786011, "completions/min_length": 34.75, "epoch": 4.377264829982626, "grad_norm": 0.008269238724736299, "kl": 0.10009765625, "learning_rate": 6.059813563336966e-07, "loss": 0.0001000729389488697, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2202, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 64.00000047683716, "completions/min_length": 30.875, "epoch": 4.379250434350956, "grad_norm": 0.008295183069219449, "kl": 0.111572265625, "learning_rate": 6.056730194698816e-07, "loss": 0.00011152803199365735, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2203, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 73.77083683013916, "completions/min_length": 33.125, "epoch": 4.381236038719285, "grad_norm": 0.006837591722160979, "kl": 0.11505126953125, "learning_rate": 6.053646405353803e-07, "loss": 0.00011512891796883196, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2204, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 75.31250190734863, "completions/min_length": 32.375, "epoch": 4.383221643087615, "grad_norm": 0.1702190118078794, "kl": 0.17474365234375, "learning_rate": 6.050562196529651e-07, "loss": 0.00017487231525592506, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2205, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 66.10416793823242, "completions/min_length": 32.125, "epoch": 4.385207247455944, "grad_norm": 3.9327445203665405, "kl": 0.11376953125, "learning_rate": 6.047477569454251e-07, "loss": 0.002327452879399061, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2206, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 62.31250190734863, "completions/min_length": 30.875, "epoch": 4.387192851824274, "grad_norm": 0.0075972750118719476, "kl": 0.10369873046875, "learning_rate": 6.044392525355655e-07, "loss": 0.00010372063843533397, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2207, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 81.6354193687439, "completions/min_length": 38.5, "epoch": 4.389178456192604, "grad_norm": 0.006659378555092665, "kl": 0.11602783203125, "learning_rate": 6.041307065462086e-07, "loss": 0.00011604859901126474, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2208, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 64.63541889190674, "completions/min_length": 30.875, "epoch": 4.391164060560933, "grad_norm": 0.007547120112482382, "kl": 0.12982177734375, "learning_rate": 6.038221191001934e-07, "loss": 0.000129716529045254, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2209, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 72.02083539962769, "completions/min_length": 34.0, "epoch": 4.393149664929263, "grad_norm": 0.006123245332568914, "kl": 0.12371826171875, "learning_rate": 6.03513490320375e-07, "loss": 0.0001235016097780317, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2210, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 61.45833444595337, "completions/min_length": 34.375, "epoch": 4.395135269297592, "grad_norm": 0.007131661201739009, "kl": 0.1190185546875, "learning_rate": 6.03204820329625e-07, "loss": 0.00011896588694071397, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2211, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 75.73958587646484, "completions/min_length": 34.5, "epoch": 4.397120873665922, "grad_norm": 0.005671645655020672, "kl": 0.11016845703125, "learning_rate": 6.028961092508318e-07, "loss": 0.00011019622616004199, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2212, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.125, "completions/mean_length": 61.82291889190674, "completions/min_length": 32.625, "epoch": 4.399106478034252, "grad_norm": 0.0061670904229082875, "kl": 0.097076416015625, "learning_rate": 6.025873572068996e-07, "loss": 9.709945879876614e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2213, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 70.97916984558105, "completions/min_length": 33.5, "epoch": 4.401092082402581, "grad_norm": 0.005323586639017825, "kl": 0.1224365234375, "learning_rate": 6.022785643207494e-07, "loss": 0.0001225903833983466, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2214, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 65.90625190734863, "completions/min_length": 33.625, "epoch": 4.403077686770911, "grad_norm": 0.960801187850006, "kl": 0.129150390625, "learning_rate": 6.019697307153179e-07, "loss": -0.0025950162671506405, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2215, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 68.67708444595337, "completions/min_length": 34.0, "epoch": 4.405063291139241, "grad_norm": 0.005329605907190612, "kl": 0.10321044921875, "learning_rate": 6.016608565135587e-07, "loss": 0.0001033106236718595, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2216, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 68.47916889190674, "completions/min_length": 33.5, "epoch": 4.40704889550757, "grad_norm": 0.007051404309491915, "kl": 0.102294921875, "learning_rate": 6.013519418384411e-07, "loss": 0.00010225444566458464, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2217, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 79.88541889190674, "completions/min_length": 34.625, "epoch": 4.4090344998759, "grad_norm": 0.006658809598769112, "kl": 0.13885498046875, "learning_rate": 6.010429868129506e-07, "loss": 0.0001388160017086193, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2218, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 76.05208587646484, "completions/min_length": 37.25, "epoch": 4.411020104244229, "grad_norm": 0.005511796052147891, "kl": 0.12298583984375, "learning_rate": 6.007339915600889e-07, "loss": 0.00012317602522671223, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2219, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 69.71875095367432, "completions/min_length": 33.0, "epoch": 4.413005708612559, "grad_norm": 1.798769509700025, "kl": 0.1224365234375, "learning_rate": 6.004249562028734e-07, "loss": 0.004593212157487869, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2220, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.625, "completions/mean_length": 78.8541693687439, "completions/min_length": 34.375, "epoch": 4.414991312980889, "grad_norm": 0.00873641520187544, "kl": 0.124267578125, "learning_rate": 6.001158808643378e-07, "loss": 0.0001242886937689036, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2221, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 71.020836353302, "completions/min_length": 32.25, "epoch": 4.416976917349218, "grad_norm": 0.008128993079254675, "kl": 0.13153076171875, "learning_rate": 5.998067656675318e-07, "loss": 0.00013156019849702716, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2222, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 75.81250190734863, "completions/min_length": 32.5, "epoch": 4.418962521717548, "grad_norm": 0.005167928035830761, "kl": 0.1212158203125, "learning_rate": 5.994976107355204e-07, "loss": 0.00012118794256821275, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2223, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 68.00000190734863, "completions/min_length": 33.375, "epoch": 4.420948126085877, "grad_norm": 1.3517077603265466, "kl": 0.11785888671875, "learning_rate": 5.991884161913849e-07, "loss": -0.0055998824536800385, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2224, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.375, "completions/mean_length": 68.70833492279053, "completions/min_length": 34.25, "epoch": 4.422933730454207, "grad_norm": 0.008277399572450427, "kl": 0.1094970703125, "learning_rate": 5.988791821582223e-07, "loss": 0.00010955524339806288, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2225, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 72.78125143051147, "completions/min_length": 35.5, "epoch": 4.424919334822537, "grad_norm": 2.1676799538672005, "kl": 0.108642578125, "learning_rate": 5.985699087591455e-07, "loss": 0.0005191924865357578, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2226, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 76.11458587646484, "completions/min_length": 33.375, "epoch": 4.426904939190866, "grad_norm": 0.010581434453325027, "kl": 0.11492919921875, "learning_rate": 5.982605961172826e-07, "loss": 0.00011495671787997708, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2227, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 77.28125190734863, "completions/min_length": 39.5, "epoch": 4.428890543559196, "grad_norm": 0.014829442732975697, "kl": 0.14886474609375, "learning_rate": 5.979512443557773e-07, "loss": 0.00014887719589751214, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2228, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 80.87500286102295, "completions/min_length": 35.875, "epoch": 4.430876147927526, "grad_norm": 0.006505129416172592, "kl": 0.10479736328125, "learning_rate": 5.976418535977895e-07, "loss": 0.00010468787513673306, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2229, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 69.59375190734863, "completions/min_length": 32.625, "epoch": 4.432861752295855, "grad_norm": 0.00908326855435322, "kl": 0.10736083984375, "learning_rate": 5.973324239664943e-07, "loss": 0.00010746221232693642, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2230, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 78.5729193687439, "completions/min_length": 32.875, "epoch": 4.434847356664185, "grad_norm": 0.14447759008994532, "kl": 0.1588134765625, "learning_rate": 5.970229555850823e-07, "loss": 0.0001588661689311266, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2231, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.875, "completions/mean_length": 69.38541841506958, "completions/min_length": 32.625, "epoch": 4.436832961032514, "grad_norm": 0.004345346713983905, "kl": 0.094024658203125, "learning_rate": 5.96713448576759e-07, "loss": 9.405636228621006e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2232, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.25, "completions/mean_length": 80.56250286102295, "completions/min_length": 41.25, "epoch": 4.438818565400844, "grad_norm": 0.007195054618359823, "kl": 0.10919189453125, "learning_rate": 5.964039030647463e-07, "loss": 0.00010911936260527, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2233, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 79.96875190734863, "completions/min_length": 38.0, "epoch": 4.440804169769174, "grad_norm": 0.7738683864853091, "kl": 0.12646484375, "learning_rate": 5.960943191722806e-07, "loss": -0.001549186883494258, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2234, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 85.32291984558105, "completions/min_length": 41.625, "epoch": 4.442789774137503, "grad_norm": 0.005536912862824791, "kl": 0.119415283203125, "learning_rate": 5.957846970226139e-07, "loss": 0.00011937151430174708, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2235, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 71.31250286102295, "completions/min_length": 31.625, "epoch": 4.444775378505833, "grad_norm": 0.006703990839594633, "kl": 0.093994140625, "learning_rate": 5.954750367390133e-07, "loss": 9.39558885875158e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2236, "train_speed(iter/s)": 0.022743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 78.05208492279053, "completions/min_length": 38.25, "epoch": 4.446760982874162, "grad_norm": 1.131962043629531, "kl": 0.115234375, "learning_rate": 5.951653384447614e-07, "loss": -0.002550897654145956, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2237, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 77.97917079925537, "completions/min_length": 32.875, "epoch": 4.448746587242492, "grad_norm": 0.0051354552749809145, "kl": 0.0958251953125, "learning_rate": 5.948556022631556e-07, "loss": 9.585735824657604e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2238, "train_speed(iter/s)": 0.022744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.125, "completions/mean_length": 86.14583587646484, "completions/min_length": 39.25, "epoch": 4.450732191610822, "grad_norm": 0.004362142424286791, "kl": 0.103515625, "learning_rate": 5.945458283175084e-07, "loss": 0.00010353367542847991, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2239, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 72.92708492279053, "completions/min_length": 33.625, "epoch": 4.452717795979151, "grad_norm": 0.006494140007703258, "kl": 0.115478515625, "learning_rate": 5.942360167311476e-07, "loss": 0.00011557410471141338, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2240, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 75.46875286102295, "completions/min_length": 32.125, "epoch": 4.454703400347481, "grad_norm": 0.005789564642703661, "kl": 0.108795166015625, "learning_rate": 5.939261676274155e-07, "loss": 0.00010869429388549179, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2241, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 88.41666793823242, "completions/min_length": 39.625, "epoch": 4.456689004715811, "grad_norm": 1.2740001698366927, "kl": 0.09259033203125, "learning_rate": 5.936162811296699e-07, "loss": 0.007235649041831493, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2242, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 67.46875143051147, "completions/min_length": 30.625, "epoch": 4.45867460908414, "grad_norm": 0.005501138745148115, "kl": 0.1058349609375, "learning_rate": 5.933063573612835e-07, "loss": 0.00010583333641989157, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2243, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 71.65625333786011, "completions/min_length": 32.125, "epoch": 4.46066021345247, "grad_norm": 0.0052792073570150375, "kl": 0.09326171875, "learning_rate": 5.929963964456429e-07, "loss": 9.329054591944441e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2244, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 65.1979193687439, "completions/min_length": 35.375, "epoch": 4.462645817820799, "grad_norm": 1.5023405885701933, "kl": 0.670166015625, "learning_rate": 5.926863985061506e-07, "loss": 0.0006756539805792272, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2245, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 82.18750095367432, "completions/min_length": 41.875, "epoch": 4.464631422189129, "grad_norm": 0.005549182014369176, "kl": 0.095733642578125, "learning_rate": 5.923763636662233e-07, "loss": 9.578568278811872e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2246, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 65.51041889190674, "completions/min_length": 34.0, "epoch": 4.466617026557459, "grad_norm": 0.006619573686598099, "kl": 0.07904052734375, "learning_rate": 5.920662920492927e-07, "loss": 7.910047133918852e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2247, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 70.50000190734863, "completions/min_length": 33.375, "epoch": 4.468602630925788, "grad_norm": 0.005926243252638949, "kl": 0.09478759765625, "learning_rate": 5.917561837788045e-07, "loss": 9.47156804613769e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2248, "train_speed(iter/s)": 0.022741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 70.26041889190674, "completions/min_length": 35.875, "epoch": 4.470588235294118, "grad_norm": 0.004213126061681416, "kl": 0.08453369140625, "learning_rate": 5.914460389782198e-07, "loss": 8.450097811874002e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2249, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.125, "completions/mean_length": 69.67708492279053, "completions/min_length": 30.375, "epoch": 4.472573839662447, "grad_norm": 0.004192200362034999, "kl": 0.08502197265625, "learning_rate": 5.911358577710137e-07, "loss": 8.49056668812409e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2250, "train_speed(iter/s)": 0.022742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 71.58333587646484, "completions/min_length": 33.0, "epoch": 4.474559444030777, "grad_norm": 0.006390449071373999, "kl": 0.080718994140625, "learning_rate": 5.908256402806761e-07, "loss": 8.078113751253113e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2251, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 81.3854193687439, "completions/min_length": 37.625, "epoch": 4.476545048399107, "grad_norm": 0.005658102287913021, "kl": 0.1036376953125, "learning_rate": 5.90515386630711e-07, "loss": 0.00010348573414376006, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2252, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 65.739586353302, "completions/min_length": 29.125, "epoch": 4.478530652767436, "grad_norm": 0.004735257029860334, "kl": 0.0887451171875, "learning_rate": 5.90205096944637e-07, "loss": 8.882739348337054e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2253, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 83.23958539962769, "completions/min_length": 34.5, "epoch": 4.480516257135766, "grad_norm": 0.005777942210744048, "kl": 0.123779296875, "learning_rate": 5.898947713459874e-07, "loss": 0.00012394817895255983, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2254, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 74.677086353302, "completions/min_length": 27.375, "epoch": 4.482501861504096, "grad_norm": 0.005132669729563733, "kl": 0.08868408203125, "learning_rate": 5.895844099583093e-07, "loss": 8.870419696904719e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2255, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 73.89583587646484, "completions/min_length": 36.75, "epoch": 4.484487465872425, "grad_norm": 0.004848555611705649, "kl": 0.0904541015625, "learning_rate": 5.892740129051637e-07, "loss": 9.037971904035658e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2256, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 71.29166841506958, "completions/min_length": 32.75, "epoch": 4.486473070240755, "grad_norm": 0.00417805260128553, "kl": 0.10064697265625, "learning_rate": 5.88963580310127e-07, "loss": 0.00010073679004563019, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2257, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 80.81250190734863, "completions/min_length": 41.375, "epoch": 4.488458674609084, "grad_norm": 0.004026284028419137, "kl": 0.10394287109375, "learning_rate": 5.886531122967888e-07, "loss": 0.00010382196342106909, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2258, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 78.40625238418579, "completions/min_length": 31.25, "epoch": 4.490444278977414, "grad_norm": 0.0033095778525263505, "kl": 0.0966796875, "learning_rate": 5.883426089887531e-07, "loss": 9.674718603491783e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2259, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 77.65625143051147, "completions/min_length": 31.0, "epoch": 4.492429883345744, "grad_norm": 0.005435685847843289, "kl": 0.11260986328125, "learning_rate": 5.880320705096376e-07, "loss": 0.00011271526454947889, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2260, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 76.802086353302, "completions/min_length": 32.75, "epoch": 4.494415487714073, "grad_norm": 0.003927775994213521, "kl": 0.117889404296875, "learning_rate": 5.877214969830745e-07, "loss": 0.00011776182509493083, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2261, "train_speed(iter/s)": 0.02274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.125, "completions/mean_length": 83.16666984558105, "completions/min_length": 31.75, "epoch": 4.496401092082403, "grad_norm": 0.00425138432199738, "kl": 0.09832763671875, "learning_rate": 5.874108885327098e-07, "loss": 9.834981756284833e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2262, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 79.96875190734863, "completions/min_length": 29.625, "epoch": 4.498386696450732, "grad_norm": 1.4847891524536354, "kl": 0.09027099609375, "learning_rate": 5.871002452822033e-07, "loss": -0.010531831532716751, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2263, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 75.64583587646484, "completions/min_length": 31.25, "epoch": 4.500372300819062, "grad_norm": 0.008389382738686307, "kl": 0.09576416015625, "learning_rate": 5.867895673552288e-07, "loss": 9.583967039361596e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2264, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 73.27083587646484, "completions/min_length": 37.625, "epoch": 4.502357905187392, "grad_norm": 0.003872977791608026, "kl": 0.091400146484375, "learning_rate": 5.864788548754737e-07, "loss": 9.143753413809463e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2265, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 72.62500190734863, "completions/min_length": 34.125, "epoch": 4.504343509555721, "grad_norm": 0.005363005900148331, "kl": 0.098846435546875, "learning_rate": 5.861681079666394e-07, "loss": 9.892591333482414e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2266, "train_speed(iter/s)": 0.022739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 74.21875286102295, "completions/min_length": 30.75, "epoch": 4.506329113924051, "grad_norm": 0.012140812564650938, "kl": 0.101165771484375, "learning_rate": 5.858573267524408e-07, "loss": 0.00010126647248398513, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2267, "train_speed(iter/s)": 0.022738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.875, "completions/mean_length": 83.62500333786011, "completions/min_length": 34.0, "epoch": 4.508314718292381, "grad_norm": 0.008323405110357944, "kl": 0.11407470703125, "learning_rate": 5.855465113566065e-07, "loss": 0.00011406631529098377, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2268, "train_speed(iter/s)": 0.022737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 82.34375190734863, "completions/min_length": 36.375, "epoch": 4.51030032266071, "grad_norm": 0.004265793745991542, "kl": 0.092498779296875, "learning_rate": 5.852356619028789e-07, "loss": 9.246898116543889e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2269, "train_speed(iter/s)": 0.022735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.375, "completions/mean_length": 84.97916793823242, "completions/min_length": 36.125, "epoch": 4.51228592702904, "grad_norm": 0.004924395643969063, "kl": 0.09814453125, "learning_rate": 5.849247785150134e-07, "loss": 9.818821854423732e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2270, "train_speed(iter/s)": 0.022733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 75.60416984558105, "completions/min_length": 33.875, "epoch": 4.514271531397369, "grad_norm": 0.009767516751758824, "kl": 0.10345458984375, "learning_rate": 5.8461386131678e-07, "loss": 0.0001034951419569552, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2271, "train_speed(iter/s)": 0.022732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 73.52083444595337, "completions/min_length": 33.0, "epoch": 4.516257135765699, "grad_norm": 1.8262905603559427, "kl": 0.103271484375, "learning_rate": 5.84302910431961e-07, "loss": -0.004324705805629492, "memory(GiB)": 94.21, "reward": 1.8437500149011612, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.8437500074505806, "rewards/CineAccuracyORM/std": 0.1911909468472004, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2272, "train_speed(iter/s)": 0.022731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 75.31250238418579, "completions/min_length": 34.875, "epoch": 4.518242740134029, "grad_norm": 1.0578366038951796, "kl": 0.157470703125, "learning_rate": 5.839919259843525e-07, "loss": 0.00038225826574489474, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2273, "train_speed(iter/s)": 0.02273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 78.39583492279053, "completions/min_length": 27.625, "epoch": 4.520228344502358, "grad_norm": 0.008614658097092684, "kl": 0.10693359375, "learning_rate": 5.836809080977643e-07, "loss": 0.00010691669012885541, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2274, "train_speed(iter/s)": 0.02273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.875, "completions/mean_length": 78.37500286102295, "completions/min_length": 34.625, "epoch": 4.522213948870688, "grad_norm": 0.003384064155008789, "kl": 0.08953857421875, "learning_rate": 5.833698568960194e-07, "loss": 8.955416706157848e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2275, "train_speed(iter/s)": 0.022729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 80.59375286102295, "completions/min_length": 33.125, "epoch": 4.524199553239017, "grad_norm": 0.004664150600999667, "kl": 0.103271484375, "learning_rate": 5.830587725029537e-07, "loss": 0.00010333849786547944, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2276, "train_speed(iter/s)": 0.022729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 72.73958587646484, "completions/min_length": 34.75, "epoch": 4.526185157607347, "grad_norm": 0.0033492312690766, "kl": 0.0872802734375, "learning_rate": 5.827476550424164e-07, "loss": 8.730647823540494e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2277, "train_speed(iter/s)": 0.02273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 81.02083587646484, "completions/min_length": 38.875, "epoch": 4.528170761975677, "grad_norm": 0.003882525200665498, "kl": 0.098480224609375, "learning_rate": 5.824365046382702e-07, "loss": 9.85459191724658e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2278, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 82.25000190734863, "completions/min_length": 34.125, "epoch": 4.530156366344006, "grad_norm": 0.004690491013353411, "kl": 0.10150146484375, "learning_rate": 5.821253214143908e-07, "loss": 0.00010153650509892032, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2279, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 73.54166841506958, "completions/min_length": 34.125, "epoch": 4.532141970712336, "grad_norm": 0.008977938805834819, "kl": 0.0831298828125, "learning_rate": 5.818141054946667e-07, "loss": 8.310612611239776e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2280, "train_speed(iter/s)": 0.022727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 78.51041984558105, "completions/min_length": 29.875, "epoch": 4.5341275750806656, "grad_norm": 0.005656146666333078, "kl": 0.1134033203125, "learning_rate": 5.815028570029998e-07, "loss": 0.00011320726480334997, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2281, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 79.70833730697632, "completions/min_length": 33.5, "epoch": 4.536113179448995, "grad_norm": 0.0034398269403313384, "kl": 0.08966064453125, "learning_rate": 5.811915760633046e-07, "loss": 8.962757419794798e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2282, "train_speed(iter/s)": 0.022729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 81.28125238418579, "completions/min_length": 36.75, "epoch": 4.538098783817325, "grad_norm": 1.5436595756455518, "kl": 0.09344482421875, "learning_rate": 5.808802627995089e-07, "loss": -0.01086314208805561, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2283, "train_speed(iter/s)": 0.022728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 93.56250286102295, "completions/min_length": 39.125, "epoch": 4.540084388185654, "grad_norm": 0.0032088076358396497, "kl": 0.092498779296875, "learning_rate": 5.805689173355528e-07, "loss": 9.258277714252472e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2284, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 66.64583444595337, "completions/min_length": 30.375, "epoch": 4.542069992553984, "grad_norm": 0.005530933365593661, "kl": 0.11419677734375, "learning_rate": 5.802575397953899e-07, "loss": 0.00011411268496885896, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2285, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 77.19791984558105, "completions/min_length": 31.5, "epoch": 4.5440555969223135, "grad_norm": 0.005525004061213157, "kl": 0.10479736328125, "learning_rate": 5.79946130302986e-07, "loss": 0.00010479833872523159, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2286, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 75.89583587646484, "completions/min_length": 39.625, "epoch": 4.546041201290643, "grad_norm": 0.7946204126687513, "kl": 0.0855712890625, "learning_rate": 5.796346889823202e-07, "loss": -0.0010799586307257414, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2287, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 82.31250286102295, "completions/min_length": 36.5, "epoch": 4.5480268056589725, "grad_norm": 0.8243358410051134, "kl": 0.102294921875, "learning_rate": 5.793232159573838e-07, "loss": 0.00551933329552412, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2288, "train_speed(iter/s)": 0.022726 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 88.19791841506958, "completions/min_length": 38.5, "epoch": 4.550012410027302, "grad_norm": 1.476136086373933, "kl": 0.10107421875, "learning_rate": 5.790117113521806e-07, "loss": 0.00010109124559676275, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2289, "train_speed(iter/s)": 0.022725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.375, "completions/mean_length": 82.42708587646484, "completions/min_length": 37.25, "epoch": 4.551998014395632, "grad_norm": 0.005686331306765389, "kl": 0.08880615234375, "learning_rate": 5.787001752907276e-07, "loss": 8.879662345862016e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2290, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 77.19791889190674, "completions/min_length": 32.875, "epoch": 4.5539836187639615, "grad_norm": 1.0548499734212398, "kl": 0.0855712890625, "learning_rate": 5.783886078970537e-07, "loss": 0.0011719726026058197, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2291, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 81.98958492279053, "completions/min_length": 31.5, "epoch": 4.555969223132291, "grad_norm": 0.005153707535255694, "kl": 0.0975341796875, "learning_rate": 5.780770092952009e-07, "loss": 9.75908333202824e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2292, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 75.37500143051147, "completions/min_length": 32.75, "epoch": 4.5579548275006205, "grad_norm": 0.005105565838476203, "kl": 0.085601806640625, "learning_rate": 5.777653796092229e-07, "loss": 8.554181840736419e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2293, "train_speed(iter/s)": 0.022723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 81.89583683013916, "completions/min_length": 38.625, "epoch": 4.5599404318689505, "grad_norm": 0.004798246380916185, "kl": 0.09674072265625, "learning_rate": 5.774537189631861e-07, "loss": 9.666193363955244e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2294, "train_speed(iter/s)": 0.022724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 76.28125095367432, "completions/min_length": 32.625, "epoch": 4.5619260362372795, "grad_norm": 0.00572011525467052, "kl": 0.103302001953125, "learning_rate": 5.771420274811696e-07, "loss": 0.00010323635069653392, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2295, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 81.05208587646484, "completions/min_length": 36.5, "epoch": 4.5639116406056095, "grad_norm": 0.004333919524378042, "kl": 0.08770751953125, "learning_rate": 5.768303052872642e-07, "loss": 8.761404023971409e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2296, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 68.39583539962769, "completions/min_length": 25.375, "epoch": 4.565897244973939, "grad_norm": 0.005163773785245674, "kl": 0.08453369140625, "learning_rate": 5.765185525055732e-07, "loss": 8.445871935691684e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2297, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 83.18750286102295, "completions/min_length": 36.875, "epoch": 4.5678828493422685, "grad_norm": 0.0036358926877679476, "kl": 0.08697509765625, "learning_rate": 5.762067692602119e-07, "loss": 8.697862358530983e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2298, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 72.37500190734863, "completions/min_length": 31.25, "epoch": 4.5698684537105985, "grad_norm": 0.00606025448858362, "kl": 0.092010498046875, "learning_rate": 5.758949556753082e-07, "loss": 9.191343997372314e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2299, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 72.41666889190674, "completions/min_length": 31.875, "epoch": 4.5718540580789275, "grad_norm": 1.9573697120281306, "kl": 0.09136962890625, "learning_rate": 5.755831118750015e-07, "loss": -0.0026668086647987366, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.18617857620120049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2300, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 76.41666841506958, "completions/min_length": 39.75, "epoch": 4.5738396624472575, "grad_norm": 0.006006257641365506, "kl": 0.09698486328125, "learning_rate": 5.752712379834435e-07, "loss": 9.682127711130306e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2301, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.875, "completions/mean_length": 91.87500286102295, "completions/min_length": 38.75, "epoch": 4.5758252668155865, "grad_norm": 0.003058366902914677, "kl": 0.08502197265625, "learning_rate": 5.74959334124798e-07, "loss": 8.493951463606209e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2302, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.375, "completions/mean_length": 79.50000286102295, "completions/min_length": 31.25, "epoch": 4.5778108711839165, "grad_norm": 0.003333706345711044, "kl": 0.09051513671875, "learning_rate": 5.746474004232405e-07, "loss": 9.043920726981014e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2303, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 77.27083587646484, "completions/min_length": 33.875, "epoch": 4.5797964755522464, "grad_norm": 0.0055613782787916006, "kl": 0.09375, "learning_rate": 5.743354370029583e-07, "loss": 9.380087431054562e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2304, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 78.33333539962769, "completions/min_length": 35.875, "epoch": 4.5817820799205755, "grad_norm": 0.0037245481595628164, "kl": 0.089385986328125, "learning_rate": 5.74023443988151e-07, "loss": 8.936197264119983e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2305, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 74.12500190734863, "completions/min_length": 34.375, "epoch": 4.5837676842889055, "grad_norm": 2.230663856149765, "kl": 0.098907470703125, "learning_rate": 5.737114215030295e-07, "loss": -0.0076047456823289394, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8333333395421505, "rewards/CineAccuracyORM/std": 0.17548105120658875, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2306, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 67.03125190734863, "completions/min_length": 32.5, "epoch": 4.585753288657235, "grad_norm": 0.0037963544544235488, "kl": 0.07318115234375, "learning_rate": 5.733993696718168e-07, "loss": 7.318564894376323e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2307, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 83.31250238418579, "completions/min_length": 37.75, "epoch": 4.5877388930255645, "grad_norm": 0.004103642316760418, "kl": 0.0963134765625, "learning_rate": 5.73087288618747e-07, "loss": 9.642505028750747e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2308, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 71.28125190734863, "completions/min_length": 33.0, "epoch": 4.589724497393894, "grad_norm": 0.0034598986690599367, "kl": 0.099853515625, "learning_rate": 5.727751784680667e-07, "loss": 9.977837180485949e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2309, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 77.72916841506958, "completions/min_length": 36.875, "epoch": 4.5917101017622235, "grad_norm": 1.9793506553163016, "kl": 0.08746337890625, "learning_rate": 5.724630393440333e-07, "loss": -0.00461022462695837, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2310, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 78.21875238418579, "completions/min_length": 32.75, "epoch": 4.5936957061305534, "grad_norm": 0.03233720365612494, "kl": 0.09197998046875, "learning_rate": 5.721508713709162e-07, "loss": 9.184504597214982e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2311, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 72.88541841506958, "completions/min_length": 31.75, "epoch": 4.595681310498883, "grad_norm": 0.6754820617097291, "kl": 0.08831787109375, "learning_rate": 5.718386746729961e-07, "loss": 0.008238280192017555, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2312, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 73.59375381469727, "completions/min_length": 36.375, "epoch": 4.5976669148672125, "grad_norm": 0.005317897778769317, "kl": 0.09112548828125, "learning_rate": 5.715264493745651e-07, "loss": 9.116034198086709e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2313, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 72.3854193687439, "completions/min_length": 30.625, "epoch": 4.599652519235542, "grad_norm": 0.0033929874385534953, "kl": 0.078765869140625, "learning_rate": 5.71214195599927e-07, "loss": 7.870925037423149e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2314, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 80.50000286102295, "completions/min_length": 32.25, "epoch": 4.6016381236038715, "grad_norm": 0.0032873752823652275, "kl": 0.093505859375, "learning_rate": 5.709019134733964e-07, "loss": 9.349064202979207e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2315, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 65.71875238418579, "completions/min_length": 30.0, "epoch": 4.603623727972201, "grad_norm": 0.011534392390535811, "kl": 0.1021728515625, "learning_rate": 5.705896031192997e-07, "loss": 0.00010226282029179856, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2316, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 84.61458587646484, "completions/min_length": 36.875, "epoch": 4.605609332340531, "grad_norm": 0.003852925664040561, "kl": 0.10693359375, "learning_rate": 5.702772646619742e-07, "loss": 0.00010697339894250035, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2317, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 77.06250286102295, "completions/min_length": 35.375, "epoch": 4.6075949367088604, "grad_norm": 0.004558999717422353, "kl": 0.08642578125, "learning_rate": 5.699648982257685e-07, "loss": 8.62802698975429e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2318, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 83.78125286102295, "completions/min_length": 33.125, "epoch": 4.60958054107719, "grad_norm": 0.0036160464170613936, "kl": 0.10736083984375, "learning_rate": 5.696525039350425e-07, "loss": 0.0001075066247722134, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2319, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 81.53125286102295, "completions/min_length": 34.5, "epoch": 4.61156614544552, "grad_norm": 0.003760841205670082, "kl": 0.08953857421875, "learning_rate": 5.693400819141669e-07, "loss": 8.954511577030644e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2320, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 78.33333492279053, "completions/min_length": 33.25, "epoch": 4.613551749813849, "grad_norm": 0.0032498174063719976, "kl": 0.082763671875, "learning_rate": 5.690276322875236e-07, "loss": 8.275630534626544e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2321, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 77.11458587646484, "completions/min_length": 33.0, "epoch": 4.615537354182179, "grad_norm": 0.004086336713738296, "kl": 0.08404541015625, "learning_rate": 5.687151551795054e-07, "loss": 8.408527355641127e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2322, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.875, "completions/mean_length": 82.73958683013916, "completions/min_length": 34.0, "epoch": 4.617522958550508, "grad_norm": 0.01248841140481593, "kl": 0.13299560546875, "learning_rate": 5.684026507145165e-07, "loss": 0.0001329867372987792, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2323, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 65.54166841506958, "completions/min_length": 32.375, "epoch": 4.619508562918838, "grad_norm": 3.1658015235706767, "kl": 0.09576416015625, "learning_rate": 5.68090119016971e-07, "loss": -0.007310189306735992, "memory(GiB)": 94.21, "reward": 1.5729166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2324, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 78.60416889190674, "completions/min_length": 37.375, "epoch": 4.621494167287168, "grad_norm": 0.003460455846155925, "kl": 0.11016845703125, "learning_rate": 5.677775602112947e-07, "loss": 0.00011014618212357163, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2325, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 73.8229193687439, "completions/min_length": 33.875, "epoch": 4.623479771655497, "grad_norm": 0.983220531646428, "kl": 0.1031494140625, "learning_rate": 5.674649744219242e-07, "loss": -0.0007950937142595649, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2326, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 74.39583587646484, "completions/min_length": 33.625, "epoch": 4.625465376023827, "grad_norm": 1.396096258057355, "kl": 0.09417724609375, "learning_rate": 5.671523617733064e-07, "loss": 0.007858018390834332, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2327, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 83.89583539962769, "completions/min_length": 34.0, "epoch": 4.627450980392156, "grad_norm": 1.0866659667101322, "kl": 0.11163330078125, "learning_rate": 5.66839722389899e-07, "loss": -0.003320117946714163, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2328, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 76.93750238418579, "completions/min_length": 34.5, "epoch": 4.629436584760486, "grad_norm": 0.004133248965090325, "kl": 0.089813232421875, "learning_rate": 5.665270563961702e-07, "loss": 8.97534191608429e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2329, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 74.84375238418579, "completions/min_length": 37.875, "epoch": 4.631422189128816, "grad_norm": 3.6598942616515813, "kl": 0.108184814453125, "learning_rate": 5.662143639165995e-07, "loss": 0.0006956271827220917, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2330, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 75.42708492279053, "completions/min_length": 34.0, "epoch": 4.633407793497145, "grad_norm": 0.003285613511303384, "kl": 0.09954833984375, "learning_rate": 5.659016450756761e-07, "loss": 9.94454458123073e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2331, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 88.41667079925537, "completions/min_length": 38.25, "epoch": 4.635393397865475, "grad_norm": 1.3189134547690207, "kl": 0.19110107421875, "learning_rate": 5.655888999979004e-07, "loss": -0.013224013149738312, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2332, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 78.29166841506958, "completions/min_length": 33.125, "epoch": 4.637379002233805, "grad_norm": 0.00324602327054682, "kl": 0.07659912109375, "learning_rate": 5.652761288077824e-07, "loss": 7.664500299142674e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2333, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 76.05208492279053, "completions/min_length": 33.625, "epoch": 4.639364606602134, "grad_norm": 1.2506648342000495, "kl": 0.22784423828125, "learning_rate": 5.649633316298435e-07, "loss": 0.0016061998903751373, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2334, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 82.34375238418579, "completions/min_length": 36.375, "epoch": 4.641350210970464, "grad_norm": 0.004870658101811156, "kl": 0.101318359375, "learning_rate": 5.646505085886144e-07, "loss": 0.0001012769207591191, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2335, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 70.12500286102295, "completions/min_length": 32.125, "epoch": 4.643335815338793, "grad_norm": 0.006618151616686567, "kl": 0.088531494140625, "learning_rate": 5.643376598086371e-07, "loss": 8.857453940436244e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2336, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 61.37500238418579, "completions/min_length": 24.25, "epoch": 4.645321419707123, "grad_norm": 0.0035250140365492284, "kl": 0.08172607421875, "learning_rate": 5.640247854144633e-07, "loss": 8.166702173184603e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2337, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 69.39583587646484, "completions/min_length": 28.125, "epoch": 4.647307024075453, "grad_norm": 0.005895554204687453, "kl": 0.0992431640625, "learning_rate": 5.637118855306547e-07, "loss": 9.923591278493404e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2338, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 76.33333587646484, "completions/min_length": 33.875, "epoch": 4.649292628443782, "grad_norm": 0.7507075993821026, "kl": 0.13653564453125, "learning_rate": 5.633989602817837e-07, "loss": 0.004025675356388092, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2339, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 72.27083492279053, "completions/min_length": 32.25, "epoch": 4.651278232812112, "grad_norm": 0.004642095660157392, "kl": 0.0936279296875, "learning_rate": 5.630860097924325e-07, "loss": 9.374831279274076e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2340, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 79.15625190734863, "completions/min_length": 29.25, "epoch": 4.653263837180441, "grad_norm": 1.2879447231176964, "kl": 0.11737060546875, "learning_rate": 5.627730341871933e-07, "loss": 0.00011727835226338357, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2341, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 79.21875238418579, "completions/min_length": 33.75, "epoch": 4.655249441548771, "grad_norm": 0.7877443988396042, "kl": 0.09991455078125, "learning_rate": 5.624600335906681e-07, "loss": 0.008169095031917095, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2342, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.875, "completions/mean_length": 79.91666984558105, "completions/min_length": 28.125, "epoch": 4.657235045917101, "grad_norm": 0.00591320841549025, "kl": 0.09527587890625, "learning_rate": 5.621470081274698e-07, "loss": 9.53097696765326e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2343, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 72.66666984558105, "completions/min_length": 30.5, "epoch": 4.65922065028543, "grad_norm": 0.003958699016839167, "kl": 0.09844970703125, "learning_rate": 5.6183395792222e-07, "loss": 9.841163409873843e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2344, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 73.81250190734863, "completions/min_length": 28.5, "epoch": 4.66120625465376, "grad_norm": 0.005329246216295835, "kl": 0.075164794921875, "learning_rate": 5.615208830995508e-07, "loss": 7.521644874941558e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2345, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 74.98958539962769, "completions/min_length": 35.375, "epoch": 4.66319185902209, "grad_norm": 0.006392068411079872, "kl": 0.09625244140625, "learning_rate": 5.612077837841039e-07, "loss": 9.623055666452274e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2346, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 73.33333587646484, "completions/min_length": 31.125, "epoch": 4.665177463390419, "grad_norm": 0.004098474073644894, "kl": 0.087005615234375, "learning_rate": 5.608946601005311e-07, "loss": 8.697745943209156e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2347, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 83.95833683013916, "completions/min_length": 38.75, "epoch": 4.667163067758749, "grad_norm": 1.227646445616175, "kl": 0.10247802734375, "learning_rate": 5.605815121734934e-07, "loss": -0.012014409527182579, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2348, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 82.06250238418579, "completions/min_length": 29.375, "epoch": 4.669148672127078, "grad_norm": 0.004814142082146612, "kl": 0.088348388671875, "learning_rate": 5.602683401276614e-07, "loss": 8.838576468406245e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2349, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 83.78125286102295, "completions/min_length": 34.875, "epoch": 4.671134276495408, "grad_norm": 0.005736769257701222, "kl": 0.10888671875, "learning_rate": 5.599551440877161e-07, "loss": 0.00010884978109970689, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2350, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 81.69791984558105, "completions/min_length": 41.625, "epoch": 4.673119880863738, "grad_norm": 0.005309514502676998, "kl": 0.10150146484375, "learning_rate": 5.596419241783474e-07, "loss": 0.00010142278915736824, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2351, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 67.97916841506958, "completions/min_length": 27.875, "epoch": 4.675105485232067, "grad_norm": 0.005548726933792474, "kl": 0.10076904296875, "learning_rate": 5.593286805242549e-07, "loss": 0.00010085056419484317, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2352, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.875, "completions/mean_length": 81.47916889190674, "completions/min_length": 28.75, "epoch": 4.677091089600397, "grad_norm": 0.0030455917706678484, "kl": 0.09954833984375, "learning_rate": 5.590154132501472e-07, "loss": 9.947115177055821e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2353, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 75.59375238418579, "completions/min_length": 32.25, "epoch": 4.679076693968726, "grad_norm": 0.7006896297815685, "kl": 0.10076904296875, "learning_rate": 5.58702122480743e-07, "loss": -0.006216554902493954, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2354, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 82.11458587646484, "completions/min_length": 32.375, "epoch": 4.681062298337056, "grad_norm": 0.003947702664324214, "kl": 0.108642578125, "learning_rate": 5.583888083407699e-07, "loss": 0.00010863847273867577, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2355, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 73.052086353302, "completions/min_length": 32.125, "epoch": 4.683047902705386, "grad_norm": 0.0034927833041282865, "kl": 0.08538818359375, "learning_rate": 5.580754709549652e-07, "loss": 8.53494493640028e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2356, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.375, "completions/mean_length": 84.04166984558105, "completions/min_length": 32.25, "epoch": 4.685033507073715, "grad_norm": 2.4961354573388714, "kl": 0.1226806640625, "learning_rate": 5.577621104480751e-07, "loss": 0.012847809121012688, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.2592903971672058, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2357, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 80.07291841506958, "completions/min_length": 31.625, "epoch": 4.687019111442045, "grad_norm": 0.0043750363380664565, "kl": 0.10748291015625, "learning_rate": 5.574487269448549e-07, "loss": 0.00010759155702544376, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2358, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 83.01041793823242, "completions/min_length": 32.375, "epoch": 4.689004715810375, "grad_norm": 0.004391053659948604, "kl": 0.11566162109375, "learning_rate": 5.571353205700695e-07, "loss": 0.00011556669778656214, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2359, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 74.58333539962769, "completions/min_length": 31.625, "epoch": 4.690990320178704, "grad_norm": 0.006677941367474188, "kl": 0.120849609375, "learning_rate": 5.568218914484925e-07, "loss": 0.00012101449829060584, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2360, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 75.1354193687439, "completions/min_length": 37.5, "epoch": 4.692975924547034, "grad_norm": 0.004430730118602707, "kl": 0.0965576171875, "learning_rate": 5.565084397049071e-07, "loss": 9.643529483582824e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2361, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 81.96875143051147, "completions/min_length": 30.375, "epoch": 4.694961528915364, "grad_norm": 0.004668489739607124, "kl": 0.0992431640625, "learning_rate": 5.561949654641046e-07, "loss": 9.912320820149034e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2362, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 82.67708587646484, "completions/min_length": 38.0, "epoch": 4.696947133283693, "grad_norm": 0.004168253751659089, "kl": 0.08251953125, "learning_rate": 5.558814688508862e-07, "loss": 8.25101014925167e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2363, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 70.03125047683716, "completions/min_length": 31.0, "epoch": 4.698932737652023, "grad_norm": 1.067279898281127, "kl": 0.1177978515625, "learning_rate": 5.555679499900613e-07, "loss": -0.004482237156480551, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2364, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 72.15625143051147, "completions/min_length": 33.0, "epoch": 4.700918342020352, "grad_norm": 1.6388835772280552, "kl": 0.1346435546875, "learning_rate": 5.552544090064487e-07, "loss": 0.001115697785280645, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2365, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 72.08333492279053, "completions/min_length": 38.75, "epoch": 4.702903946388682, "grad_norm": 0.006825047979680221, "kl": 0.086669921875, "learning_rate": 5.549408460248757e-07, "loss": 8.665671339258552e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2366, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 77.40625238418579, "completions/min_length": 37.0, "epoch": 4.704889550757011, "grad_norm": 0.00532596327968206, "kl": 0.0960693359375, "learning_rate": 5.546272611701783e-07, "loss": 9.604192746337503e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2367, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 77.17708587646484, "completions/min_length": 37.5, "epoch": 4.706875155125341, "grad_norm": 0.005383832134177418, "kl": 0.08392333984375, "learning_rate": 5.543136545672014e-07, "loss": 8.388089190702885e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2368, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.375, "completions/mean_length": 84.57291841506958, "completions/min_length": 33.125, "epoch": 4.708860759493671, "grad_norm": 0.9234181661280052, "kl": 0.091705322265625, "learning_rate": 5.540000263407986e-07, "loss": -0.0066925231367349625, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2369, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 73.13541889190674, "completions/min_length": 32.875, "epoch": 4.710846363862, "grad_norm": 0.015995415525439965, "kl": 0.12982177734375, "learning_rate": 5.536863766158317e-07, "loss": 0.00012981262989342213, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2370, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 79.79166984558105, "completions/min_length": 27.25, "epoch": 4.71283196823033, "grad_norm": 0.011202974540784661, "kl": 0.103607177734375, "learning_rate": 5.533727055171717e-07, "loss": 0.00010354029654990882, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2371, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 77.26041889190674, "completions/min_length": 33.375, "epoch": 4.71481757259866, "grad_norm": 0.022467234398754667, "kl": 0.114990234375, "learning_rate": 5.530590131696978e-07, "loss": 0.00011488457676023245, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2372, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 75.20833539962769, "completions/min_length": 33.625, "epoch": 4.716803176966989, "grad_norm": 0.0052564242479786515, "kl": 0.08538818359375, "learning_rate": 5.527452996982973e-07, "loss": 8.54157842695713e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2373, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 73.40625333786011, "completions/min_length": 36.625, "epoch": 4.718788781335319, "grad_norm": 0.0034781322116524535, "kl": 0.109893798828125, "learning_rate": 5.524315652278663e-07, "loss": 0.00010982695675920695, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2374, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 74.05208683013916, "completions/min_length": 31.5, "epoch": 4.720774385703649, "grad_norm": 0.012881657197212185, "kl": 0.103057861328125, "learning_rate": 5.521178098833092e-07, "loss": 0.00010311169899068773, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2375, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 66.91666889190674, "completions/min_length": 29.0, "epoch": 4.722759990071978, "grad_norm": 0.009953129755388168, "kl": 0.087158203125, "learning_rate": 5.518040337895391e-07, "loss": 8.716151205589995e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2376, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 77.64583492279053, "completions/min_length": 34.0, "epoch": 4.724745594440308, "grad_norm": 0.00759740992684897, "kl": 0.111328125, "learning_rate": 5.514902370714763e-07, "loss": 0.00011120867566205561, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2377, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 81.552086353302, "completions/min_length": 32.125, "epoch": 4.726731198808637, "grad_norm": 0.003968374011660823, "kl": 0.107421875, "learning_rate": 5.511764198540505e-07, "loss": 0.00010742887388914824, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2378, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 74.63541984558105, "completions/min_length": 29.125, "epoch": 4.728716803176967, "grad_norm": 0.0043210659978762415, "kl": 0.08184814453125, "learning_rate": 5.508625822621988e-07, "loss": 8.181139128282666e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2379, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.875, "completions/mean_length": 84.54166984558105, "completions/min_length": 33.5, "epoch": 4.730702407545296, "grad_norm": 0.8589729055032919, "kl": 0.09381103515625, "learning_rate": 5.505487244208667e-07, "loss": 0.004366949200630188, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2380, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 80.70833683013916, "completions/min_length": 35.625, "epoch": 4.732688011913626, "grad_norm": 0.003663083710401756, "kl": 0.119384765625, "learning_rate": 5.502348464550077e-07, "loss": 0.00011942406854359433, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2381, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 71.59375143051147, "completions/min_length": 32.625, "epoch": 4.734673616281956, "grad_norm": 0.0067909253536695404, "kl": 0.09637451171875, "learning_rate": 5.499209484895833e-07, "loss": 9.640395728638396e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2382, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 83.11458587646484, "completions/min_length": 36.875, "epoch": 4.736659220650285, "grad_norm": 0.003723763070110564, "kl": 0.123870849609375, "learning_rate": 5.496070306495632e-07, "loss": 0.0001238843979081139, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2383, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 69.68750143051147, "completions/min_length": 31.125, "epoch": 4.738644825018615, "grad_norm": 0.9046098062435918, "kl": 0.1024169921875, "learning_rate": 5.492930930599244e-07, "loss": 0.018296249210834503, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2384, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 76.59375095367432, "completions/min_length": 31.625, "epoch": 4.740630429386945, "grad_norm": 1.1791352675626274, "kl": 0.10150146484375, "learning_rate": 5.489791358456526e-07, "loss": 0.019950827583670616, "memory(GiB)": 94.21, "reward": 1.8750000149011612, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.8750000027939677, "rewards/CineAccuracyORM/std": 0.07216878235340118, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2385, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 69.864586353302, "completions/min_length": 32.625, "epoch": 4.742616033755274, "grad_norm": 0.0032040085308207853, "kl": 0.08612060546875, "learning_rate": 5.486651591317405e-07, "loss": 8.610550867160782e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2386, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 74.45833587646484, "completions/min_length": 30.75, "epoch": 4.744601638123604, "grad_norm": 1.2950493324088936, "kl": 0.098785400390625, "learning_rate": 5.48351163043189e-07, "loss": 0.01045030914247036, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2387, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 70.07291841506958, "completions/min_length": 33.375, "epoch": 4.746587242491934, "grad_norm": 1.1716671097453595, "kl": 0.09332275390625, "learning_rate": 5.48037147705007e-07, "loss": 9.33458431973122e-05, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666679084301, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2388, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 68.05208492279053, "completions/min_length": 32.125, "epoch": 4.748572846860263, "grad_norm": 0.006718216519998036, "kl": 0.09088134765625, "learning_rate": 5.477231132422102e-07, "loss": 9.086485079023987e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2389, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 72.12500238418579, "completions/min_length": 34.0, "epoch": 4.750558451228593, "grad_norm": 0.005850786240599144, "kl": 0.1046142578125, "learning_rate": 5.474090597798228e-07, "loss": 0.00010463615763001144, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2390, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 66.87500238418579, "completions/min_length": 30.375, "epoch": 4.752544055596922, "grad_norm": 0.005589889461778325, "kl": 0.091949462890625, "learning_rate": 5.47094987442876e-07, "loss": 9.180587949231267e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2391, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 70.38541841506958, "completions/min_length": 34.375, "epoch": 4.754529659965252, "grad_norm": 0.006014159457760767, "kl": 0.10833740234375, "learning_rate": 5.467808963564089e-07, "loss": 0.00010835006833076477, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2392, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 66.30208492279053, "completions/min_length": 30.25, "epoch": 4.756515264333581, "grad_norm": 0.005456182113695637, "kl": 0.11688232421875, "learning_rate": 5.464667866454677e-07, "loss": 0.00011701976472977549, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2393, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 72.28125190734863, "completions/min_length": 32.5, "epoch": 4.758500868701911, "grad_norm": 0.0060403999481007265, "kl": 0.0926513671875, "learning_rate": 5.46152658435106e-07, "loss": 9.255832992494106e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2394, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 72.17708444595337, "completions/min_length": 33.875, "epoch": 4.760486473070241, "grad_norm": 2.210610458798055, "kl": 0.11981201171875, "learning_rate": 5.458385118503854e-07, "loss": 0.0004693021473940462, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2395, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 79.81250333786011, "completions/min_length": 36.375, "epoch": 4.76247207743857, "grad_norm": 0.005084701552266223, "kl": 0.111572265625, "learning_rate": 5.455243470163741e-07, "loss": 0.00011156110849697143, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2396, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 82.96875047683716, "completions/min_length": 36.25, "epoch": 4.7644576818069, "grad_norm": 0.005731457334040926, "kl": 0.087615966796875, "learning_rate": 5.452101640581479e-07, "loss": 8.76723206602037e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2397, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 81.26041841506958, "completions/min_length": 33.375, "epoch": 4.76644328617523, "grad_norm": 0.9592539950105011, "kl": 0.54248046875, "learning_rate": 5.448959631007897e-07, "loss": 0.0005427635624073446, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2398, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 65.7916693687439, "completions/min_length": 30.0, "epoch": 4.768428890543559, "grad_norm": 1.1409983457833175, "kl": 0.11627197265625, "learning_rate": 5.445817442693895e-07, "loss": 0.009158136323094368, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2399, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 69.81250190734863, "completions/min_length": 33.125, "epoch": 4.770414494911889, "grad_norm": 0.007583821969683654, "kl": 0.09735107421875, "learning_rate": 5.44267507689045e-07, "loss": 9.73366986727342e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2400, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 76.16666793823242, "completions/min_length": 32.75, "epoch": 4.772400099280219, "grad_norm": 0.007561538167937902, "kl": 0.10980224609375, "learning_rate": 5.439532534848598e-07, "loss": 0.00010980182560160756, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2401, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 74.42708587646484, "completions/min_length": 30.5, "epoch": 4.774385703648548, "grad_norm": 0.005302285364055428, "kl": 0.095703125, "learning_rate": 5.436389817819458e-07, "loss": 9.568034147378057e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2402, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 75.15625190734863, "completions/min_length": 36.25, "epoch": 4.776371308016878, "grad_norm": 0.007283842877613333, "kl": 0.114013671875, "learning_rate": 5.43324692705421e-07, "loss": 0.00011406959674786776, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2403, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 69.81250238418579, "completions/min_length": 33.875, "epoch": 4.778356912385207, "grad_norm": 0.005518262783046529, "kl": 0.11669921875, "learning_rate": 5.430103863804107e-07, "loss": 0.00011671232641674578, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2404, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 77.95833492279053, "completions/min_length": 35.0, "epoch": 4.780342516753537, "grad_norm": 0.0064720898847713485, "kl": 0.0914306640625, "learning_rate": 5.426960629320466e-07, "loss": 9.163098002318293e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2405, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 70.39583444595337, "completions/min_length": 29.75, "epoch": 4.782328121121866, "grad_norm": 0.0071481501454178165, "kl": 0.103851318359375, "learning_rate": 5.423817224854681e-07, "loss": 0.00010394788114354014, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2406, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 67.07291889190674, "completions/min_length": 29.875, "epoch": 4.784313725490196, "grad_norm": 0.007921547386444372, "kl": 0.11151123046875, "learning_rate": 5.420673651658206e-07, "loss": 0.00011142025323351845, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2407, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 63.73958492279053, "completions/min_length": 28.25, "epoch": 4.786299329858526, "grad_norm": 0.007132908701328116, "kl": 0.102020263671875, "learning_rate": 5.417529910982566e-07, "loss": 0.00010212243068963289, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2408, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 69.65625143051147, "completions/min_length": 31.25, "epoch": 4.788284934226855, "grad_norm": 0.004370914955831252, "kl": 0.084197998046875, "learning_rate": 5.414386004079348e-07, "loss": 8.41338187456131e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2409, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 65.62500190734863, "completions/min_length": 29.5, "epoch": 4.790270538595185, "grad_norm": 0.007441647857881992, "kl": 0.1043701171875, "learning_rate": 5.411241932200212e-07, "loss": 0.00010443673090776429, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2410, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 71.62500286102295, "completions/min_length": 29.875, "epoch": 4.792256142963515, "grad_norm": 1.6594604408765041, "kl": 0.093017578125, "learning_rate": 5.408097696596879e-07, "loss": -0.008023375645279884, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2411, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 67.62500095367432, "completions/min_length": 29.75, "epoch": 4.794241747331844, "grad_norm": 0.0049650770774216785, "kl": 0.10498046875, "learning_rate": 5.404953298521136e-07, "loss": 0.00010494540038052946, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2412, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 60.93750190734863, "completions/min_length": 28.875, "epoch": 4.796227351700174, "grad_norm": 0.005936533204147861, "kl": 0.1175537109375, "learning_rate": 5.401808739224836e-07, "loss": 0.00011726385855581611, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2413, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 70.489586353302, "completions/min_length": 29.75, "epoch": 4.798212956068504, "grad_norm": 0.005918967887232658, "kl": 0.10546875, "learning_rate": 5.398664019959893e-07, "loss": 0.00010532489977777004, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2414, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 75.80208587646484, "completions/min_length": 32.0, "epoch": 4.800198560436833, "grad_norm": 1.6532817596178317, "kl": 0.09588623046875, "learning_rate": 5.395519141978288e-07, "loss": -0.011720061302185059, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.1783013828098774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2415, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 61.52083492279053, "completions/min_length": 30.25, "epoch": 4.802184164805163, "grad_norm": 0.006584208519040118, "kl": 0.09381103515625, "learning_rate": 5.392374106532067e-07, "loss": 9.39446035772562e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2416, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 68.53125190734863, "completions/min_length": 36.25, "epoch": 4.804169769173492, "grad_norm": 0.004054651644895156, "kl": 0.109771728515625, "learning_rate": 5.389228914873333e-07, "loss": 0.00010964569810312241, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2417, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 62.69791889190674, "completions/min_length": 26.75, "epoch": 4.806155373541822, "grad_norm": 0.0051225988957680855, "kl": 0.0938720703125, "learning_rate": 5.386083568254252e-07, "loss": 9.3840055342298e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2418, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 60.81250190734863, "completions/min_length": 23.625, "epoch": 4.808140977910151, "grad_norm": 0.004022710665534642, "kl": 0.09423828125, "learning_rate": 5.382938067927056e-07, "loss": 9.437235712539405e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2419, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 67.75000238418579, "completions/min_length": 27.0, "epoch": 4.810126582278481, "grad_norm": 0.0044085375323783095, "kl": 0.10601806640625, "learning_rate": 5.379792415144039e-07, "loss": 0.00010604178532958031, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2420, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 67.56250238418579, "completions/min_length": 33.5, "epoch": 4.812112186646811, "grad_norm": 0.0037846223981376536, "kl": 0.086639404296875, "learning_rate": 5.376646611157547e-07, "loss": 8.663708285894245e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2421, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 68.2916693687439, "completions/min_length": 30.875, "epoch": 4.81409779101514, "grad_norm": 1.2514813350739247, "kl": 0.115234375, "learning_rate": 5.373500657219994e-07, "loss": 0.0035899742506444454, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2422, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 64.82291793823242, "completions/min_length": 25.5, "epoch": 4.81608339538347, "grad_norm": 0.0061356395441369655, "kl": 0.0989990234375, "learning_rate": 5.370354554583851e-07, "loss": 9.900087025016546e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2423, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.10416841506958, "completions/min_length": 28.125, "epoch": 4.8180689997518, "grad_norm": 0.010043134892153693, "kl": 0.12725830078125, "learning_rate": 5.367208304501651e-07, "loss": 0.0001272726512979716, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2424, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 70.46875190734863, "completions/min_length": 29.125, "epoch": 4.820054604120129, "grad_norm": 0.011813061415614057, "kl": 0.096405029296875, "learning_rate": 5.364061908225979e-07, "loss": 9.641156066209078e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2425, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 73.01041889190674, "completions/min_length": 30.125, "epoch": 4.822040208488459, "grad_norm": 0.0033979085263042023, "kl": 0.09051513671875, "learning_rate": 5.360915367009487e-07, "loss": 9.04013286344707e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2426, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 66.75000286102295, "completions/min_length": 30.5, "epoch": 4.824025812856789, "grad_norm": 0.015786099070162915, "kl": 0.1123046875, "learning_rate": 5.35776868210488e-07, "loss": 0.00011224078480154276, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2427, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 67.53125143051147, "completions/min_length": 29.75, "epoch": 4.826011417225118, "grad_norm": 0.01813570278831089, "kl": 0.11529541015625, "learning_rate": 5.354621854764918e-07, "loss": 0.00011539125989656895, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2428, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 67.84375190734863, "completions/min_length": 31.625, "epoch": 4.827997021593448, "grad_norm": 1.3404214233406921, "kl": 0.13311767578125, "learning_rate": 5.351474886242419e-07, "loss": 0.0008415021002292633, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2429, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 77.84375286102295, "completions/min_length": 33.875, "epoch": 4.829982625961777, "grad_norm": 0.008204466360237073, "kl": 0.1256103515625, "learning_rate": 5.348327777790261e-07, "loss": 0.00012558323214761913, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2430, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 70.88541841506958, "completions/min_length": 35.0, "epoch": 4.831968230330107, "grad_norm": 0.030086811251245887, "kl": 0.14935302734375, "learning_rate": 5.345180530661376e-07, "loss": 0.0001494913303758949, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2431, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 73.28125190734863, "completions/min_length": 31.375, "epoch": 4.833953834698436, "grad_norm": 0.005192728882534674, "kl": 0.1104736328125, "learning_rate": 5.342033146108747e-07, "loss": 0.00011036378418793902, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2432, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 58.781251430511475, "completions/min_length": 30.75, "epoch": 4.835939439066766, "grad_norm": 1.6542870348295509, "kl": 0.09246826171875, "learning_rate": 5.338885625385419e-07, "loss": 0.005943110212683678, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2433, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 69.92708492279053, "completions/min_length": 30.375, "epoch": 4.837925043435096, "grad_norm": 1.4833259338393363, "kl": 0.13726806640625, "learning_rate": 5.335737969744484e-07, "loss": -0.0009108378435485065, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2434, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 72.83333587646484, "completions/min_length": 29.75, "epoch": 4.839910647803425, "grad_norm": 0.6162476187060693, "kl": 0.1016845703125, "learning_rate": 5.332590180439093e-07, "loss": 0.0042025865986943245, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2435, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 76.55208587646484, "completions/min_length": 30.5, "epoch": 4.841896252171755, "grad_norm": 0.009750830485556718, "kl": 0.0950927734375, "learning_rate": 5.329442258722446e-07, "loss": 9.516437421552837e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2436, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 57.114585399627686, "completions/min_length": 28.25, "epoch": 4.843881856540085, "grad_norm": 0.0036481253289747276, "kl": 0.103515625, "learning_rate": 5.326294205847799e-07, "loss": 0.00010345465125283226, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2437, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 71.28125190734863, "completions/min_length": 27.875, "epoch": 4.845867460908414, "grad_norm": 0.005900224576327768, "kl": 0.0970458984375, "learning_rate": 5.323146023068459e-07, "loss": 9.70427063293755e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2438, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 67.11458539962769, "completions/min_length": 26.25, "epoch": 4.847853065276744, "grad_norm": 0.900041579322121, "kl": 0.15936279296875, "learning_rate": 5.319997711637785e-07, "loss": -0.00158620101865381, "memory(GiB)": 94.21, "reward": 1.6041666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6041666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2439, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 72.01041889190674, "completions/min_length": 32.875, "epoch": 4.849838669645074, "grad_norm": 0.03612891042038138, "kl": 0.11517333984375, "learning_rate": 5.31684927280919e-07, "loss": 0.00011502523557282984, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2440, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 60.41666841506958, "completions/min_length": 27.25, "epoch": 4.851824274013403, "grad_norm": 0.39799840829665395, "kl": 0.144287109375, "learning_rate": 5.313700707836128e-07, "loss": 0.00014442240353673697, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2441, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 72.92708539962769, "completions/min_length": 27.5, "epoch": 4.853809878381733, "grad_norm": 0.0037649828080419364, "kl": 0.08746337890625, "learning_rate": 5.310552017972115e-07, "loss": 8.744312799535692e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2442, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 76.42708587646484, "completions/min_length": 30.5, "epoch": 4.855795482750062, "grad_norm": 0.006160073643950107, "kl": 0.091217041015625, "learning_rate": 5.307403204470711e-07, "loss": 9.12101095309481e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2443, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.125, "completions/mean_length": 65.114586353302, "completions/min_length": 31.25, "epoch": 4.857781087118392, "grad_norm": 0.0038459676714678154, "kl": 0.09649658203125, "learning_rate": 5.304254268585525e-07, "loss": 9.642692020861432e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2444, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 70.25000095367432, "completions/min_length": 35.125, "epoch": 4.859766691486721, "grad_norm": 0.004112866337073631, "kl": 0.09722900390625, "learning_rate": 5.301105211570215e-07, "loss": 9.71705885604024e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2445, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 77.20833587646484, "completions/min_length": 30.875, "epoch": 4.861752295855051, "grad_norm": 0.005534104946098873, "kl": 0.0982666015625, "learning_rate": 5.297956034678489e-07, "loss": 9.842171857599169e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2446, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 68.34375238418579, "completions/min_length": 34.0, "epoch": 4.863737900223381, "grad_norm": 0.005883966607315339, "kl": 0.1197509765625, "learning_rate": 5.294806739164103e-07, "loss": 0.00011970134801231325, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2447, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 71.9166693687439, "completions/min_length": 33.0, "epoch": 4.86572350459171, "grad_norm": 0.005637366120279593, "kl": 0.105712890625, "learning_rate": 5.291657326280856e-07, "loss": 0.00010571972234174609, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2448, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 77.21875190734863, "completions/min_length": 30.375, "epoch": 4.86770910896004, "grad_norm": 0.005492113734642495, "kl": 0.117431640625, "learning_rate": 5.288507797282598e-07, "loss": 0.0001174115895992145, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2449, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 64.65625238418579, "completions/min_length": 28.625, "epoch": 4.86969471332837, "grad_norm": 0.006394802479059686, "kl": 0.09759521484375, "learning_rate": 5.285358153423222e-07, "loss": 9.768750169314444e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2450, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 73.53125286102295, "completions/min_length": 31.875, "epoch": 4.871680317696699, "grad_norm": 0.004573515631246396, "kl": 0.10382080078125, "learning_rate": 5.282208395956672e-07, "loss": 0.00010383043991168961, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2451, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 70.07291793823242, "completions/min_length": 28.0, "epoch": 4.873665922065029, "grad_norm": 0.0039011895581462595, "kl": 0.085845947265625, "learning_rate": 5.279058526136932e-07, "loss": 8.593181701144204e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2452, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 65.65625190734863, "completions/min_length": 32.625, "epoch": 4.875651526433359, "grad_norm": 0.004034131568846768, "kl": 0.096527099609375, "learning_rate": 5.275908545218031e-07, "loss": 9.660809155320749e-05, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2453, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 69.42708492279053, "completions/min_length": 33.75, "epoch": 4.877637130801688, "grad_norm": 0.7483998493559936, "kl": 0.1092529296875, "learning_rate": 5.272758454454046e-07, "loss": -0.0015418616821989417, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2454, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.875, "completions/mean_length": 78.88541889190674, "completions/min_length": 31.875, "epoch": 4.879622735170018, "grad_norm": 0.005481427973184682, "kl": 0.089599609375, "learning_rate": 5.269608255099093e-07, "loss": 8.953303040470928e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2455, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 72.9479193687439, "completions/min_length": 34.125, "epoch": 4.881608339538347, "grad_norm": 0.005171843779926468, "kl": 0.10986328125, "learning_rate": 5.266457948407335e-07, "loss": 0.00010994139302056283, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2456, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 72.95833539962769, "completions/min_length": 31.625, "epoch": 4.883593943906677, "grad_norm": 0.9559258584998357, "kl": 0.0894775390625, "learning_rate": 5.263307535632977e-07, "loss": 0.008330987766385078, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2457, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 67.45833587646484, "completions/min_length": 32.375, "epoch": 4.885579548275006, "grad_norm": 1.628687917754534, "kl": 0.11138916015625, "learning_rate": 5.260157018030265e-07, "loss": 0.012987165711820126, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2458, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 71.69791984558105, "completions/min_length": 35.875, "epoch": 4.887565152643336, "grad_norm": 0.003774112920988199, "kl": 0.0972900390625, "learning_rate": 5.257006396853487e-07, "loss": 9.727019642014056e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2459, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 79.36458683013916, "completions/min_length": 36.75, "epoch": 4.889550757011666, "grad_norm": 1.920496296676164, "kl": 0.125518798828125, "learning_rate": 5.253855673356974e-07, "loss": 0.0001255422830581665, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2460, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 64.71875190734863, "completions/min_length": 30.875, "epoch": 4.891536361379995, "grad_norm": 1.0317472636902048, "kl": 0.09527587890625, "learning_rate": 5.250704848795093e-07, "loss": -0.007099680602550507, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2461, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.375, "completions/mean_length": 80.33333492279053, "completions/min_length": 31.25, "epoch": 4.893521965748325, "grad_norm": 0.005030844334698776, "kl": 0.090576171875, "learning_rate": 5.247553924422259e-07, "loss": 9.064136247616261e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2462, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 67.96875143051147, "completions/min_length": 30.875, "epoch": 4.895507570116655, "grad_norm": 0.006903793937059978, "kl": 0.0970458984375, "learning_rate": 5.244402901492917e-07, "loss": 9.697987115941942e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2463, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 63.95833492279053, "completions/min_length": 30.0, "epoch": 4.897493174484984, "grad_norm": 1.2059173504728449, "kl": 0.08685302734375, "learning_rate": 5.241251781261563e-07, "loss": -0.00625160988420248, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2464, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 74.57291984558105, "completions/min_length": 28.875, "epoch": 4.899478778853314, "grad_norm": 0.0054901913850022395, "kl": 0.1116943359375, "learning_rate": 5.23810056498272e-07, "loss": 0.00011166576587129384, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2465, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 61.45833492279053, "completions/min_length": 32.625, "epoch": 4.901464383221644, "grad_norm": 1.609978848652296, "kl": 0.15814208984375, "learning_rate": 5.234949253910957e-07, "loss": -0.012825253419578075, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2466, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.125, "completions/mean_length": 60.145835399627686, "completions/min_length": 29.625, "epoch": 4.903449987589973, "grad_norm": 1.4608919173600083, "kl": 0.122802734375, "learning_rate": 5.231797849300878e-07, "loss": 0.008462455123662949, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2467, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 60.34375190734863, "completions/min_length": 30.25, "epoch": 4.905435591958303, "grad_norm": 0.9285653224750158, "kl": 0.12066650390625, "learning_rate": 5.228646352407123e-07, "loss": 0.011191550642251968, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2468, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.125, "completions/mean_length": 58.75000190734863, "completions/min_length": 29.25, "epoch": 4.907421196326632, "grad_norm": 0.008073963425392131, "kl": 0.13873291015625, "learning_rate": 5.225494764484372e-07, "loss": 0.0001389403478242457, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2469, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.125, "completions/mean_length": 57.00000286102295, "completions/min_length": 33.0, "epoch": 4.909406800694962, "grad_norm": 0.008614501150336497, "kl": 0.097015380859375, "learning_rate": 5.222343086787338e-07, "loss": 9.70488108578138e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2470, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 68.39583539962769, "completions/min_length": 32.875, "epoch": 4.911392405063291, "grad_norm": 0.006892782724553454, "kl": 0.08892822265625, "learning_rate": 5.219191320570773e-07, "loss": 8.886732393875718e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2471, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.375, "completions/mean_length": 66.87500190734863, "completions/min_length": 32.0, "epoch": 4.913378009431621, "grad_norm": 0.9343885714201815, "kl": 0.1260986328125, "learning_rate": 5.216039467089462e-07, "loss": 0.00012606134987436235, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2472, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 66.81250190734863, "completions/min_length": 25.125, "epoch": 4.915363613799951, "grad_norm": 0.005987858396304022, "kl": 0.09649658203125, "learning_rate": 5.212887527598224e-07, "loss": 9.650029096519575e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2473, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 66.92708444595337, "completions/min_length": 31.5, "epoch": 4.91734921816828, "grad_norm": 0.007730737343364597, "kl": 0.102142333984375, "learning_rate": 5.209735503351913e-07, "loss": 0.00010213504720013589, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2474, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 59.875001430511475, "completions/min_length": 29.75, "epoch": 4.91933482253661, "grad_norm": 0.006081899602236584, "kl": 0.093048095703125, "learning_rate": 5.20658339560542e-07, "loss": 9.307700383942574e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2475, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 67.15625143051147, "completions/min_length": 32.25, "epoch": 4.9213204269049395, "grad_norm": 0.004609696009971894, "kl": 0.11077880859375, "learning_rate": 5.203431205613663e-07, "loss": 0.00011090931366197765, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2476, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 68.11458539962769, "completions/min_length": 30.625, "epoch": 4.923306031273269, "grad_norm": 0.004358524050330704, "kl": 0.110565185546875, "learning_rate": 5.200278934631599e-07, "loss": 0.00011049947352148592, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2477, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 65.84375286102295, "completions/min_length": 29.75, "epoch": 4.9252916356415986, "grad_norm": 0.007875402272716112, "kl": 0.11273193359375, "learning_rate": 5.197126583914213e-07, "loss": 0.00011280830949544907, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2478, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 64.03125190734863, "completions/min_length": 30.5, "epoch": 4.9272772400099285, "grad_norm": 0.0068509544153078855, "kl": 0.11297607421875, "learning_rate": 5.193974154716523e-07, "loss": 0.00011290029942756519, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2479, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 58.70833396911621, "completions/min_length": 27.625, "epoch": 4.929262844378258, "grad_norm": 0.0070226259483188004, "kl": 0.09039306640625, "learning_rate": 5.190821648293579e-07, "loss": 9.039058932103217e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2480, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 52.59375238418579, "completions/min_length": 29.75, "epoch": 4.9312484487465875, "grad_norm": 0.8564621064781582, "kl": 0.084075927734375, "learning_rate": 5.187669065900461e-07, "loss": -0.005659153684973717, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2481, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 65.82291889190674, "completions/min_length": 31.75, "epoch": 4.933234053114917, "grad_norm": 0.9206816560403929, "kl": 0.107421875, "learning_rate": 5.18451640879228e-07, "loss": 0.00010737528646131977, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2482, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 59.843751430511475, "completions/min_length": 25.75, "epoch": 4.9352196574832465, "grad_norm": 0.005017533976612992, "kl": 0.0865478515625, "learning_rate": 5.181363678224175e-07, "loss": 8.650618110550568e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2483, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 68.54166889190674, "completions/min_length": 35.5, "epoch": 4.937205261851576, "grad_norm": 1.5375153223407034, "kl": 0.12274169921875, "learning_rate": 5.178210875451317e-07, "loss": 0.0116494819521904, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.2357477955520153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2484, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 60.562501430511475, "completions/min_length": 31.25, "epoch": 4.9391908662199056, "grad_norm": 0.0042275951610442905, "kl": 0.09979248046875, "learning_rate": 5.175058001728901e-07, "loss": 9.977295849239454e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2485, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 59.906251430511475, "completions/min_length": 29.125, "epoch": 4.9411764705882355, "grad_norm": 1.9155401228502713, "kl": 0.103973388671875, "learning_rate": 5.171905058312156e-07, "loss": 0.004133741371333599, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2486, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 64.1666693687439, "completions/min_length": 31.75, "epoch": 4.943162074956565, "grad_norm": 1.4105166445347717, "kl": 0.112548828125, "learning_rate": 5.168752046456335e-07, "loss": 0.00011263291526120156, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2487, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 70.8541693687439, "completions/min_length": 32.125, "epoch": 4.9451476793248945, "grad_norm": 1.4548254308666484, "kl": 0.120361328125, "learning_rate": 5.165598967416721e-07, "loss": 0.003328974125906825, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2488, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.5, "completions/mean_length": 62.14583492279053, "completions/min_length": 32.375, "epoch": 4.9471332836932245, "grad_norm": 0.005293333214922288, "kl": 0.10101318359375, "learning_rate": 5.162445822448622e-07, "loss": 0.00010100413783220574, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2489, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.375, "completions/mean_length": 54.687501430511475, "completions/min_length": 27.625, "epoch": 4.9491188880615535, "grad_norm": 0.0041782129467817985, "kl": 0.094268798828125, "learning_rate": 5.159292612807368e-07, "loss": 9.41972466534935e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2490, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.875, "completions/mean_length": 56.531251430511475, "completions/min_length": 27.875, "epoch": 4.9511044924298835, "grad_norm": 0.01139219662701482, "kl": 0.0928955078125, "learning_rate": 5.156139339748325e-07, "loss": 9.280835365643725e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2491, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 64.51041841506958, "completions/min_length": 30.25, "epoch": 4.953090096798213, "grad_norm": 0.007939537988386461, "kl": 0.09747314453125, "learning_rate": 5.152986004526874e-07, "loss": 9.744924318511039e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2492, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 66.72916889190674, "completions/min_length": 29.875, "epoch": 4.9550757011665425, "grad_norm": 0.006610433613237356, "kl": 0.10546875, "learning_rate": 5.149832608398429e-07, "loss": 0.00010541768278926611, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2493, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 64.06250190734863, "completions/min_length": 27.125, "epoch": 4.9570613055348725, "grad_norm": 0.0061374842737937935, "kl": 0.11907958984375, "learning_rate": 5.14667915261842e-07, "loss": 0.00011894351337105036, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2494, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 66.03125286102295, "completions/min_length": 33.0, "epoch": 4.9590469099032015, "grad_norm": 0.003986665833275534, "kl": 0.099822998046875, "learning_rate": 5.143525638442308e-07, "loss": 9.979990863939747e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2495, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 68.11458539962769, "completions/min_length": 30.125, "epoch": 4.9610325142715315, "grad_norm": 0.00541753933868002, "kl": 0.11151123046875, "learning_rate": 5.140372067125574e-07, "loss": 0.00011152663500979543, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2496, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 69.62500238418579, "completions/min_length": 29.375, "epoch": 4.9630181186398605, "grad_norm": 0.007136145583559292, "kl": 0.1041259765625, "learning_rate": 5.137218439923724e-07, "loss": 0.00010399991151643917, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2497, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 59.75000286102295, "completions/min_length": 24.625, "epoch": 4.9650037230081905, "grad_norm": 0.004386399532308572, "kl": 0.09515380859375, "learning_rate": 5.134064758092279e-07, "loss": 9.512354154139757e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2498, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 65.00000095367432, "completions/min_length": 29.75, "epoch": 4.96698932737652, "grad_norm": 1.2146127555719877, "kl": 0.1259765625, "learning_rate": 5.130911022886791e-07, "loss": 0.01988319493830204, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2499, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 65.19791841506958, "completions/min_length": 28.75, "epoch": 4.9689749317448495, "grad_norm": 0.004769253733993392, "kl": 0.10015869140625, "learning_rate": 5.12775723556283e-07, "loss": 0.0001002311910269782, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2500, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 64.46875190734863, "completions/min_length": 32.375, "epoch": 4.9709605361131795, "grad_norm": 0.0048734435667527045, "kl": 0.0955810546875, "learning_rate": 5.124603397375984e-07, "loss": 9.563114144839346e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2501, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 62.10416793823242, "completions/min_length": 29.375, "epoch": 4.972946140481509, "grad_norm": 0.006828807784757375, "kl": 0.10101318359375, "learning_rate": 5.121449509581864e-07, "loss": 0.00010100104555021971, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2502, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 69.08333492279053, "completions/min_length": 31.375, "epoch": 4.9749317448498385, "grad_norm": 1.626345787410207, "kl": 0.11474609375, "learning_rate": 5.118295573436099e-07, "loss": 0.004872969351708889, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2503, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 60.15625238418579, "completions/min_length": 27.625, "epoch": 4.976917349218168, "grad_norm": 1.62017652658355, "kl": 0.14483642578125, "learning_rate": 5.115141590194341e-07, "loss": -0.012070084922015667, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.18335824459791183, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2504, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 57.35416841506958, "completions/min_length": 25.625, "epoch": 4.978902953586498, "grad_norm": 2.8349164905215627, "kl": 0.087493896484375, "learning_rate": 5.111987561112255e-07, "loss": 0.009440974332392216, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.1783013790845871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2505, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 72.67708587646484, "completions/min_length": 31.25, "epoch": 4.980888557954827, "grad_norm": 0.005239150046712194, "kl": 0.109619140625, "learning_rate": 5.108833487445531e-07, "loss": 0.00010942328663077205, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2506, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 60.666667461395264, "completions/min_length": 33.875, "epoch": 4.982874162323157, "grad_norm": 0.006901586637242084, "kl": 0.09808349609375, "learning_rate": 5.10567937044987e-07, "loss": 9.804704313864931e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2507, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 57.05208492279053, "completions/min_length": 27.375, "epoch": 4.9848597666914864, "grad_norm": 0.0037095853385381385, "kl": 0.117156982421875, "learning_rate": 5.102525211380993e-07, "loss": 0.0001172378397313878, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2508, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 68.05208492279053, "completions/min_length": 29.625, "epoch": 4.986845371059816, "grad_norm": 0.008658319096287257, "kl": 0.1197509765625, "learning_rate": 5.09937101149464e-07, "loss": 0.00011973457731073722, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2509, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 60.468751430511475, "completions/min_length": 25.75, "epoch": 4.9888309754281455, "grad_norm": 0.007608958178471858, "kl": 0.1156005859375, "learning_rate": 5.096216772046566e-07, "loss": 0.0001156711223302409, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2510, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 69.27083539962769, "completions/min_length": 26.625, "epoch": 4.990816579796475, "grad_norm": 0.008256515988531997, "kl": 0.12310791015625, "learning_rate": 5.09306249429254e-07, "loss": 0.00012306516873650253, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2511, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.375, "completions/mean_length": 57.906250953674316, "completions/min_length": 21.5, "epoch": 4.992802184164805, "grad_norm": 0.011586377581446278, "kl": 0.100341796875, "learning_rate": 5.089908179488346e-07, "loss": 0.0001003802681225352, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2512, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 59.16666793823242, "completions/min_length": 27.375, "epoch": 4.994787788533134, "grad_norm": 0.006509803796658807, "kl": 0.113433837890625, "learning_rate": 5.086753828889787e-07, "loss": 0.00011339227057760581, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2513, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 66.36458396911621, "completions/min_length": 29.25, "epoch": 4.996773392901464, "grad_norm": 1.0638713380695677, "kl": 0.10284423828125, "learning_rate": 5.083599443752674e-07, "loss": -0.004439922980964184, "memory(GiB)": 94.21, "reward": 1.5520833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2514, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 61.145835399627686, "completions/min_length": 29.625, "epoch": 4.998758997269794, "grad_norm": 1.263638038105288, "kl": 0.09698486328125, "learning_rate": 5.080445025332837e-07, "loss": -0.00285749277099967, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2515, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 60.38541793823242, "completions/min_length": 30.5, "epoch": 5.00198560436833, "grad_norm": 0.00495686285155662, "kl": 0.08428955078125, "learning_rate": 5.077290574886117e-07, "loss": 8.42295921756886e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2516, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 58.625001430511475, "completions/min_length": 27.625, "epoch": 5.003971208736659, "grad_norm": 0.004849195416877281, "kl": 0.10198974609375, "learning_rate": 5.074136093668371e-07, "loss": 0.00010205099533777684, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2517, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 69.84375143051147, "completions/min_length": 32.125, "epoch": 5.005956813104989, "grad_norm": 0.004124959605075942, "kl": 0.098876953125, "learning_rate": 5.070981582935461e-07, "loss": 9.879124991130084e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2518, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 67.52083492279053, "completions/min_length": 30.5, "epoch": 5.007942417473318, "grad_norm": 0.007869952992425459, "kl": 0.1163330078125, "learning_rate": 5.067827043943265e-07, "loss": 0.00011634735710686073, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2519, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 60.76041841506958, "completions/min_length": 29.0, "epoch": 5.009928021841648, "grad_norm": 0.004514639479132769, "kl": 0.0968017578125, "learning_rate": 5.064672477947674e-07, "loss": 9.674839384388179e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2520, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 70.2916693687439, "completions/min_length": 29.5, "epoch": 5.011913626209978, "grad_norm": 2.6740408899220762, "kl": 0.11004638671875, "learning_rate": 5.061517886204592e-07, "loss": 0.006533198058605194, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.06846532225608826, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.17834587395191193, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2521, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 63.43750238418579, "completions/min_length": 31.75, "epoch": 5.013899230578307, "grad_norm": 0.006898258000778394, "kl": 0.1104736328125, "learning_rate": 5.058363269969921e-07, "loss": 0.00011039745004381984, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2522, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 70.04166889190674, "completions/min_length": 31.5, "epoch": 5.015884834946637, "grad_norm": 0.00654909731383044, "kl": 0.111785888671875, "learning_rate": 5.055208630499588e-07, "loss": 0.00011178172280779108, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2523, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 55.947917461395264, "completions/min_length": 29.0, "epoch": 5.017870439314967, "grad_norm": 2.7380943180690016, "kl": 0.0997314453125, "learning_rate": 5.052053969049519e-07, "loss": 0.005968490149825811, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2524, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.875, "completions/mean_length": 64.73958539962769, "completions/min_length": 30.375, "epoch": 5.019856043683296, "grad_norm": 0.007934944139124055, "kl": 0.1082763671875, "learning_rate": 5.048899286875655e-07, "loss": 0.00010820224997587502, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2525, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.875, "completions/mean_length": 59.35416793823242, "completions/min_length": 28.375, "epoch": 5.021841648051626, "grad_norm": 0.0064653226573720135, "kl": 0.10858154296875, "learning_rate": 5.045744585233937e-07, "loss": 0.00010868739627767354, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2526, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.875, "completions/mean_length": 62.572919845581055, "completions/min_length": 31.375, "epoch": 5.023827252419955, "grad_norm": 2.1455342436718032, "kl": 0.1002197265625, "learning_rate": 5.042589865380325e-07, "loss": 0.0008290037512779236, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2527, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 60.55208492279053, "completions/min_length": 31.125, "epoch": 5.025812856788285, "grad_norm": 0.005044840531814492, "kl": 0.093353271484375, "learning_rate": 5.039435128570778e-07, "loss": 9.343147394247353e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2528, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 73.46875286102295, "completions/min_length": 28.875, "epoch": 5.027798461156615, "grad_norm": 0.006163497835603591, "kl": 0.100372314453125, "learning_rate": 5.036280376061264e-07, "loss": 0.00010037500032922253, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2529, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 69.97916793823242, "completions/min_length": 30.375, "epoch": 5.029784065524944, "grad_norm": 0.007583189517541824, "kl": 0.1162109375, "learning_rate": 5.033125609107757e-07, "loss": 0.00011631062807282433, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2530, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 66.57291889190674, "completions/min_length": 30.375, "epoch": 5.031769669893274, "grad_norm": 0.004714270366371135, "kl": 0.105712890625, "learning_rate": 5.029970828966236e-07, "loss": 0.0001056095861713402, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2531, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 83.739586353302, "completions/min_length": 31.125, "epoch": 5.033755274261603, "grad_norm": 0.005708425257391247, "kl": 0.12255859375, "learning_rate": 5.02681603689269e-07, "loss": 0.00012249842984601855, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2532, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 64.87500143051147, "completions/min_length": 28.5, "epoch": 5.035740878629933, "grad_norm": 0.004570930730190933, "kl": 0.100189208984375, "learning_rate": 5.023661234143106e-07, "loss": 0.00010015173756983131, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2533, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 55.562500953674316, "completions/min_length": 27.25, "epoch": 5.037726482998263, "grad_norm": 0.006616829847489677, "kl": 0.109161376953125, "learning_rate": 5.020506421973479e-07, "loss": 0.00010916464816546068, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2534, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 65.89583539962769, "completions/min_length": 27.125, "epoch": 5.039712087366592, "grad_norm": 0.0034828922211539307, "kl": 0.0919189453125, "learning_rate": 5.017351601639808e-07, "loss": 9.19168523978442e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2535, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 63.44791889190674, "completions/min_length": 31.0, "epoch": 5.041697691734922, "grad_norm": 0.0051874652623181425, "kl": 0.097686767578125, "learning_rate": 5.014196774398093e-07, "loss": 9.773996862350032e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2536, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 65.20833492279053, "completions/min_length": 29.75, "epoch": 5.043683296103252, "grad_norm": 1.9653830777598411, "kl": 0.38671875, "learning_rate": 5.01104194150434e-07, "loss": -0.0030101314187049866, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2537, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 61.875000953674316, "completions/min_length": 28.375, "epoch": 5.045668900471581, "grad_norm": 0.007699093583365529, "kl": 0.09490966796875, "learning_rate": 5.007887104214553e-07, "loss": 9.496343409409747e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2538, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 66.62500095367432, "completions/min_length": 29.75, "epoch": 5.047654504839911, "grad_norm": 0.003927864273868547, "kl": 0.098236083984375, "learning_rate": 5.004732263784741e-07, "loss": 9.817550017032772e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2539, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 67.05208492279053, "completions/min_length": 29.125, "epoch": 5.04964010920824, "grad_norm": 1.2703279602329227, "kl": 0.10308837890625, "learning_rate": 5.001577421470915e-07, "loss": -0.009337382391095161, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2540, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 67.42708492279053, "completions/min_length": 32.875, "epoch": 5.05162571357657, "grad_norm": 1.6264383387918246, "kl": 0.1051025390625, "learning_rate": 4.998422578529084e-07, "loss": -0.003819418605417013, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2541, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 72.43750190734863, "completions/min_length": 31.75, "epoch": 5.0536113179449, "grad_norm": 0.004149466904815951, "kl": 0.094024658203125, "learning_rate": 4.995267736215257e-07, "loss": 9.400384442415088e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2542, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 65.57291889190674, "completions/min_length": 29.625, "epoch": 5.055596922313229, "grad_norm": 0.020291987098361323, "kl": 0.115631103515625, "learning_rate": 4.992112895785447e-07, "loss": 0.00011562337022041902, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2543, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 67.29166841506958, "completions/min_length": 28.625, "epoch": 5.057582526681559, "grad_norm": 0.023178408926272404, "kl": 0.1390380859375, "learning_rate": 4.98895805849566e-07, "loss": 0.0001389832905260846, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2544, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 67.63541841506958, "completions/min_length": 32.375, "epoch": 5.059568131049888, "grad_norm": 0.004945961044770626, "kl": 0.10650634765625, "learning_rate": 4.985803225601908e-07, "loss": 0.00010636872320901603, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2545, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 70.38541889190674, "completions/min_length": 32.5, "epoch": 5.061553735418218, "grad_norm": 0.003946838572823769, "kl": 0.10302734375, "learning_rate": 4.982648398360193e-07, "loss": 0.00010314557584933937, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2546, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.875, "completions/mean_length": 61.81250190734863, "completions/min_length": 31.5, "epoch": 5.063539339786548, "grad_norm": 0.004165742620855547, "kl": 0.101654052734375, "learning_rate": 4.979493578026522e-07, "loss": 0.00010163609113078564, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2547, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 69.52083539962769, "completions/min_length": 29.75, "epoch": 5.065524944154877, "grad_norm": 0.004493888874728538, "kl": 0.08056640625, "learning_rate": 4.976338765856895e-07, "loss": 8.054426871240139e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2548, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 66.97916889190674, "completions/min_length": 32.0, "epoch": 5.067510548523207, "grad_norm": 0.006237478390847987, "kl": 0.123779296875, "learning_rate": 4.97318396310731e-07, "loss": 0.0001237476826645434, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2549, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 68.70833587646484, "completions/min_length": 29.0, "epoch": 5.069496152891537, "grad_norm": 0.004238779272408308, "kl": 0.124847412109375, "learning_rate": 4.970029171033763e-07, "loss": 0.00012498561409302056, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2550, "train_speed(iter/s)": 0.022723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 246.125, "completions/mean_length": 76.25000238418579, "completions/min_length": 30.625, "epoch": 5.071481757259866, "grad_norm": 1.0745390531959413, "kl": 0.14013671875, "learning_rate": 4.966874390892243e-07, "loss": 0.02296457812190056, "memory(GiB)": 94.21, "reward": 1.5833333432674408, "reward_std": 0.08330589532852173, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.30885961651802063, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2551, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 68.57291889190674, "completions/min_length": 29.875, "epoch": 5.073467361628196, "grad_norm": 0.0032185030285077965, "kl": 0.082061767578125, "learning_rate": 4.963719623938737e-07, "loss": 8.208003418985754e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2552, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 64.27083444595337, "completions/min_length": 31.0, "epoch": 5.075452965996525, "grad_norm": 0.003599994897597821, "kl": 0.0941162109375, "learning_rate": 4.960564871429222e-07, "loss": 9.41462567425333e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2553, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 63.94791889190674, "completions/min_length": 31.125, "epoch": 5.077438570364855, "grad_norm": 0.004342573103799313, "kl": 0.09063720703125, "learning_rate": 4.957410134619675e-07, "loss": 9.062183380592614e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2554, "train_speed(iter/s)": 0.022722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 70.37500238418579, "completions/min_length": 27.375, "epoch": 5.079424174733185, "grad_norm": 0.005822693555720189, "kl": 0.099609375, "learning_rate": 4.954255414766061e-07, "loss": 9.948282968252897e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2555, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 71.44791841506958, "completions/min_length": 32.125, "epoch": 5.081409779101514, "grad_norm": 0.00457032319740269, "kl": 0.0821533203125, "learning_rate": 4.951100713124345e-07, "loss": 8.20490822661668e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2556, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 73.79166889190674, "completions/min_length": 31.125, "epoch": 5.083395383469844, "grad_norm": 0.006224522880348564, "kl": 0.115478515625, "learning_rate": 4.94794603095048e-07, "loss": 0.00011529694893397391, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2557, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 62.89583492279053, "completions/min_length": 30.375, "epoch": 5.085380987838173, "grad_norm": 0.004684628436406768, "kl": 0.0975341796875, "learning_rate": 4.944791369500413e-07, "loss": 9.747844160301611e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2558, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 72.29166841506958, "completions/min_length": 33.25, "epoch": 5.087366592206503, "grad_norm": 0.003619857538395175, "kl": 0.0911865234375, "learning_rate": 4.941636730030078e-07, "loss": 9.106974903261289e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2559, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 70.57291889190674, "completions/min_length": 32.625, "epoch": 5.089352196574833, "grad_norm": 1.2921302040655736, "kl": 0.126220703125, "learning_rate": 4.93848211379541e-07, "loss": 0.002803318202495575, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2560, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 68.22916984558105, "completions/min_length": 28.0, "epoch": 5.091337800943162, "grad_norm": 1.1764317021676807, "kl": 0.13055419921875, "learning_rate": 4.935327522052325e-07, "loss": -0.0032629664056003094, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2561, "train_speed(iter/s)": 0.022721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 78.27083444595337, "completions/min_length": 28.375, "epoch": 5.093323405311492, "grad_norm": 0.9938929282267596, "kl": 0.10723876953125, "learning_rate": 4.932172956056734e-07, "loss": 0.010715566575527191, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2562, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 70.97916841506958, "completions/min_length": 34.625, "epoch": 5.095309009679822, "grad_norm": 0.003754987558670675, "kl": 0.10089111328125, "learning_rate": 4.929018417064539e-07, "loss": 0.00010075596946990117, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2563, "train_speed(iter/s)": 0.02272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 71.34375190734863, "completions/min_length": 30.25, "epoch": 5.097294614048151, "grad_norm": 0.008900418972150936, "kl": 0.10845947265625, "learning_rate": 4.92586390633163e-07, "loss": 0.00010846446821233258, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2564, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 70.76041793823242, "completions/min_length": 32.125, "epoch": 5.099280218416481, "grad_norm": 0.03464225694956673, "kl": 0.092041015625, "learning_rate": 4.922709425113883e-07, "loss": 9.195441816700622e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2565, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 64.87500095367432, "completions/min_length": 28.75, "epoch": 5.10126582278481, "grad_norm": 1.5202377386924777, "kl": 0.236419677734375, "learning_rate": 4.919554974667164e-07, "loss": -0.008266448974609375, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2566, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 72.78125238418579, "completions/min_length": 28.125, "epoch": 5.10325142715314, "grad_norm": 0.0064623974494811675, "kl": 0.11212158203125, "learning_rate": 4.916400556247327e-07, "loss": 0.00011209688091184944, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2567, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 70.33333539962769, "completions/min_length": 31.25, "epoch": 5.10523703152147, "grad_norm": 0.004991632533892316, "kl": 0.107666015625, "learning_rate": 4.913246171110215e-07, "loss": 0.0001076382104656659, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2568, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 77.48958539962769, "completions/min_length": 30.375, "epoch": 5.107222635889799, "grad_norm": 0.005269814049016017, "kl": 0.12255859375, "learning_rate": 4.910091820511653e-07, "loss": 0.00012264422548469156, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2569, "train_speed(iter/s)": 0.022719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 71.30208492279053, "completions/min_length": 32.5, "epoch": 5.109208240258129, "grad_norm": 0.9829264788720672, "kl": 0.0919189453125, "learning_rate": 4.90693750570746e-07, "loss": -0.008390005677938461, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2570, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.375, "completions/mean_length": 75.31250238418579, "completions/min_length": 32.75, "epoch": 5.111193844626458, "grad_norm": 1.396398857368396, "kl": 0.1234130859375, "learning_rate": 4.903783227953433e-07, "loss": -0.00040790438652038574, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.645833333954215, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2571, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 67.64583444595337, "completions/min_length": 23.5, "epoch": 5.113179448994788, "grad_norm": 0.004563685966345267, "kl": 0.090576171875, "learning_rate": 4.900628988505359e-07, "loss": 9.065106132766232e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2572, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 65.6041693687439, "completions/min_length": 30.0, "epoch": 5.115165053363118, "grad_norm": 0.0050242112121208484, "kl": 0.096832275390625, "learning_rate": 4.897474788619006e-07, "loss": 9.683077223598957e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2573, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 66.46875143051147, "completions/min_length": 27.75, "epoch": 5.117150657731447, "grad_norm": 0.0034882707821251575, "kl": 0.11846923828125, "learning_rate": 4.894320629550132e-07, "loss": 0.00011829341383418068, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2574, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 63.031251430511475, "completions/min_length": 28.625, "epoch": 5.119136262099777, "grad_norm": 0.010017185729204422, "kl": 0.0941162109375, "learning_rate": 4.891166512554471e-07, "loss": 9.421360300621018e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2575, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 69.20833587646484, "completions/min_length": 32.25, "epoch": 5.121121866468107, "grad_norm": 0.9940637560083075, "kl": 0.101318359375, "learning_rate": 4.888012438887744e-07, "loss": 0.0015658674528822303, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2576, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 71.22916841506958, "completions/min_length": 30.75, "epoch": 5.123107470836436, "grad_norm": 0.00349634987423695, "kl": 0.088592529296875, "learning_rate": 4.884858409805659e-07, "loss": 8.854376937961206e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2577, "train_speed(iter/s)": 0.022718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 82.92708539962769, "completions/min_length": 39.0, "epoch": 5.125093075204766, "grad_norm": 1.320022766440065, "kl": 0.1160888671875, "learning_rate": 4.881704426563902e-07, "loss": -0.004320295061916113, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2578, "train_speed(iter/s)": 0.022717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 80.23958492279053, "completions/min_length": 33.375, "epoch": 5.127078679573095, "grad_norm": 0.0036361284087622525, "kl": 0.10821533203125, "learning_rate": 4.878550490418137e-07, "loss": 0.00010807962098624557, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2579, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 69.00000286102295, "completions/min_length": 26.875, "epoch": 5.129064283941425, "grad_norm": 0.004746080629299705, "kl": 0.1199951171875, "learning_rate": 4.875396602624017e-07, "loss": 0.00011998966510873288, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2580, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 77.73958587646484, "completions/min_length": 35.25, "epoch": 5.131049888309755, "grad_norm": 0.007535678592219246, "kl": 0.10858154296875, "learning_rate": 4.872242764437171e-07, "loss": 0.00010861566261155531, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2581, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 82.71875286102295, "completions/min_length": 31.375, "epoch": 5.133035492678084, "grad_norm": 0.8705752531512639, "kl": 0.09991455078125, "learning_rate": 4.869088977113207e-07, "loss": 9.991849947255105e-05, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2582, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 73.58333539962769, "completions/min_length": 33.0, "epoch": 5.135021097046414, "grad_norm": 0.004163078036784345, "kl": 0.08905029296875, "learning_rate": 4.86593524190772e-07, "loss": 8.906162111088634e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2583, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 70.66666841506958, "completions/min_length": 32.125, "epoch": 5.137006701414743, "grad_norm": 0.011931836470542755, "kl": 0.11639404296875, "learning_rate": 4.862781560076276e-07, "loss": 0.00011633748363237828, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2584, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 78.9791693687439, "completions/min_length": 34.375, "epoch": 5.138992305783073, "grad_norm": 0.0037652022100950295, "kl": 0.108123779296875, "learning_rate": 4.859627932874425e-07, "loss": 0.0001079783687600866, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2585, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 77.00000238418579, "completions/min_length": 29.0, "epoch": 5.140977910151403, "grad_norm": 0.003763644546763978, "kl": 0.09405517578125, "learning_rate": 4.856474361557691e-07, "loss": 9.412574581801891e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2586, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 73.93750143051147, "completions/min_length": 28.25, "epoch": 5.142963514519732, "grad_norm": 0.0036903668686288885, "kl": 0.09393310546875, "learning_rate": 4.85332084738158e-07, "loss": 9.381485142512247e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2587, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 70.145836353302, "completions/min_length": 29.875, "epoch": 5.144949118888062, "grad_norm": 0.004692676291984332, "kl": 0.085723876953125, "learning_rate": 4.850167391601573e-07, "loss": 8.578070992371067e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2588, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 67.31250190734863, "completions/min_length": 30.625, "epoch": 5.146934723256392, "grad_norm": 0.16010160207592325, "kl": 0.183502197265625, "learning_rate": 4.847013995473124e-07, "loss": 0.00018308368453290313, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2589, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 73.32291889190674, "completions/min_length": 31.125, "epoch": 5.148920327624721, "grad_norm": 0.0036904766295732475, "kl": 0.10711669921875, "learning_rate": 4.843860660251675e-07, "loss": 0.00010705438035074621, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2590, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 72.10417032241821, "completions/min_length": 28.375, "epoch": 5.150905931993051, "grad_norm": 0.00852112985528789, "kl": 0.11773681640625, "learning_rate": 4.840707387192631e-07, "loss": 0.0001178392194560729, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2591, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 73.41666793823242, "completions/min_length": 32.625, "epoch": 5.15289153636138, "grad_norm": 0.005945406501459239, "kl": 0.09442138671875, "learning_rate": 4.837554177551379e-07, "loss": 9.440961002837867e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2592, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.875, "completions/mean_length": 74.03125286102295, "completions/min_length": 37.25, "epoch": 5.15487714072971, "grad_norm": 0.005295513267288697, "kl": 0.11273193359375, "learning_rate": 4.834401032583279e-07, "loss": 0.0001126797214965336, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2593, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 72.15625286102295, "completions/min_length": 33.875, "epoch": 5.1568627450980395, "grad_norm": 0.004223853939786181, "kl": 0.1029052734375, "learning_rate": 4.831247953543665e-07, "loss": 0.00010293864033883438, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2594, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 63.54166841506958, "completions/min_length": 28.125, "epoch": 5.158848349466369, "grad_norm": 0.007317113593867267, "kl": 0.104248046875, "learning_rate": 4.828094941687845e-07, "loss": 0.00010427868983242661, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2595, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 69.96875238418579, "completions/min_length": 32.5, "epoch": 5.160833953834699, "grad_norm": 0.006002254546329571, "kl": 0.09857177734375, "learning_rate": 4.824941998271098e-07, "loss": 9.859363490249962e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2596, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 66.10416841506958, "completions/min_length": 28.125, "epoch": 5.162819558203028, "grad_norm": 0.006945710791103894, "kl": 0.09759521484375, "learning_rate": 4.821789124548684e-07, "loss": 9.772012708708644e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2597, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 66.2291693687439, "completions/min_length": 28.0, "epoch": 5.164805162571358, "grad_norm": 0.043844183191015844, "kl": 0.1370849609375, "learning_rate": 4.818636321775826e-07, "loss": 0.00013703471631743014, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2598, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 64.84375190734863, "completions/min_length": 26.875, "epoch": 5.1667907669396875, "grad_norm": 0.710353268314275, "kl": 0.0999755859375, "learning_rate": 4.81548359120772e-07, "loss": -0.001494353055022657, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2599, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 72.10416793823242, "completions/min_length": 33.0, "epoch": 5.168776371308017, "grad_norm": 1.2268984208000684, "kl": 0.107269287109375, "learning_rate": 4.812330934099539e-07, "loss": -0.007672019302845001, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2600, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 73.50000190734863, "completions/min_length": 34.125, "epoch": 5.1707619756763465, "grad_norm": 0.004836426166943891, "kl": 0.0980224609375, "learning_rate": 4.809178351706421e-07, "loss": 9.809209586819634e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2601, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 74.95833492279053, "completions/min_length": 34.0, "epoch": 5.1727475800446765, "grad_norm": 0.0046030617352338585, "kl": 0.107666015625, "learning_rate": 4.806025845283478e-07, "loss": 0.00010756956180557609, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2602, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 73.67708587646484, "completions/min_length": 30.125, "epoch": 5.174733184413006, "grad_norm": 0.005731457060869514, "kl": 0.09820556640625, "learning_rate": 4.802873416085787e-07, "loss": 9.825353481573984e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2603, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 66.82291746139526, "completions/min_length": 30.5, "epoch": 5.1767187887813355, "grad_norm": 0.007159933235794413, "kl": 0.11114501953125, "learning_rate": 4.7997210653684e-07, "loss": 0.00011110660852864385, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2604, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 68.46875190734863, "completions/min_length": 32.5, "epoch": 5.178704393149665, "grad_norm": 0.006762124669427222, "kl": 0.109954833984375, "learning_rate": 4.796568794386337e-07, "loss": 0.00010990996088366956, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2605, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 77.30208539962769, "completions/min_length": 30.0, "epoch": 5.1806899975179945, "grad_norm": 0.0034161967942836265, "kl": 0.10052490234375, "learning_rate": 4.793416604394581e-07, "loss": 0.00010052922152681276, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2606, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 74.36458539962769, "completions/min_length": 32.625, "epoch": 5.1826756018863245, "grad_norm": 1.7620352293194859, "kl": 0.11260986328125, "learning_rate": 4.790264496648087e-07, "loss": -0.003977940417826176, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2607, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 73.13541793823242, "completions/min_length": 34.25, "epoch": 5.1846612062546535, "grad_norm": 0.006612331233886834, "kl": 0.1142578125, "learning_rate": 4.787112472401778e-07, "loss": 0.00011414119944674894, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2608, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 63.97916841506958, "completions/min_length": 36.25, "epoch": 5.1866468106229835, "grad_norm": 0.004612474305952471, "kl": 0.093505859375, "learning_rate": 4.783960532910539e-07, "loss": 9.349848551210016e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2609, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 66.16666889190674, "completions/min_length": 30.125, "epoch": 5.188632414991313, "grad_norm": 0.0071084556070459696, "kl": 0.09454345703125, "learning_rate": 4.780808679429227e-07, "loss": 9.448091441299766e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2610, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.625, "completions/mean_length": 59.250001430511475, "completions/min_length": 28.5, "epoch": 5.1906180193596425, "grad_norm": 0.005476488372901364, "kl": 0.101776123046875, "learning_rate": 4.777656913212661e-07, "loss": 0.00010169532470172271, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2611, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 72.82291889190674, "completions/min_length": 34.125, "epoch": 5.1926036237279725, "grad_norm": 1.0597091670071848, "kl": 0.09930419921875, "learning_rate": 4.774505235515627e-07, "loss": -0.00698669021949172, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2612, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 78.31250286102295, "completions/min_length": 32.25, "epoch": 5.1945892280963015, "grad_norm": 0.0038470947091824846, "kl": 0.11309814453125, "learning_rate": 4.771353647592877e-07, "loss": 0.00011326118692522869, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2613, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 72.12500238418579, "completions/min_length": 30.375, "epoch": 5.1965748324646315, "grad_norm": 0.005765951218875941, "kl": 0.105224609375, "learning_rate": 4.7682021506991234e-07, "loss": 0.00010529119754210114, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2614, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 64.2916669845581, "completions/min_length": 28.0, "epoch": 5.198560436832961, "grad_norm": 0.005293132131858503, "kl": 0.09222412109375, "learning_rate": 4.765050746089044e-07, "loss": 9.2321504780557e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2615, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 70.47916793823242, "completions/min_length": 32.75, "epoch": 5.2005460412012905, "grad_norm": 0.005480880290834572, "kl": 0.0919189453125, "learning_rate": 4.7618994350172804e-07, "loss": 9.197047620546073e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2616, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 80.48958587646484, "completions/min_length": 35.0, "epoch": 5.2025316455696204, "grad_norm": 0.005516743594659246, "kl": 0.10064697265625, "learning_rate": 4.758748218738437e-07, "loss": 0.00010073870362248272, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2617, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 76.38541841506958, "completions/min_length": 32.375, "epoch": 5.2045172499379495, "grad_norm": 0.006437585680345584, "kl": 0.1337890625, "learning_rate": 4.755597098507081e-07, "loss": 0.00013380989548750222, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2618, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 68.94792032241821, "completions/min_length": 33.625, "epoch": 5.2065028543062795, "grad_norm": 0.0051204374356839475, "kl": 0.12884521484375, "learning_rate": 4.7524460755777416e-07, "loss": 0.00012870071805082262, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2619, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 61.343751430511475, "completions/min_length": 30.625, "epoch": 5.208488458674609, "grad_norm": 0.0047993589187518, "kl": 0.103759765625, "learning_rate": 4.7492951512049067e-07, "loss": 0.0001037592810462229, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2620, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 64.15625143051147, "completions/min_length": 32.875, "epoch": 5.2104740630429385, "grad_norm": 0.006071364234428801, "kl": 0.08392333984375, "learning_rate": 4.7461443266430266e-07, "loss": 8.384877583011985e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2621, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.875, "completions/mean_length": 61.70833492279053, "completions/min_length": 27.0, "epoch": 5.212459667411268, "grad_norm": 0.006307880013792626, "kl": 0.09686279296875, "learning_rate": 4.742993603146514e-07, "loss": 9.688713180366904e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2622, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 70.18750190734863, "completions/min_length": 30.625, "epoch": 5.2144452717795975, "grad_norm": 1.1016312246869608, "kl": 0.466827392578125, "learning_rate": 4.7398429819697363e-07, "loss": 0.0013472680002450943, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2623, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 64.12500333786011, "completions/min_length": 23.25, "epoch": 5.216430876147927, "grad_norm": 0.005868808569508863, "kl": 0.107269287109375, "learning_rate": 4.7366924643670213e-07, "loss": 0.00010730304347816855, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2624, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 70.79166984558105, "completions/min_length": 29.625, "epoch": 5.218416480516257, "grad_norm": 0.004022996270207801, "kl": 0.102569580078125, "learning_rate": 4.7335420515926643e-07, "loss": 0.00010262371506541967, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2625, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 67.0729193687439, "completions/min_length": 32.75, "epoch": 5.2204020848845865, "grad_norm": 0.004658142665238755, "kl": 0.09912109375, "learning_rate": 4.7303917449009075e-07, "loss": 9.901373414322734e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2626, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 68.71875238418579, "completions/min_length": 27.375, "epoch": 5.222387689252916, "grad_norm": 0.00399450224956877, "kl": 0.105987548828125, "learning_rate": 4.7272415455459545e-07, "loss": 0.00010581130482023582, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2627, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 57.156250953674316, "completions/min_length": 28.625, "epoch": 5.224373293621246, "grad_norm": 0.005320007114002999, "kl": 0.101318359375, "learning_rate": 4.724091454781969e-07, "loss": 0.00010140084486920387, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2628, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 65.96875238418579, "completions/min_length": 32.375, "epoch": 5.226358897989575, "grad_norm": 0.003597931263653341, "kl": 0.08306884765625, "learning_rate": 4.7209414738630684e-07, "loss": 8.3007755165454e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2629, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 72.72916841506958, "completions/min_length": 31.625, "epoch": 5.228344502357905, "grad_norm": 0.8677619530771741, "kl": 0.129425048828125, "learning_rate": 4.7177916040433285e-07, "loss": -1.3140961527824402e-05, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166669771075, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2630, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 65.65625190734863, "completions/min_length": 31.875, "epoch": 5.230330106726234, "grad_norm": 0.601444733348804, "kl": 0.11334228515625, "learning_rate": 4.714641846576776e-07, "loss": 0.015820711851119995, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2631, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 69.09375381469727, "completions/min_length": 31.625, "epoch": 5.232315711094564, "grad_norm": 0.004478085129754446, "kl": 0.1082763671875, "learning_rate": 4.7114922027174014e-07, "loss": 0.00010826517245732248, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2632, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 69.72916793823242, "completions/min_length": 35.0, "epoch": 5.234301315462894, "grad_norm": 1.7281601441009582, "kl": 0.09722900390625, "learning_rate": 4.7083426737191433e-07, "loss": 0.0013518178602680564, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2633, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 62.64583492279053, "completions/min_length": 29.875, "epoch": 5.236286919831223, "grad_norm": 0.009172960753181211, "kl": 0.085479736328125, "learning_rate": 4.705193260835898e-07, "loss": 8.555130625609308e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2634, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 74.26041793823242, "completions/min_length": 27.625, "epoch": 5.238272524199553, "grad_norm": 0.008078826512667574, "kl": 0.133392333984375, "learning_rate": 4.702043965321511e-07, "loss": 0.00013350852532312274, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2635, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 64.20833587646484, "completions/min_length": 27.375, "epoch": 5.240258128567882, "grad_norm": 0.005743604620532447, "kl": 0.08721923828125, "learning_rate": 4.698894788429785e-07, "loss": 8.715804142411798e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2636, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 70.03125190734863, "completions/min_length": 36.125, "epoch": 5.242243732936212, "grad_norm": 2.7837359727853075, "kl": 0.113037109375, "learning_rate": 4.6957457314144763e-07, "loss": 0.0014959839172661304, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2637, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.625, "completions/mean_length": 62.19791889190674, "completions/min_length": 30.875, "epoch": 5.244229337304542, "grad_norm": 0.003928589652051801, "kl": 0.10589599609375, "learning_rate": 4.692596795529289e-07, "loss": 0.00010605131683405489, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2638, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 64.34375238418579, "completions/min_length": 28.875, "epoch": 5.246214941672871, "grad_norm": 0.006335306633372392, "kl": 0.10504150390625, "learning_rate": 4.689447982027884e-07, "loss": 0.00010493789159227163, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2639, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 69.61458539962769, "completions/min_length": 27.75, "epoch": 5.248200546041201, "grad_norm": 0.00806029531397031, "kl": 0.11553955078125, "learning_rate": 4.6862992921638715e-07, "loss": 0.00011543689470272511, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2640, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 69.75000095367432, "completions/min_length": 29.5, "epoch": 5.250186150409531, "grad_norm": 0.7667616665562859, "kl": 0.090789794921875, "learning_rate": 4.683150727190811e-07, "loss": -0.002860710024833679, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2641, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 71.0416693687439, "completions/min_length": 34.125, "epoch": 5.25217175477786, "grad_norm": 0.005673957075700008, "kl": 0.12005615234375, "learning_rate": 4.6800022883622146e-07, "loss": 0.00012004860764136538, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2642, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.875, "completions/mean_length": 65.00000238418579, "completions/min_length": 34.875, "epoch": 5.25415735914619, "grad_norm": 0.9480174734421917, "kl": 0.1240234375, "learning_rate": 4.676853976931541e-07, "loss": -0.0024216333404183388, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2643, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 74.87500238418579, "completions/min_length": 28.0, "epoch": 5.256142963514519, "grad_norm": 0.011234550629193851, "kl": 0.1002197265625, "learning_rate": 4.673705794152202e-07, "loss": 0.00010017223394243047, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2644, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 70.79166841506958, "completions/min_length": 32.5, "epoch": 5.258128567882849, "grad_norm": 0.9134315240616419, "kl": 0.1092529296875, "learning_rate": 4.670557741277554e-07, "loss": 6.292884791037068e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2645, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 71.61458587646484, "completions/min_length": 36.125, "epoch": 5.260114172251179, "grad_norm": 0.0038829953407312034, "kl": 0.09124755859375, "learning_rate": 4.667409819560908e-07, "loss": 9.123456402448937e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2646, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 58.677085876464844, "completions/min_length": 26.125, "epoch": 5.262099776619508, "grad_norm": 0.003763861756350485, "kl": 0.1295166015625, "learning_rate": 4.6642620302555156e-07, "loss": 0.00012935773702338338, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2647, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 65.19791793823242, "completions/min_length": 27.875, "epoch": 5.264085380987838, "grad_norm": 0.004885265924074659, "kl": 0.104888916015625, "learning_rate": 4.661114374614581e-07, "loss": 0.00010491409193491563, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2648, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 66.70833539962769, "completions/min_length": 26.875, "epoch": 5.266070985356167, "grad_norm": 0.8058727246602714, "kl": 0.1195068359375, "learning_rate": 4.657966853891252e-07, "loss": 0.006727383937686682, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2649, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 70.91666984558105, "completions/min_length": 32.25, "epoch": 5.268056589724497, "grad_norm": 1.1097048789467958, "kl": 0.08563232421875, "learning_rate": 4.6548194693386253e-07, "loss": -0.004129012115299702, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2650, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 65.71875095367432, "completions/min_length": 32.0, "epoch": 5.270042194092827, "grad_norm": 0.0034455468977714772, "kl": 0.10345458984375, "learning_rate": 4.6516722222097375e-07, "loss": 0.00010354255209676921, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2651, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 69.05208492279053, "completions/min_length": 30.0, "epoch": 5.272027798461156, "grad_norm": 1.207912891456734, "kl": 0.120361328125, "learning_rate": 4.6485251137575804e-07, "loss": 0.007184509187936783, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2652, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 69.70833444595337, "completions/min_length": 33.5, "epoch": 5.274013402829486, "grad_norm": 0.0052535366879299245, "kl": 0.105133056640625, "learning_rate": 4.6453781452350825e-07, "loss": 0.00010509626008570194, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2653, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 72.72916984558105, "completions/min_length": 30.875, "epoch": 5.275999007197816, "grad_norm": 1.0883078647094717, "kl": 0.1103515625, "learning_rate": 4.642231317895121e-07, "loss": -0.0037160050123929977, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2654, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 70.31250238418579, "completions/min_length": 30.125, "epoch": 5.277984611566145, "grad_norm": 0.004094744670164528, "kl": 0.114990234375, "learning_rate": 4.639084632990512e-07, "loss": 0.00011494129284983501, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2655, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 67.22916889190674, "completions/min_length": 29.75, "epoch": 5.279970215934475, "grad_norm": 0.0034838891279340436, "kl": 0.09283447265625, "learning_rate": 4.63593809177402e-07, "loss": 9.281394159188494e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2656, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 64.04166984558105, "completions/min_length": 30.75, "epoch": 5.281955820302804, "grad_norm": 0.00645498781148834, "kl": 0.08660888671875, "learning_rate": 4.63279169549835e-07, "loss": 8.655428973725066e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2657, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 67.07291793823242, "completions/min_length": 28.375, "epoch": 5.283941424671134, "grad_norm": 0.010495825577192846, "kl": 0.097991943359375, "learning_rate": 4.629645445416148e-07, "loss": 9.803565626498312e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2658, "train_speed(iter/s)": 0.022716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 71.11458539962769, "completions/min_length": 32.25, "epoch": 5.285927029039464, "grad_norm": 0.0038338828230228164, "kl": 0.082305908203125, "learning_rate": 4.626499342780006e-07, "loss": 8.228716615121812e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2659, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 67.64583444595337, "completions/min_length": 34.625, "epoch": 5.287912633407793, "grad_norm": 0.003817949493188409, "kl": 0.0902099609375, "learning_rate": 4.623353388842453e-07, "loss": 9.016209514811635e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2660, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 77.76041984558105, "completions/min_length": 33.375, "epoch": 5.289898237776123, "grad_norm": 0.0039011975311036514, "kl": 0.1116943359375, "learning_rate": 4.6202075848559615e-07, "loss": 0.0001117331994464621, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2661, "train_speed(iter/s)": 0.022715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 65.81250238418579, "completions/min_length": 30.125, "epoch": 5.291883842144452, "grad_norm": 0.006911450215343453, "kl": 0.091064453125, "learning_rate": 4.6170619320729435e-07, "loss": 9.098846203414723e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2662, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 70.17708396911621, "completions/min_length": 34.875, "epoch": 5.293869446512782, "grad_norm": 0.003645681409957642, "kl": 0.0985107421875, "learning_rate": 4.613916431745749e-07, "loss": 9.846442117122933e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2663, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 75.00000238418579, "completions/min_length": 34.125, "epoch": 5.295855050881112, "grad_norm": 1.0842185962248985, "kl": 0.092529296875, "learning_rate": 4.610771085126669e-07, "loss": 0.01603594794869423, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2664, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 71.48958539962769, "completions/min_length": 33.25, "epoch": 5.297840655249441, "grad_norm": 0.006304608783192985, "kl": 0.10736083984375, "learning_rate": 4.6076258934679326e-07, "loss": 0.00010729036148404703, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2665, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 77.70833683013916, "completions/min_length": 36.875, "epoch": 5.299826259617771, "grad_norm": 1.0294134706974216, "kl": 0.10662841796875, "learning_rate": 4.60448085802171e-07, "loss": 0.008762389421463013, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2666, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 65.6354193687439, "completions/min_length": 30.875, "epoch": 5.301811863986101, "grad_norm": 0.0034014275666769452, "kl": 0.10137939453125, "learning_rate": 4.6013359800401066e-07, "loss": 0.00010149140143766999, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2667, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 65.114586353302, "completions/min_length": 27.5, "epoch": 5.30379746835443, "grad_norm": 0.7136705891887115, "kl": 0.092559814453125, "learning_rate": 4.5981912607751644e-07, "loss": 0.005484148394316435, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2668, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 77.72916984558105, "completions/min_length": 30.0, "epoch": 5.30578307272276, "grad_norm": 0.0057599309424786705, "kl": 0.10968017578125, "learning_rate": 4.5950467014788635e-07, "loss": 0.00010955911420751363, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2669, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 67.95833444595337, "completions/min_length": 28.375, "epoch": 5.307768677091089, "grad_norm": 0.005557647229112153, "kl": 0.11376953125, "learning_rate": 4.591902303403122e-07, "loss": 0.00011361856013536453, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2670, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 68.7291693687439, "completions/min_length": 30.125, "epoch": 5.309754281459419, "grad_norm": 0.0051339243785557525, "kl": 0.10882568359375, "learning_rate": 4.588758067799788e-07, "loss": 0.00010882370406761765, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2671, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 69.29166841506958, "completions/min_length": 31.625, "epoch": 5.311739885827749, "grad_norm": 0.006618099108824196, "kl": 0.10284423828125, "learning_rate": 4.58561399592065e-07, "loss": 0.00010289880447089672, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2672, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 72.29166793823242, "completions/min_length": 35.5, "epoch": 5.313725490196078, "grad_norm": 1.4420723541614189, "kl": 0.0989990234375, "learning_rate": 4.582470089017434e-07, "loss": -0.0015166203957051039, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2673, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 74.03125190734863, "completions/min_length": 30.875, "epoch": 5.315711094564408, "grad_norm": 1.0284370583636606, "kl": 0.1016845703125, "learning_rate": 4.579326348341794e-07, "loss": -0.006248476915061474, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2674, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 74.48958539962769, "completions/min_length": 36.5, "epoch": 5.317696698932737, "grad_norm": 0.0046209224330496, "kl": 0.09393310546875, "learning_rate": 4.576182775145319e-07, "loss": 9.3873604782857e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2675, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 62.47916793823242, "completions/min_length": 28.875, "epoch": 5.319682303301067, "grad_norm": 0.004443433702788989, "kl": 0.083984375, "learning_rate": 4.573039370679534e-07, "loss": 8.400942897424102e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2676, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 72.489586353302, "completions/min_length": 34.875, "epoch": 5.321667907669397, "grad_norm": 0.004462116333901407, "kl": 0.093505859375, "learning_rate": 4.569896136195895e-07, "loss": 9.333356138085946e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2677, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 63.145835399627686, "completions/min_length": 29.25, "epoch": 5.323653512037726, "grad_norm": 0.00423805904934543, "kl": 0.104736328125, "learning_rate": 4.566753072945791e-07, "loss": 0.0001045277458615601, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2678, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 71.29166889190674, "completions/min_length": 33.875, "epoch": 5.325639116406056, "grad_norm": 0.005030354068867479, "kl": 0.09124755859375, "learning_rate": 4.5636101821805416e-07, "loss": 9.109963139053434e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2679, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 65.43750143051147, "completions/min_length": 29.0, "epoch": 5.327624720774386, "grad_norm": 0.005062764091960789, "kl": 0.08856201171875, "learning_rate": 4.560467465151401e-07, "loss": 8.857058128342032e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2680, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 63.69791889190674, "completions/min_length": 26.0, "epoch": 5.329610325142715, "grad_norm": 0.004766491730719321, "kl": 0.0863037109375, "learning_rate": 4.5573249231095506e-07, "loss": 8.619282016297802e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2681, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 64.46875238418579, "completions/min_length": 26.875, "epoch": 5.331595929511045, "grad_norm": 0.004526369587408466, "kl": 0.09234619140625, "learning_rate": 4.5541825573061045e-07, "loss": 9.22142862691544e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2682, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 240.875, "completions/mean_length": 78.552086353302, "completions/min_length": 33.5, "epoch": 5.333581533879374, "grad_norm": 1.421181422291279, "kl": 0.0811767578125, "learning_rate": 4.551040368992104e-07, "loss": 0.03504526615142822, "memory(GiB)": 94.21, "reward": 1.8437500149011612, "reward_std": 0.0765465572476387, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.20272701978683472, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2683, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 69.43750286102295, "completions/min_length": 30.0, "epoch": 5.335567138247704, "grad_norm": 0.004339324024029077, "kl": 0.11724853515625, "learning_rate": 4.547898359418522e-07, "loss": 0.00011722946510417387, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2684, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 66.90625095367432, "completions/min_length": 31.625, "epoch": 5.337552742616034, "grad_norm": 0.7031222032366499, "kl": 0.1109619140625, "learning_rate": 4.54475652983626e-07, "loss": -0.007756437640637159, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2685, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 61.07291793823242, "completions/min_length": 30.375, "epoch": 5.339538346984363, "grad_norm": 0.004755163514806301, "kl": 0.091583251953125, "learning_rate": 4.541614881496146e-07, "loss": 9.161501657217741e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2686, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 68.19791984558105, "completions/min_length": 30.625, "epoch": 5.341523951352693, "grad_norm": 0.0042779028794131795, "kl": 0.091796875, "learning_rate": 4.5384734156489394e-07, "loss": 9.182580834021792e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2687, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 72.37500286102295, "completions/min_length": 33.25, "epoch": 5.343509555721022, "grad_norm": 0.004643375402621083, "kl": 0.13031005859375, "learning_rate": 4.5353321335453244e-07, "loss": 0.00013027619570493698, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2688, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 67.09375190734863, "completions/min_length": 32.625, "epoch": 5.345495160089352, "grad_norm": 1.250908987366685, "kl": 0.08953857421875, "learning_rate": 4.5321910364359115e-07, "loss": 0.002979150740429759, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2689, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 74.89583587646484, "completions/min_length": 30.5, "epoch": 5.347480764457682, "grad_norm": 0.005494014440971535, "kl": 0.1156005859375, "learning_rate": 4.529050125571241e-07, "loss": 0.00011558862024685368, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2690, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 70.1354193687439, "completions/min_length": 35.75, "epoch": 5.349466368826011, "grad_norm": 0.009284451392173784, "kl": 0.11395263671875, "learning_rate": 4.5259094022017735e-07, "loss": 0.00011395406909286976, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2691, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 58.56250190734863, "completions/min_length": 28.5, "epoch": 5.351451973194341, "grad_norm": 0.003516243504162406, "kl": 0.076904296875, "learning_rate": 4.5227688675778993e-07, "loss": 7.687011384405196e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2692, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 70.93750238418579, "completions/min_length": 27.75, "epoch": 5.353437577562671, "grad_norm": 1.0759825607814095, "kl": 0.1058349609375, "learning_rate": 4.519628522949931e-07, "loss": -0.007826106622815132, "memory(GiB)": 94.21, "reward": 1.5833333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.583333333954215, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2693, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 60.78125190734863, "completions/min_length": 24.25, "epoch": 5.355423181931, "grad_norm": 0.0038787620834825893, "kl": 0.09002685546875, "learning_rate": 4.516488369568109e-07, "loss": 8.985436579678208e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2694, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 75.93750286102295, "completions/min_length": 31.5, "epoch": 5.35740878629933, "grad_norm": 0.0037901288279317247, "kl": 0.0902099609375, "learning_rate": 4.513348408682596e-07, "loss": 9.014080569613725e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2695, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 73.89583492279053, "completions/min_length": 28.75, "epoch": 5.359394390667659, "grad_norm": 0.005275113504058885, "kl": 0.1119384765625, "learning_rate": 4.510208641543475e-07, "loss": 0.00011191405064892024, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2696, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 68.31250143051147, "completions/min_length": 29.25, "epoch": 5.361379995035989, "grad_norm": 0.021379974501245513, "kl": 0.12518310546875, "learning_rate": 4.5070690694007554e-07, "loss": 0.0001250390923814848, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2697, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 63.052086353302, "completions/min_length": 25.75, "epoch": 5.363365599404319, "grad_norm": 0.00412966554760557, "kl": 0.0975341796875, "learning_rate": 4.503929693504368e-07, "loss": 9.764985588844866e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2698, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.125, "completions/mean_length": 64.75000333786011, "completions/min_length": 32.25, "epoch": 5.365351203772648, "grad_norm": 0.003937340794297293, "kl": 0.093841552734375, "learning_rate": 4.5007905151041667e-07, "loss": 9.39401361392811e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2699, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 69.92708587646484, "completions/min_length": 30.625, "epoch": 5.367336808140978, "grad_norm": 0.0037077765540929667, "kl": 0.118408203125, "learning_rate": 4.4976515354499215e-07, "loss": 0.00011831421579699963, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2700, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 69.2916693687439, "completions/min_length": 26.625, "epoch": 5.369322412509307, "grad_norm": 0.0040773690346505055, "kl": 0.0986328125, "learning_rate": 4.494512755791332e-07, "loss": 9.857376426225528e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2701, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.88541889190674, "completions/min_length": 28.625, "epoch": 5.371308016877637, "grad_norm": 0.0047566332830194895, "kl": 0.09454345703125, "learning_rate": 4.4913741773780123e-07, "loss": 9.452088852412999e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2702, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 63.64583396911621, "completions/min_length": 30.875, "epoch": 5.373293621245967, "grad_norm": 5.475690907559695, "kl": 0.0948486328125, "learning_rate": 4.4882358014594953e-07, "loss": 0.008709586225450039, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.08330589346587658, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.2231760062277317, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2703, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.375, "completions/mean_length": 62.47916793823242, "completions/min_length": 34.25, "epoch": 5.375279225614296, "grad_norm": 0.9947492539809185, "kl": 0.096893310546875, "learning_rate": 4.485097629285237e-07, "loss": -0.0009502036264166236, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2704, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 69.28125190734863, "completions/min_length": 30.5, "epoch": 5.377264829982626, "grad_norm": 0.007534213449500174, "kl": 0.092926025390625, "learning_rate": 4.4819596621046104e-07, "loss": 9.282486280426383e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2705, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 64.7916693687439, "completions/min_length": 30.25, "epoch": 5.379250434350956, "grad_norm": 0.8825768328386991, "kl": 0.105712890625, "learning_rate": 4.478821901166907e-07, "loss": -0.021621834486722946, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2706, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 62.19791793823242, "completions/min_length": 33.25, "epoch": 5.381236038719285, "grad_norm": 2.2184819558525875, "kl": 0.177001953125, "learning_rate": 4.4756843477213365e-07, "loss": 0.0025391201488673687, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2707, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 78.98958492279053, "completions/min_length": 35.25, "epoch": 5.383221643087615, "grad_norm": 0.005625743754760762, "kl": 0.111572265625, "learning_rate": 4.472547003017027e-07, "loss": 0.00011153100058436394, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2708, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 79.97916889190674, "completions/min_length": 37.875, "epoch": 5.385207247455944, "grad_norm": 0.0048962897343131015, "kl": 0.1190185546875, "learning_rate": 4.469409868303022e-07, "loss": 0.0001189793911180459, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2709, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 70.6666693687439, "completions/min_length": 30.125, "epoch": 5.387192851824274, "grad_norm": 0.0041510375051012375, "kl": 0.09808349609375, "learning_rate": 4.466272944828282e-07, "loss": 9.807931201066822e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2710, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 69.52083587646484, "completions/min_length": 34.25, "epoch": 5.389178456192604, "grad_norm": 0.003736015138750805, "kl": 0.11456298828125, "learning_rate": 4.4631362338416824e-07, "loss": 0.00011433372128522024, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2711, "train_speed(iter/s)": 0.022714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 66.81250190734863, "completions/min_length": 27.125, "epoch": 5.391164060560933, "grad_norm": 0.004311131701960796, "kl": 0.104339599609375, "learning_rate": 4.459999736592015e-07, "loss": 0.00010438440222060308, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2712, "train_speed(iter/s)": 0.022713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 248.625, "completions/mean_length": 82.19791984558105, "completions/min_length": 34.25, "epoch": 5.393149664929263, "grad_norm": 0.47187730217754664, "kl": 0.11151123046875, "learning_rate": 4.456863454327986e-07, "loss": 0.02150268852710724, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 2713, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 72.59375143051147, "completions/min_length": 33.5, "epoch": 5.395135269297592, "grad_norm": 0.6022694409384836, "kl": 0.11199951171875, "learning_rate": 4.453727388298217e-07, "loss": 0.014322001487016678, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2714, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 68.22916793823242, "completions/min_length": 35.25, "epoch": 5.397120873665922, "grad_norm": 0.0035347124586965705, "kl": 0.097137451171875, "learning_rate": 4.4505915397512433e-07, "loss": 9.70976470853202e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2715, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 72.82291841506958, "completions/min_length": 32.0, "epoch": 5.399106478034252, "grad_norm": 0.004458249324434249, "kl": 0.10284423828125, "learning_rate": 4.447455909935513e-07, "loss": 0.0001026418904075399, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2716, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 71.56250381469727, "completions/min_length": 34.875, "epoch": 5.401092082402581, "grad_norm": 0.004972111275595303, "kl": 0.10711669921875, "learning_rate": 4.444320500099387e-07, "loss": 0.00010711140203056857, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2717, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 77.63541984558105, "completions/min_length": 37.25, "epoch": 5.403077686770911, "grad_norm": 0.005823599494328186, "kl": 0.1009521484375, "learning_rate": 4.441185311491139e-07, "loss": 0.00010087410919368267, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2718, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 74.04166984558105, "completions/min_length": 34.125, "epoch": 5.405063291139241, "grad_norm": 1.1652621577955118, "kl": 0.12432861328125, "learning_rate": 4.438050345358955e-07, "loss": 0.00012446939945220947, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2719, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 71.26041746139526, "completions/min_length": 31.25, "epoch": 5.40704889550757, "grad_norm": 0.008644861278789832, "kl": 0.123779296875, "learning_rate": 4.434915602950931e-07, "loss": 0.0001238467521034181, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2720, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 69.22916889190674, "completions/min_length": 35.5, "epoch": 5.4090344998759, "grad_norm": 0.9950152246198813, "kl": 0.11181640625, "learning_rate": 4.431781085515073e-07, "loss": 0.00025595849729143083, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2721, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 71.8229193687439, "completions/min_length": 28.5, "epoch": 5.411020104244229, "grad_norm": 0.00530408725328939, "kl": 0.10540771484375, "learning_rate": 4.428646794299305e-07, "loss": 0.00010551625018706545, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2722, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 67.77083587646484, "completions/min_length": 28.5, "epoch": 5.413005708612559, "grad_norm": 0.004754276706151285, "kl": 0.1080322265625, "learning_rate": 4.425512730551451e-07, "loss": 0.00010791717795655131, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2723, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 74.69791793823242, "completions/min_length": 30.75, "epoch": 5.414991312980889, "grad_norm": 0.004741860412861385, "kl": 0.0977783203125, "learning_rate": 4.4223788955192496e-07, "loss": 9.790381591301411e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2724, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 73.89583539962769, "completions/min_length": 36.875, "epoch": 5.416976917349218, "grad_norm": 1.3053207298235663, "kl": 0.12725830078125, "learning_rate": 4.419245290450347e-07, "loss": 0.0027800463140010834, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2725, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.375, "completions/mean_length": 76.20833492279053, "completions/min_length": 26.5, "epoch": 5.418962521717548, "grad_norm": 0.0053440894425301165, "kl": 0.08514404296875, "learning_rate": 4.4161119165923e-07, "loss": 8.523924043402076e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2726, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 73.80208396911621, "completions/min_length": 34.375, "epoch": 5.420948126085877, "grad_norm": 0.004176322705832965, "kl": 0.104095458984375, "learning_rate": 4.412978775192569e-07, "loss": 0.00010409795504529029, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2727, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 65.66666889190674, "completions/min_length": 32.375, "epoch": 5.422933730454207, "grad_norm": 0.005299010607605375, "kl": 0.1090087890625, "learning_rate": 4.4098458674985273e-07, "loss": 0.00010888499673455954, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2728, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 73.36458587646484, "completions/min_length": 31.75, "epoch": 5.424919334822537, "grad_norm": 0.003750018330106704, "kl": 0.12127685546875, "learning_rate": 4.406713194757451e-07, "loss": 0.0001212742063216865, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2729, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 74.18750381469727, "completions/min_length": 31.75, "epoch": 5.426904939190866, "grad_norm": 0.9352347713699581, "kl": 0.10321044921875, "learning_rate": 4.403580758216525e-07, "loss": 0.015215136110782623, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2730, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 80.07291984558105, "completions/min_length": 37.25, "epoch": 5.428890543559196, "grad_norm": 0.0041428450854698005, "kl": 0.11962890625, "learning_rate": 4.400448559122838e-07, "loss": 0.00011956026719417423, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2731, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 68.04166841506958, "completions/min_length": 31.0, "epoch": 5.430876147927526, "grad_norm": 0.008000993279135282, "kl": 0.10699462890625, "learning_rate": 4.397316598723385e-07, "loss": 0.00010696284880395979, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2732, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 62.46875190734863, "completions/min_length": 30.25, "epoch": 5.432861752295855, "grad_norm": 0.0044862733685761165, "kl": 0.08282470703125, "learning_rate": 4.3941848782650676e-07, "loss": 8.273782441392541e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2733, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 69.37500238418579, "completions/min_length": 24.5, "epoch": 5.434847356664185, "grad_norm": 0.004645467035789828, "kl": 0.08270263671875, "learning_rate": 4.391053398994689e-07, "loss": 8.269631507573649e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2734, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 73.18750143051147, "completions/min_length": 32.375, "epoch": 5.436832961032514, "grad_norm": 0.0047027933561566105, "kl": 0.09796142578125, "learning_rate": 4.38792216215896e-07, "loss": 9.794151992537081e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2735, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 74.46875286102295, "completions/min_length": 33.25, "epoch": 5.438818565400844, "grad_norm": 0.005357913022830147, "kl": 0.091552734375, "learning_rate": 4.384791169004492e-07, "loss": 9.156801388598979e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2736, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 68.66666841506958, "completions/min_length": 26.75, "epoch": 5.440804169769174, "grad_norm": 0.004121777672769144, "kl": 0.10382080078125, "learning_rate": 4.3816604207777997e-07, "loss": 0.00010377737635280937, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2737, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 78.52083492279053, "completions/min_length": 36.875, "epoch": 5.442789774137503, "grad_norm": 0.003545212904099309, "kl": 0.10894775390625, "learning_rate": 4.3785299187253014e-07, "loss": 0.00010903298243647441, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2738, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 69.07291889190674, "completions/min_length": 27.5, "epoch": 5.444775378505833, "grad_norm": 1.3973487238593616, "kl": 0.10791015625, "learning_rate": 4.375399664093318e-07, "loss": 0.008710963651537895, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.23100870847702026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2739, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 73.98958492279053, "completions/min_length": 34.625, "epoch": 5.446760982874162, "grad_norm": 0.00561296944755905, "kl": 0.1016845703125, "learning_rate": 4.372269658128069e-07, "loss": 0.00010171587928198278, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2740, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.5, "completions/mean_length": 64.69791746139526, "completions/min_length": 34.875, "epoch": 5.448746587242492, "grad_norm": 0.0033485004630473252, "kl": 0.07318115234375, "learning_rate": 4.369139902075674e-07, "loss": 7.320643635466695e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2741, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 75.42708539962769, "completions/min_length": 34.25, "epoch": 5.450732191610822, "grad_norm": 0.1101558695389743, "kl": 0.117431640625, "learning_rate": 4.3660103971821627e-07, "loss": 0.00011730985715985298, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2742, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 66.33333539962769, "completions/min_length": 29.125, "epoch": 5.452717795979151, "grad_norm": 0.005121904845751762, "kl": 0.10894775390625, "learning_rate": 4.362881144693453e-07, "loss": 0.00010876153100980446, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2743, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 63.052085399627686, "completions/min_length": 30.625, "epoch": 5.454703400347481, "grad_norm": 0.00422751582903777, "kl": 0.11529541015625, "learning_rate": 4.3597521458553674e-07, "loss": 0.00011522645218065009, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2744, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.875, "completions/mean_length": 64.34375143051147, "completions/min_length": 27.75, "epoch": 5.456689004715811, "grad_norm": 0.004916915939521794, "kl": 0.103546142578125, "learning_rate": 4.3566234019136284e-07, "loss": 0.00010361853492213413, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2745, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 67.89583539962769, "completions/min_length": 36.375, "epoch": 5.45867460908414, "grad_norm": 0.003740552757059603, "kl": 0.10321044921875, "learning_rate": 4.3534949141138553e-07, "loss": 0.00010333474347135052, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2746, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 75.80208539962769, "completions/min_length": 33.0, "epoch": 5.46066021345247, "grad_norm": 0.004279682183233835, "kl": 0.12255859375, "learning_rate": 4.350366683701567e-07, "loss": 0.00012245573452673852, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2747, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 76.8229193687439, "completions/min_length": 30.125, "epoch": 5.462645817820799, "grad_norm": 0.0038114538094619556, "kl": 0.09649658203125, "learning_rate": 4.347238711922175e-07, "loss": 9.651340951677412e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2748, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 69.45833492279053, "completions/min_length": 33.0, "epoch": 5.464631422189129, "grad_norm": 0.009061094087616113, "kl": 0.08917236328125, "learning_rate": 4.344111000020996e-07, "loss": 8.919953688746318e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2749, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 69.16666889190674, "completions/min_length": 30.5, "epoch": 5.466617026557459, "grad_norm": 0.006235587790266846, "kl": 0.10162353515625, "learning_rate": 4.340983549243238e-07, "loss": 0.00010143526014871895, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2750, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 71.69791841506958, "completions/min_length": 28.75, "epoch": 5.468602630925788, "grad_norm": 1.640871944411773, "kl": 0.089813232421875, "learning_rate": 4.337856360834005e-07, "loss": -0.01011543907225132, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2751, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 65.31250143051147, "completions/min_length": 29.375, "epoch": 5.470588235294118, "grad_norm": 0.005109223648818194, "kl": 0.09039306640625, "learning_rate": 4.3347294360382974e-07, "loss": 9.038949792739004e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2752, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 71.51041793823242, "completions/min_length": 30.0, "epoch": 5.472573839662447, "grad_norm": 0.004346783368402475, "kl": 0.123291015625, "learning_rate": 4.3316027761010115e-07, "loss": 0.00012337403313722461, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2753, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 72.895836353302, "completions/min_length": 30.5, "epoch": 5.474559444030777, "grad_norm": 0.0032625851940395133, "kl": 0.09619140625, "learning_rate": 4.328476382266937e-07, "loss": 9.625362872611731e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2754, "train_speed(iter/s)": 0.022712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 73.75000095367432, "completions/min_length": 30.5, "epoch": 5.476545048399107, "grad_norm": 0.9956874669307478, "kl": 0.084747314453125, "learning_rate": 4.325350255780757e-07, "loss": -0.01096474938094616, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2755, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 72.46875286102295, "completions/min_length": 29.625, "epoch": 5.478530652767436, "grad_norm": 0.004596652531736091, "kl": 0.10223388671875, "learning_rate": 4.3222243978870514e-07, "loss": 0.00010214014764642343, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2756, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 71.08333396911621, "completions/min_length": 31.5, "epoch": 5.480516257135766, "grad_norm": 1.3504932140077563, "kl": 0.0909423828125, "learning_rate": 4.31909880983029e-07, "loss": 0.006385432090610266, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2757, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 77.87500190734863, "completions/min_length": 33.125, "epoch": 5.482501861504096, "grad_norm": 0.004577171923515002, "kl": 0.11993408203125, "learning_rate": 4.315973492854836e-07, "loss": 0.00011998624540865421, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2758, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 74.75000286102295, "completions/min_length": 31.875, "epoch": 5.484487465872425, "grad_norm": 0.003835642536919521, "kl": 0.10211181640625, "learning_rate": 4.312848448204946e-07, "loss": 0.00010209978063357994, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2759, "train_speed(iter/s)": 0.022711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 78.73958539962769, "completions/min_length": 33.875, "epoch": 5.486473070240755, "grad_norm": 0.006161170813168482, "kl": 0.09149169921875, "learning_rate": 4.3097236771247653e-07, "loss": 9.150305413641036e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2760, "train_speed(iter/s)": 0.02271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 78.98958587646484, "completions/min_length": 31.25, "epoch": 5.488458674609084, "grad_norm": 0.006641730782599897, "kl": 0.099761962890625, "learning_rate": 4.306599180858332e-07, "loss": 9.973629494197667e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2761, "train_speed(iter/s)": 0.022709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 74.32291889190674, "completions/min_length": 33.125, "epoch": 5.490444278977414, "grad_norm": 1.1114238278661688, "kl": 0.09893798828125, "learning_rate": 4.3034749606495754e-07, "loss": -0.012792940251529217, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2762, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 76.52083587646484, "completions/min_length": 31.0, "epoch": 5.492429883345744, "grad_norm": 0.00834590355009779, "kl": 0.12042236328125, "learning_rate": 4.300351017742315e-07, "loss": 0.00012039339344482869, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2763, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 72.32291793823242, "completions/min_length": 31.625, "epoch": 5.494415487714073, "grad_norm": 0.007555199005939186, "kl": 0.100433349609375, "learning_rate": 4.2972273533802584e-07, "loss": 0.00010044292139355093, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2764, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 71.71875190734863, "completions/min_length": 29.0, "epoch": 5.496401092082403, "grad_norm": 0.006999279441102167, "kl": 0.11468505859375, "learning_rate": 4.294103968807003e-07, "loss": 0.00011442082177381963, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2765, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 76.43750190734863, "completions/min_length": 30.5, "epoch": 5.498386696450732, "grad_norm": 0.0053074686751306, "kl": 0.0948486328125, "learning_rate": 4.2909808652660355e-07, "loss": 9.486137423664331e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2766, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 74.71875286102295, "completions/min_length": 36.125, "epoch": 5.500372300819062, "grad_norm": 1.5960427560363841, "kl": 0.10015869140625, "learning_rate": 4.2878580440007313e-07, "loss": 0.00593077577650547, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2767, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 78.03125238418579, "completions/min_length": 32.375, "epoch": 5.502357905187392, "grad_norm": 0.004398314903651077, "kl": 0.08953857421875, "learning_rate": 4.284735506254349e-07, "loss": 8.946903835749254e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2768, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 71.1666693687439, "completions/min_length": 34.0, "epoch": 5.504343509555721, "grad_norm": 0.9460714344328963, "kl": 0.12957763671875, "learning_rate": 4.2816132532700377e-07, "loss": 0.001300673931837082, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2769, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 67.60416746139526, "completions/min_length": 32.875, "epoch": 5.506329113924051, "grad_norm": 0.004698637822917459, "kl": 0.10321044921875, "learning_rate": 4.2784912862908377e-07, "loss": 0.00010317980195395648, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2770, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.375, "completions/mean_length": 79.76041889190674, "completions/min_length": 29.75, "epoch": 5.508314718292381, "grad_norm": 0.0055203803039646756, "kl": 0.10186767578125, "learning_rate": 4.275369606559667e-07, "loss": 0.00010178033699048683, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2771, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 79.1979193687439, "completions/min_length": 37.0, "epoch": 5.51030032266071, "grad_norm": 0.007044136385528007, "kl": 0.09283447265625, "learning_rate": 4.2722482153193336e-07, "loss": 9.287783177569509e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2772, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 75.80208587646484, "completions/min_length": 34.875, "epoch": 5.51228592702904, "grad_norm": 0.015216403875062002, "kl": 0.1114501953125, "learning_rate": 4.2691271138125296e-07, "loss": 0.00011135396198369563, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2773, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.125, "completions/mean_length": 76.53125238418579, "completions/min_length": 34.75, "epoch": 5.514271531397369, "grad_norm": 0.011715117900308119, "kl": 0.12481689453125, "learning_rate": 4.266006303281833e-07, "loss": 0.0001247400650754571, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2774, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 68.90625095367432, "completions/min_length": 24.125, "epoch": 5.516257135765699, "grad_norm": 0.012060594862211975, "kl": 0.09033203125, "learning_rate": 4.262885784969705e-07, "loss": 9.040775330504403e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2775, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 73.10416889190674, "completions/min_length": 31.25, "epoch": 5.518242740134029, "grad_norm": 0.003547073998586838, "kl": 0.08782958984375, "learning_rate": 4.259765560118489e-07, "loss": 8.780855569057167e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2776, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 61.208335399627686, "completions/min_length": 32.875, "epoch": 5.520228344502358, "grad_norm": 0.006252713256961044, "kl": 0.094024658203125, "learning_rate": 4.256645629970416e-07, "loss": 9.388947364641353e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2777, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.375, "completions/mean_length": 79.94791984558105, "completions/min_length": 25.75, "epoch": 5.522213948870688, "grad_norm": 0.8101613636592477, "kl": 0.09478759765625, "learning_rate": 4.253525995767595e-07, "loss": 0.010251425206661224, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2778, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 75.40625143051147, "completions/min_length": 36.25, "epoch": 5.524199553239017, "grad_norm": 0.7213241319177761, "kl": 0.1138916015625, "learning_rate": 4.2504066587520206e-07, "loss": 0.007435914129018784, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2779, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 73.79166889190674, "completions/min_length": 29.375, "epoch": 5.526185157607347, "grad_norm": 0.004234806914973846, "kl": 0.083099365234375, "learning_rate": 4.247287620165565e-07, "loss": 8.310205157613382e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2780, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 76.73958492279053, "completions/min_length": 33.375, "epoch": 5.528170761975677, "grad_norm": 0.0035572609430518137, "kl": 0.112060546875, "learning_rate": 4.244168881249986e-07, "loss": 0.00011203760368516669, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2781, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 87.70833683013916, "completions/min_length": 37.375, "epoch": 5.530156366344006, "grad_norm": 0.00333882602804208, "kl": 0.10821533203125, "learning_rate": 4.241050443246919e-07, "loss": 0.00010833704436663538, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2782, "train_speed(iter/s)": 0.022708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.125, "completions/mean_length": 77.35416889190674, "completions/min_length": 26.5, "epoch": 5.532141970712336, "grad_norm": 0.005040117171171672, "kl": 0.10223388671875, "learning_rate": 4.23793230739788e-07, "loss": 0.00010223597928415984, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2783, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 81.36458492279053, "completions/min_length": 34.125, "epoch": 5.5341275750806656, "grad_norm": 0.00737583101403247, "kl": 0.10992431640625, "learning_rate": 4.234814474944269e-07, "loss": 0.00010996578203048557, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2784, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 71.40625286102295, "completions/min_length": 24.625, "epoch": 5.536113179448995, "grad_norm": 0.0037289528468716316, "kl": 0.09576416015625, "learning_rate": 4.231696947127358e-07, "loss": 9.570604743203148e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2785, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 77.92708444595337, "completions/min_length": 33.75, "epoch": 5.538098783817325, "grad_norm": 0.0034856449937609364, "kl": 0.090240478515625, "learning_rate": 4.228579725188304e-07, "loss": 9.02874962775968e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2786, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.625, "completions/mean_length": 79.90625286102295, "completions/min_length": 30.75, "epoch": 5.540084388185654, "grad_norm": 0.8492295518933224, "kl": 0.097869873046875, "learning_rate": 4.2254628103681395e-07, "loss": -0.001284400699660182, "memory(GiB)": 94.21, "reward": 1.5729166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2787, "train_speed(iter/s)": 0.022707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 81.81250190734863, "completions/min_length": 30.625, "epoch": 5.542069992553984, "grad_norm": 0.7979358263391226, "kl": 0.09454345703125, "learning_rate": 4.222346203907773e-07, "loss": 0.0003351215273141861, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2788, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 63.031250953674316, "completions/min_length": 22.5, "epoch": 5.5440555969223135, "grad_norm": 0.004731990258344243, "kl": 0.080230712890625, "learning_rate": 4.2192299070479923e-07, "loss": 8.015319326659665e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2789, "train_speed(iter/s)": 0.022705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 75.18750190734863, "completions/min_length": 39.0, "epoch": 5.546041201290643, "grad_norm": 0.003992872859329794, "kl": 0.0897216796875, "learning_rate": 4.216113921029462e-07, "loss": 8.980886195786297e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2790, "train_speed(iter/s)": 0.022706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.625, "completions/mean_length": 83.18750286102295, "completions/min_length": 32.0, "epoch": 5.5480268056589725, "grad_norm": 0.0034274734631158763, "kl": 0.0958251953125, "learning_rate": 4.212998247092724e-07, "loss": 9.573270654072985e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2791, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.875, "completions/mean_length": 87.88541889190674, "completions/min_length": 35.125, "epoch": 5.550012410027302, "grad_norm": 0.004416743496037694, "kl": 0.084869384765625, "learning_rate": 4.2098828864781937e-07, "loss": 8.485731086693704e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2792, "train_speed(iter/s)": 0.022704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 78.50000190734863, "completions/min_length": 31.125, "epoch": 5.551998014395632, "grad_norm": 0.0035557824975480583, "kl": 0.07843017578125, "learning_rate": 4.206767840426163e-07, "loss": 7.842038758099079e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2793, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 71.81250190734863, "completions/min_length": 26.625, "epoch": 5.5539836187639615, "grad_norm": 0.004262223429072593, "kl": 0.07781982421875, "learning_rate": 4.203653110176798e-07, "loss": 7.77176464907825e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2794, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.625, "completions/mean_length": 90.82291984558105, "completions/min_length": 33.75, "epoch": 5.555969223132291, "grad_norm": 0.7148260267243509, "kl": 0.09979248046875, "learning_rate": 4.2005386969701395e-07, "loss": 0.00037801143480464816, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2795, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 78.78125095367432, "completions/min_length": 34.375, "epoch": 5.5579548275006205, "grad_norm": 0.004525739703181718, "kl": 0.0941162109375, "learning_rate": 4.197424602046103e-07, "loss": 9.414648229721934e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2796, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 78.79166984558105, "completions/min_length": 30.0, "epoch": 5.5599404318689505, "grad_norm": 0.0036919676371623025, "kl": 0.1055908203125, "learning_rate": 4.1943108266444716e-07, "loss": 0.0001056083056028001, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2797, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 77.02083587646484, "completions/min_length": 28.5, "epoch": 5.5619260362372795, "grad_norm": 0.004006692445778771, "kl": 0.080718994140625, "learning_rate": 4.1911973720049117e-07, "loss": 8.070625335676596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2798, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.875, "completions/mean_length": 77.58333539962769, "completions/min_length": 29.0, "epoch": 5.5639116406056095, "grad_norm": 0.0036904961820925907, "kl": 0.11065673828125, "learning_rate": 4.1880842393669543e-07, "loss": 0.00011045071005355567, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2799, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 76.54166841506958, "completions/min_length": 29.125, "epoch": 5.565897244973939, "grad_norm": 0.00476950471355126, "kl": 0.09918212890625, "learning_rate": 4.1849714299700024e-07, "loss": 9.918311843648553e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2800, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 71.39583587646484, "completions/min_length": 29.0, "epoch": 5.5678828493422685, "grad_norm": 0.004399608508489987, "kl": 0.084075927734375, "learning_rate": 4.1818589450533323e-07, "loss": 8.404294203501195e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2801, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 76.79167079925537, "completions/min_length": 34.375, "epoch": 5.5698684537105985, "grad_norm": 0.0045738558151656875, "kl": 0.08685302734375, "learning_rate": 4.178746785856092e-07, "loss": 8.682149928063154e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2802, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 73.31250286102295, "completions/min_length": 31.75, "epoch": 5.5718540580789275, "grad_norm": 0.0040530072078604545, "kl": 0.093994140625, "learning_rate": 4.1756349536172967e-07, "loss": 9.394960943609476e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2803, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 73.16666793823242, "completions/min_length": 33.625, "epoch": 5.5738396624472575, "grad_norm": 0.003492079602908078, "kl": 0.086578369140625, "learning_rate": 4.1725234495758355e-07, "loss": 8.657629950903356e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2804, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 68.26041889190674, "completions/min_length": 31.125, "epoch": 5.5758252668155865, "grad_norm": 0.004189735944043726, "kl": 0.09124755859375, "learning_rate": 4.169412274970463e-07, "loss": 9.126631630351767e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2805, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 75.71875095367432, "completions/min_length": 28.25, "epoch": 5.5778108711839165, "grad_norm": 0.0039298656870085154, "kl": 0.09027099609375, "learning_rate": 4.1663014310398053e-07, "loss": 9.025847248267382e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2806, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 71.39583587646484, "completions/min_length": 27.0, "epoch": 5.5797964755522464, "grad_norm": 0.004541390025132296, "kl": 0.08673095703125, "learning_rate": 4.163190919022356e-07, "loss": 8.66981572471559e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2807, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.875, "completions/mean_length": 83.48958587646484, "completions/min_length": 32.75, "epoch": 5.5817820799205755, "grad_norm": 0.003499435353091427, "kl": 0.10589599609375, "learning_rate": 4.1600807401564754e-07, "loss": 0.00010582594404695556, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2808, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 78.18750238418579, "completions/min_length": 33.375, "epoch": 5.5837676842889055, "grad_norm": 0.00460714383538939, "kl": 0.08837890625, "learning_rate": 4.1569708956803917e-07, "loss": 8.837709901854396e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2809, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 79.85416984558105, "completions/min_length": 28.0, "epoch": 5.585753288657235, "grad_norm": 0.0037842346853757293, "kl": 0.09576416015625, "learning_rate": 4.1538613868322e-07, "loss": 9.578990284353495e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2810, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 69.76041841506958, "completions/min_length": 36.0, "epoch": 5.5877388930255645, "grad_norm": 0.004071194172173444, "kl": 0.083221435546875, "learning_rate": 4.150752214849864e-07, "loss": 8.315254672197625e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2811, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 74.0104193687439, "completions/min_length": 29.75, "epoch": 5.589724497393894, "grad_norm": 0.003568085591484155, "kl": 0.096405029296875, "learning_rate": 4.1476433809712117e-07, "loss": 9.640575444791466e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2812, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 64.27083444595337, "completions/min_length": 24.25, "epoch": 5.5917101017622235, "grad_norm": 0.003298590255078025, "kl": 0.07684326171875, "learning_rate": 4.144534886433935e-07, "loss": 7.677805842831731e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2813, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 75.95833683013916, "completions/min_length": 30.875, "epoch": 5.5936957061305534, "grad_norm": 0.00348809985468097, "kl": 0.08978271484375, "learning_rate": 4.141426732475592e-07, "loss": 8.975945820566267e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2814, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 71.85416889190674, "completions/min_length": 31.375, "epoch": 5.595681310498883, "grad_norm": 0.004052753480553658, "kl": 0.09381103515625, "learning_rate": 4.138318920333605e-07, "loss": 9.384001896250993e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2815, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 72.07291793823242, "completions/min_length": 27.125, "epoch": 5.5976669148672125, "grad_norm": 0.004321734421488672, "kl": 0.08721923828125, "learning_rate": 4.135211451245264e-07, "loss": 8.720854384591803e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2816, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 66.76041889190674, "completions/min_length": 31.625, "epoch": 5.599652519235542, "grad_norm": 2.400922390158963, "kl": 0.10882568359375, "learning_rate": 4.1321043264477107e-07, "loss": 0.011667689308524132, "memory(GiB)": 94.21, "reward": 1.9062500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.9062500074505806, "rewards/CineAccuracyORM/std": 0.12591182813048363, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2817, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 69.61458396911621, "completions/min_length": 29.0, "epoch": 5.6016381236038715, "grad_norm": 0.003499673092992823, "kl": 0.087249755859375, "learning_rate": 4.1289975471779653e-07, "loss": 8.720988989807665e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2818, "train_speed(iter/s)": 0.022698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 69.833336353302, "completions/min_length": 31.375, "epoch": 5.603623727972201, "grad_norm": 0.0040166137241982106, "kl": 0.085693359375, "learning_rate": 4.125891114672902e-07, "loss": 8.561991853639483e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2819, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 80.64583587646484, "completions/min_length": 35.125, "epoch": 5.605609332340531, "grad_norm": 0.003384935053344692, "kl": 0.08905029296875, "learning_rate": 4.122785030169255e-07, "loss": 8.904648711904883e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2820, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 74.00000190734863, "completions/min_length": 32.625, "epoch": 5.6075949367088604, "grad_norm": 0.0034837593373750813, "kl": 0.094940185546875, "learning_rate": 4.119679294903625e-07, "loss": 9.492408571531996e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2821, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 67.51041841506958, "completions/min_length": 27.75, "epoch": 5.60958054107719, "grad_norm": 0.6566340598572169, "kl": 0.10760498046875, "learning_rate": 4.1165739101124704e-07, "loss": 0.013741843402385712, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2822, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 78.98958587646484, "completions/min_length": 32.875, "epoch": 5.61156614544552, "grad_norm": 0.003500954751752791, "kl": 0.085540771484375, "learning_rate": 4.1134688770321117e-07, "loss": 8.559721754863858e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2823, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 74.39583492279053, "completions/min_length": 29.5, "epoch": 5.613551749813849, "grad_norm": 1.1100773902979642, "kl": 0.09246826171875, "learning_rate": 4.110364196898728e-07, "loss": -0.009677091613411903, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2824, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 71.12500286102295, "completions/min_length": 30.25, "epoch": 5.615537354182179, "grad_norm": 0.003721097562736777, "kl": 0.092376708984375, "learning_rate": 4.1072598709483606e-07, "loss": 9.241603402188048e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2825, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 75.18750143051147, "completions/min_length": 32.375, "epoch": 5.617522958550508, "grad_norm": 0.008131444471014607, "kl": 0.085968017578125, "learning_rate": 4.1041559004169073e-07, "loss": 8.589683420723304e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2826, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 68.38541889190674, "completions/min_length": 29.875, "epoch": 5.619508562918838, "grad_norm": 0.00364861352457814, "kl": 0.092376708984375, "learning_rate": 4.1010522865401257e-07, "loss": 9.24561609281227e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2827, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 69.64583492279053, "completions/min_length": 24.25, "epoch": 5.621494167287168, "grad_norm": 0.0038094154290808113, "kl": 0.0887451171875, "learning_rate": 4.097949030553629e-07, "loss": 8.8644286734052e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2828, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 65.2604193687439, "completions/min_length": 19.0, "epoch": 5.623479771655497, "grad_norm": 0.0038007389427531196, "kl": 0.08843994140625, "learning_rate": 4.094846133692891e-07, "loss": 8.835688640829176e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2829, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 86.66666984558105, "completions/min_length": 30.875, "epoch": 5.625465376023827, "grad_norm": 0.003930123066646798, "kl": 0.11083984375, "learning_rate": 4.0917435971932403e-07, "loss": 0.00011081757838837802, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2830, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 65.3541693687439, "completions/min_length": 27.875, "epoch": 5.627450980392156, "grad_norm": 0.0046369701533612705, "kl": 0.091552734375, "learning_rate": 4.0886414222898626e-07, "loss": 9.159876935882494e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2831, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 75.15625190734863, "completions/min_length": 26.75, "epoch": 5.629436584760486, "grad_norm": 0.0036160326580972763, "kl": 0.10333251953125, "learning_rate": 4.085539610217802e-07, "loss": 0.00010320844012312591, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2832, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 74.73958539962769, "completions/min_length": 30.625, "epoch": 5.631422189128816, "grad_norm": 0.003160076416520772, "kl": 0.0965576171875, "learning_rate": 4.0824381622119543e-07, "loss": 9.652607695898041e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2833, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 63.53125333786011, "completions/min_length": 23.25, "epoch": 5.633407793497145, "grad_norm": 0.9661848731161083, "kl": 0.11309814453125, "learning_rate": 4.0793370795070737e-07, "loss": -0.008300778456032276, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2834, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 62.98958492279053, "completions/min_length": 28.125, "epoch": 5.635393397865475, "grad_norm": 0.004041444232851166, "kl": 0.0953369140625, "learning_rate": 4.076236363337766e-07, "loss": 9.529919771011919e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2835, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 73.53125190734863, "completions/min_length": 29.5, "epoch": 5.637379002233805, "grad_norm": 0.003670685330173102, "kl": 0.09912109375, "learning_rate": 4.073136014938495e-07, "loss": 9.907546336762607e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2836, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 71.68750286102295, "completions/min_length": 31.875, "epoch": 5.639364606602134, "grad_norm": 0.004118533487192548, "kl": 0.0882568359375, "learning_rate": 4.070036035543572e-07, "loss": 8.822607196634635e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2837, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 70.91666889190674, "completions/min_length": 29.0, "epoch": 5.641350210970464, "grad_norm": 2.8281828765365593, "kl": 0.0960693359375, "learning_rate": 4.0669364263871655e-07, "loss": -0.005358380731195211, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.10518955811858177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2838, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.375, "completions/mean_length": 76.59375190734863, "completions/min_length": 29.0, "epoch": 5.643335815338793, "grad_norm": 0.0032349761202531387, "kl": 0.0941162109375, "learning_rate": 4.0638371887032996e-07, "loss": 9.409929043613374e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2839, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 65.90625190734863, "completions/min_length": 27.125, "epoch": 5.645321419707123, "grad_norm": 0.005501191730010586, "kl": 0.106201171875, "learning_rate": 4.0607383237258445e-07, "loss": 0.00010618512169457972, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2840, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.625, "completions/mean_length": 63.43750190734863, "completions/min_length": 26.125, "epoch": 5.647307024075453, "grad_norm": 0.0033587813387234294, "kl": 0.08978271484375, "learning_rate": 4.057639832688525e-07, "loss": 8.96783167263493e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2841, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 75.51041889190674, "completions/min_length": 29.0, "epoch": 5.649292628443782, "grad_norm": 0.005688036601464512, "kl": 0.10888671875, "learning_rate": 4.0545417168249157e-07, "loss": 0.00010891951387748122, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2842, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 82.26041889190674, "completions/min_length": 28.375, "epoch": 5.651278232812112, "grad_norm": 1.2719389705932045, "kl": 0.122711181640625, "learning_rate": 4.051443977368444e-07, "loss": 0.014759061858057976, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2843, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 69.020836353302, "completions/min_length": 24.5, "epoch": 5.653263837180441, "grad_norm": 0.7673907687171013, "kl": 0.088897705078125, "learning_rate": 4.048346615552387e-07, "loss": -0.005343483295291662, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2844, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 76.86458683013916, "completions/min_length": 30.125, "epoch": 5.655249441548771, "grad_norm": 1.261682669100062, "kl": 0.0936279296875, "learning_rate": 4.045249632609865e-07, "loss": 0.003070330247282982, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2845, "train_speed(iter/s)": 0.022703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 79.85416984558105, "completions/min_length": 26.875, "epoch": 5.657235045917101, "grad_norm": 2.1128298929039504, "kl": 0.11700439453125, "learning_rate": 4.0421530297738603e-07, "loss": -0.006457652896642685, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2846, "train_speed(iter/s)": 0.022702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 80.33333396911621, "completions/min_length": 32.125, "epoch": 5.65922065028543, "grad_norm": 0.00367759112604411, "kl": 0.0732421875, "learning_rate": 4.039056808277194e-07, "loss": 7.310794899240136e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2847, "train_speed(iter/s)": 0.022701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 73.37500286102295, "completions/min_length": 28.0, "epoch": 5.66120625465376, "grad_norm": 0.021929967897152867, "kl": 0.1116943359375, "learning_rate": 4.035960969352537e-07, "loss": 0.00011159037239849567, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2848, "train_speed(iter/s)": 0.0227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.875, "completions/mean_length": 82.08333587646484, "completions/min_length": 29.5, "epoch": 5.66319185902209, "grad_norm": 0.007933532407083242, "kl": 0.09375, "learning_rate": 4.0328655142324097e-07, "loss": 9.373571811011061e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2849, "train_speed(iter/s)": 0.022699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 77.85416841506958, "completions/min_length": 35.5, "epoch": 5.665177463390419, "grad_norm": 0.01950427463338215, "kl": 0.09857177734375, "learning_rate": 4.029770444149178e-07, "loss": 9.865294850897044e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2850, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 75.82291984558105, "completions/min_length": 26.375, "epoch": 5.667163067758749, "grad_norm": 1.113681173242187, "kl": 0.107666015625, "learning_rate": 4.0266757603350565e-07, "loss": 0.0012330388417467475, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2851, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.125, "completions/mean_length": 84.84375286102295, "completions/min_length": 33.875, "epoch": 5.669148672127078, "grad_norm": 0.012310216612396499, "kl": 0.111328125, "learning_rate": 4.023581464022103e-07, "loss": 0.00011126259050797671, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2852, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 73.71875190734863, "completions/min_length": 25.875, "epoch": 5.671134276495408, "grad_norm": 1.967958842481573, "kl": 0.1077880859375, "learning_rate": 4.020487556442227e-07, "loss": -0.0020981894340366125, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2853, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.875, "completions/mean_length": 85.06250190734863, "completions/min_length": 30.875, "epoch": 5.673119880863738, "grad_norm": 0.7212718210509244, "kl": 0.128662109375, "learning_rate": 4.0173940388271755e-07, "loss": 0.006483552046120167, "memory(GiB)": 94.21, "reward": 1.6041666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6041666679084301, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2854, "train_speed(iter/s)": 0.022697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 81.28125190734863, "completions/min_length": 34.125, "epoch": 5.675105485232067, "grad_norm": 0.0076694494668766485, "kl": 0.089111328125, "learning_rate": 4.014300912408545e-07, "loss": 8.910287579055876e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2855, "train_speed(iter/s)": 0.022696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 76.34375190734863, "completions/min_length": 25.875, "epoch": 5.677091089600397, "grad_norm": 0.008991046188857488, "kl": 0.0914306640625, "learning_rate": 4.0112081784177767e-07, "loss": 9.137419692706317e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2856, "train_speed(iter/s)": 0.022696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.375, "completions/mean_length": 78.48958539962769, "completions/min_length": 26.0, "epoch": 5.679076693968726, "grad_norm": 0.007259986515532278, "kl": 0.10211181640625, "learning_rate": 4.008115838086151e-07, "loss": 0.00010215988731943071, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2857, "train_speed(iter/s)": 0.022695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 84.71875190734863, "completions/min_length": 31.5, "epoch": 5.681062298337056, "grad_norm": 0.009064280885433934, "kl": 0.10443115234375, "learning_rate": 4.0050238926447974e-07, "loss": 0.00010429555550217628, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2858, "train_speed(iter/s)": 0.022694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.375, "completions/mean_length": 75.20833492279053, "completions/min_length": 20.625, "epoch": 5.683047902705386, "grad_norm": 0.009373881983971306, "kl": 0.0899658203125, "learning_rate": 4.001932343324683e-07, "loss": 8.994461677502841e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2859, "train_speed(iter/s)": 0.022694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.625, "completions/mean_length": 81.21875286102295, "completions/min_length": 27.375, "epoch": 5.685033507073715, "grad_norm": 0.0163795483525699, "kl": 0.10748291015625, "learning_rate": 3.998841191356622e-07, "loss": 0.0001074122847057879, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2860, "train_speed(iter/s)": 0.022693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 87.7604193687439, "completions/min_length": 41.125, "epoch": 5.687019111442045, "grad_norm": 0.005258456943394062, "kl": 0.09344482421875, "learning_rate": 3.9957504379712667e-07, "loss": 9.331519686384127e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2861, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 76.42708683013916, "completions/min_length": 26.625, "epoch": 5.689004715810375, "grad_norm": 0.004046060953278138, "kl": 0.0819091796875, "learning_rate": 3.992660084399112e-07, "loss": 8.185824117390439e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2862, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 74.79166889190674, "completions/min_length": 30.75, "epoch": 5.690990320178704, "grad_norm": 2.1318269900085824, "kl": 0.09063720703125, "learning_rate": 3.989570131870494e-07, "loss": -0.001156588434241712, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2863, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 77.25000190734863, "completions/min_length": 32.5, "epoch": 5.692975924547034, "grad_norm": 0.007583302065831584, "kl": 0.105712890625, "learning_rate": 3.986480581615591e-07, "loss": 0.0001056445762515068, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2864, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 79.58333587646484, "completions/min_length": 23.875, "epoch": 5.694961528915364, "grad_norm": 0.004385651748224059, "kl": 0.1146240234375, "learning_rate": 3.983391434864414e-07, "loss": 0.00011460146924946457, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2865, "train_speed(iter/s)": 0.022692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.875, "completions/mean_length": 88.16666889190674, "completions/min_length": 31.125, "epoch": 5.696947133283693, "grad_norm": 0.0034099568272014983, "kl": 0.088470458984375, "learning_rate": 3.9803026928468205e-07, "loss": 8.846410491969436e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2866, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 80.43750095367432, "completions/min_length": 35.125, "epoch": 5.698932737652023, "grad_norm": 0.011822260535962208, "kl": 0.11077880859375, "learning_rate": 3.9772143567925076e-07, "loss": 0.00011074334906879812, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2867, "train_speed(iter/s)": 0.022691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 87.53125190734863, "completions/min_length": 30.5, "epoch": 5.700918342020352, "grad_norm": 0.0034870339685979966, "kl": 0.093597412109375, "learning_rate": 3.9741264279310047e-07, "loss": 9.35839198064059e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2868, "train_speed(iter/s)": 0.02269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 85.07291793823242, "completions/min_length": 29.5, "epoch": 5.702903946388682, "grad_norm": 0.005243673408938612, "kl": 0.07794189453125, "learning_rate": 3.9710389074916825e-07, "loss": 7.790450763422996e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2869, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 77.06250095367432, "completions/min_length": 32.0, "epoch": 5.704889550757011, "grad_norm": 0.005938725781157538, "kl": 0.101531982421875, "learning_rate": 3.9679517967037495e-07, "loss": 0.00010142349492525682, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2870, "train_speed(iter/s)": 0.022689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 74.79166841506958, "completions/min_length": 28.25, "epoch": 5.706875155125341, "grad_norm": 0.004702679930715288, "kl": 0.090087890625, "learning_rate": 3.9648650967962505e-07, "loss": 9.016798867378384e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2871, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 80.41666889190674, "completions/min_length": 27.625, "epoch": 5.708860759493671, "grad_norm": 0.003765407027215818, "kl": 0.09271240234375, "learning_rate": 3.961778808998065e-07, "loss": 9.270670125260949e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2872, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 75.32291889190674, "completions/min_length": 29.5, "epoch": 5.710846363862, "grad_norm": 1.1173074582664202, "kl": 0.10137939453125, "learning_rate": 3.9586929345379127e-07, "loss": 0.017305200919508934, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2873, "train_speed(iter/s)": 0.022687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 74.98958539962769, "completions/min_length": 29.875, "epoch": 5.71283196823033, "grad_norm": 0.0040203168515333355, "kl": 0.1070556640625, "learning_rate": 3.955607474644345e-07, "loss": 0.00010717045370256528, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2874, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 78.739586353302, "completions/min_length": 24.75, "epoch": 5.71481757259866, "grad_norm": 0.004629111068471344, "kl": 0.086578369140625, "learning_rate": 3.9525224305457495e-07, "loss": 8.650859672343358e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2875, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 87.72916746139526, "completions/min_length": 34.375, "epoch": 5.716803176966989, "grad_norm": 0.005799682580273611, "kl": 0.08709716796875, "learning_rate": 3.949437803470349e-07, "loss": 8.704730134923011e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2876, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 69.96875095367432, "completions/min_length": 28.625, "epoch": 5.718788781335319, "grad_norm": 0.005373147582449111, "kl": 0.08203125, "learning_rate": 3.9463535946461974e-07, "loss": 8.200074080377817e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2877, "train_speed(iter/s)": 0.022688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 77.01041889190674, "completions/min_length": 31.75, "epoch": 5.720774385703649, "grad_norm": 0.004989757980520973, "kl": 0.093048095703125, "learning_rate": 3.9432698053011855e-07, "loss": 9.297170618083328e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2878, "train_speed(iter/s)": 0.022685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 79.98958587646484, "completions/min_length": 32.5, "epoch": 5.722759990071978, "grad_norm": 1.1965771845272757, "kl": 0.10614013671875, "learning_rate": 3.940186436663033e-07, "loss": -0.004934785421937704, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2879, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 81.76041889190674, "completions/min_length": 33.875, "epoch": 5.724745594440308, "grad_norm": 1.3269164319492914, "kl": 0.1026611328125, "learning_rate": 3.9371034899593e-07, "loss": 0.00010267397010466084, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2880, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 82.06250190734863, "completions/min_length": 30.875, "epoch": 5.726731198808637, "grad_norm": 0.004972271735632945, "kl": 0.10040283203125, "learning_rate": 3.9340209664173693e-07, "loss": 0.00010049731645267457, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2881, "train_speed(iter/s)": 0.022685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 90.46875238418579, "completions/min_length": 36.0, "epoch": 5.728716803176967, "grad_norm": 0.0038756212511577215, "kl": 0.09271240234375, "learning_rate": 3.930938867264461e-07, "loss": 9.273842442780733e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2882, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 80.75000286102295, "completions/min_length": 23.0, "epoch": 5.730702407545296, "grad_norm": 0.0035390928132083372, "kl": 0.09417724609375, "learning_rate": 3.9278571937276247e-07, "loss": 9.413848601980135e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2883, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 73.8541693687439, "completions/min_length": 27.25, "epoch": 5.732688011913626, "grad_norm": 0.0069725602021814435, "kl": 0.1033935546875, "learning_rate": 3.9247759470337403e-07, "loss": 0.00010328639473300427, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2884, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 79.145836353302, "completions/min_length": 33.25, "epoch": 5.734673616281956, "grad_norm": 0.0037228136304540502, "kl": 0.08056640625, "learning_rate": 3.921695128409517e-07, "loss": 8.065340807661414e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2885, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 77.18750190734863, "completions/min_length": 37.375, "epoch": 5.736659220650285, "grad_norm": 0.005487361410448833, "kl": 0.084808349609375, "learning_rate": 3.918614739081493e-07, "loss": 8.481842087348923e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2886, "train_speed(iter/s)": 0.022684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 80.15625286102295, "completions/min_length": 33.875, "epoch": 5.738644825018615, "grad_norm": 0.00400914441986354, "kl": 0.10321044921875, "learning_rate": 3.915534780276042e-07, "loss": 0.0001032254658639431, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2887, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.875, "completions/mean_length": 79.78125238418579, "completions/min_length": 30.75, "epoch": 5.740630429386945, "grad_norm": 0.004728472274261351, "kl": 0.109771728515625, "learning_rate": 3.912455253219358e-07, "loss": 0.0001098291395464912, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2888, "train_speed(iter/s)": 0.022682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 76.1354193687439, "completions/min_length": 29.5, "epoch": 5.742616033755274, "grad_norm": 0.040205722024986866, "kl": 0.14013671875, "learning_rate": 3.9093761591374675e-07, "loss": 0.00014028666191734374, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2889, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 76.59375238418579, "completions/min_length": 30.125, "epoch": 5.744601638123604, "grad_norm": 0.003686857667146047, "kl": 0.086822509765625, "learning_rate": 3.9062974992562224e-07, "loss": 8.68609276949428e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2890, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 84.60416984558105, "completions/min_length": 27.125, "epoch": 5.746587242491934, "grad_norm": 0.003979245273432298, "kl": 0.0970458984375, "learning_rate": 3.9032192748013043e-07, "loss": 9.707448771223426e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2891, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 67.33333587646484, "completions/min_length": 21.75, "epoch": 5.748572846860263, "grad_norm": 0.004204862100027973, "kl": 0.0811767578125, "learning_rate": 3.9001414869982206e-07, "loss": 8.112274372251704e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2892, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 78.67708683013916, "completions/min_length": 24.25, "epoch": 5.750558451228593, "grad_norm": 1.7226839420927158, "kl": 0.0838623046875, "learning_rate": 3.8970641370723e-07, "loss": 0.00878245010972023, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.8333333395421505, "rewards/CineAccuracyORM/std": 0.17548104748129845, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2893, "train_speed(iter/s)": 0.022681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 78.46875286102295, "completions/min_length": 30.0, "epoch": 5.752544055596922, "grad_norm": 0.0036201103180731925, "kl": 0.1190185546875, "learning_rate": 3.893987226248707e-07, "loss": 0.00011911365436390042, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2894, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 77.76041841506958, "completions/min_length": 31.75, "epoch": 5.754529659965252, "grad_norm": 0.0039046202016866354, "kl": 0.09246826171875, "learning_rate": 3.890910755752424e-07, "loss": 9.243890963261947e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2895, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.875, "completions/mean_length": 91.23958492279053, "completions/min_length": 40.75, "epoch": 5.756515264333581, "grad_norm": 0.8238213184679289, "kl": 0.0931396484375, "learning_rate": 3.8878347268082577e-07, "loss": -0.007115792483091354, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2896, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.875, "completions/mean_length": 78.89583539962769, "completions/min_length": 26.875, "epoch": 5.758500868701911, "grad_norm": 0.0038819953302039055, "kl": 0.09844970703125, "learning_rate": 3.884759140640842e-07, "loss": 9.849973139353096e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2897, "train_speed(iter/s)": 0.02268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 74.84375190734863, "completions/min_length": 29.5, "epoch": 5.760486473070241, "grad_norm": 0.004409141246914084, "kl": 0.08538818359375, "learning_rate": 3.881683998474633e-07, "loss": 8.543799049220979e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2898, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 83.93750190734863, "completions/min_length": 33.125, "epoch": 5.76247207743857, "grad_norm": 1.359126264867561, "kl": 0.106353759765625, "learning_rate": 3.878609301533912e-07, "loss": -0.0023418504279106855, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2899, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 76.76041793823242, "completions/min_length": 29.75, "epoch": 5.7644576818069, "grad_norm": 0.004656560504781321, "kl": 0.10186767578125, "learning_rate": 3.875535051042778e-07, "loss": 0.00010198411473538727, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2900, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 70.09375333786011, "completions/min_length": 21.875, "epoch": 5.76644328617523, "grad_norm": 0.0043039560461887606, "kl": 0.09051513671875, "learning_rate": 3.87246124822516e-07, "loss": 9.051974484464154e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2901, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 83.41666984558105, "completions/min_length": 30.75, "epoch": 5.768428890543559, "grad_norm": 0.003974606641506099, "kl": 0.10223388671875, "learning_rate": 3.8693878943048025e-07, "loss": 0.0001022075884975493, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2902, "train_speed(iter/s)": 0.022678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 68.92708539962769, "completions/min_length": 28.125, "epoch": 5.770414494911889, "grad_norm": 1.147784167066906, "kl": 0.095062255859375, "learning_rate": 3.8663149905052737e-07, "loss": -0.0020088721066713333, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2903, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 79.052086353302, "completions/min_length": 28.875, "epoch": 5.772400099280219, "grad_norm": 0.0033709819089969145, "kl": 0.09381103515625, "learning_rate": 3.8632425380499635e-07, "loss": 9.371935448143631e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2904, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 76.93750190734863, "completions/min_length": 28.5, "epoch": 5.774385703648548, "grad_norm": 0.8454195716256399, "kl": 0.08197021484375, "learning_rate": 3.8601705381620774e-07, "loss": 0.014056676998734474, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2905, "train_speed(iter/s)": 0.022677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 70.92708587646484, "completions/min_length": 18.625, "epoch": 5.776371308016878, "grad_norm": 0.01833428620684871, "kl": 0.12359619140625, "learning_rate": 3.857098992064647e-07, "loss": 0.00012341572437435389, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2906, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 73.82292032241821, "completions/min_length": 22.75, "epoch": 5.778356912385207, "grad_norm": 0.003682226585944467, "kl": 0.0858154296875, "learning_rate": 3.8540279009805185e-07, "loss": 8.579586574342102e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2907, "train_speed(iter/s)": 0.022676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.875, "completions/mean_length": 73.44791889190674, "completions/min_length": 20.125, "epoch": 5.780342516753537, "grad_norm": 0.0037552103918148017, "kl": 0.08489990234375, "learning_rate": 3.850957266132361e-07, "loss": 8.486880687996745e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2908, "train_speed(iter/s)": 0.022675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 80.55208396911621, "completions/min_length": 21.25, "epoch": 5.782328121121866, "grad_norm": 1.7545861382134982, "kl": 0.107666015625, "learning_rate": 3.847887088742659e-07, "loss": 0.00010767951607704163, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2909, "train_speed(iter/s)": 0.022675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 73.12500143051147, "completions/min_length": 26.25, "epoch": 5.784313725490196, "grad_norm": 1.0189915019535853, "kl": 0.087982177734375, "learning_rate": 3.844817370033716e-07, "loss": 0.016254860907793045, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2910, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 75.33333587646484, "completions/min_length": 27.25, "epoch": 5.786299329858526, "grad_norm": 0.0040009282264094085, "kl": 0.1044921875, "learning_rate": 3.841748111227651e-07, "loss": 0.00010461873898748308, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2911, "train_speed(iter/s)": 0.022674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 70.85416841506958, "completions/min_length": 31.5, "epoch": 5.788284934226855, "grad_norm": 0.004523081990784025, "kl": 0.094451904296875, "learning_rate": 3.838679313546405e-07, "loss": 9.434594539925456e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2912, "train_speed(iter/s)": 0.022674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.625, "completions/mean_length": 80.58333492279053, "completions/min_length": 34.0, "epoch": 5.790270538595185, "grad_norm": 1.9379524164896957, "kl": 0.0933837890625, "learning_rate": 3.8356109782117275e-07, "loss": 9.336074435850605e-05, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.3879413418471813, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2913, "train_speed(iter/s)": 0.022674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 79.83333492279053, "completions/min_length": 27.75, "epoch": 5.792256142963515, "grad_norm": 1.6970084821016755, "kl": 0.089599609375, "learning_rate": 3.832543106445188e-07, "loss": -0.0005534527590498328, "memory(GiB)": 94.21, "reward": 1.6145833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2914, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 64.33333539962769, "completions/min_length": 27.25, "epoch": 5.794241747331844, "grad_norm": 0.01129079340426373, "kl": 0.113037109375, "learning_rate": 3.8294756994681776e-07, "loss": 0.00011286174412816763, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2915, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 75.19791889190674, "completions/min_length": 26.125, "epoch": 5.796227351700174, "grad_norm": 1.5064358724183429, "kl": 0.090789794921875, "learning_rate": 3.8264087585018924e-07, "loss": 0.007380732800811529, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.29628782719373703, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2916, "train_speed(iter/s)": 0.022673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.875, "completions/mean_length": 85.20833492279053, "completions/min_length": 31.25, "epoch": 5.798212956068504, "grad_norm": 0.003846152313106342, "kl": 0.09326171875, "learning_rate": 3.8233422847673475e-07, "loss": 9.313251212006435e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2917, "train_speed(iter/s)": 0.022672 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.375, "completions/mean_length": 83.46875286102295, "completions/min_length": 37.25, "epoch": 5.800198560436833, "grad_norm": 1.0992639251272731, "kl": 0.1005859375, "learning_rate": 3.8202762794853715e-07, "loss": 0.02382952719926834, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.375051774084568, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2918, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 75.71875190734863, "completions/min_length": 25.625, "epoch": 5.802184164805163, "grad_norm": 1.743163953784932, "kl": 0.08819580078125, "learning_rate": 3.8172107438766076e-07, "loss": -0.01659288816154003, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12089945748448372, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2919, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.375, "completions/mean_length": 82.79166889190674, "completions/min_length": 27.375, "epoch": 5.804169769173492, "grad_norm": 0.004523808945714486, "kl": 0.1009521484375, "learning_rate": 3.81414567916151e-07, "loss": 0.00010098607162944973, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2920, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 72.57291841506958, "completions/min_length": 27.5, "epoch": 5.806155373541822, "grad_norm": 0.24544418874165735, "kl": 0.212646484375, "learning_rate": 3.811081086560346e-07, "loss": 0.00021282854140736163, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2921, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 71.62500381469727, "completions/min_length": 28.125, "epoch": 5.808140977910151, "grad_norm": 0.0043468580674483745, "kl": 0.099853515625, "learning_rate": 3.808016967293197e-07, "loss": 9.988778037950397e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2922, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 68.51041841506958, "completions/min_length": 29.125, "epoch": 5.810126582278481, "grad_norm": 0.0039887505140692205, "kl": 0.07208251953125, "learning_rate": 3.8049533225799534e-07, "loss": 7.210955664049834e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2923, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 78.75000333786011, "completions/min_length": 24.375, "epoch": 5.812112186646811, "grad_norm": 0.0043856681120003765, "kl": 0.1015625, "learning_rate": 3.8018901536403194e-07, "loss": 0.00010158519580727443, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2924, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 69.5104193687439, "completions/min_length": 31.875, "epoch": 5.81409779101514, "grad_norm": 1.3953477739577373, "kl": 0.094268798828125, "learning_rate": 3.7988274616938043e-07, "loss": -0.004629717208445072, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2925, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 70.69791841506958, "completions/min_length": 19.875, "epoch": 5.81608339538347, "grad_norm": 0.006662755789561431, "kl": 0.08282470703125, "learning_rate": 3.7957652479597333e-07, "loss": 8.284844079753384e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2926, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 66.833336353302, "completions/min_length": 25.0, "epoch": 5.8180689997518, "grad_norm": 0.003897694013395826, "kl": 0.073394775390625, "learning_rate": 3.7927035136572393e-07, "loss": 7.329390064114705e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2927, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 75.46875238418579, "completions/min_length": 30.875, "epoch": 5.820054604120129, "grad_norm": 0.0034434875166394164, "kl": 0.09765625, "learning_rate": 3.7896422600052625e-07, "loss": 9.767961455509067e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2928, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 71.92708444595337, "completions/min_length": 25.25, "epoch": 5.822040208488459, "grad_norm": 1.9162569131966805, "kl": 0.087982177734375, "learning_rate": 3.786581488222556e-07, "loss": -0.015171239152550697, "memory(GiB)": 94.21, "reward": 1.9791666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2929, "train_speed(iter/s)": 0.022671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.125, "completions/mean_length": 72.42708444595337, "completions/min_length": 27.625, "epoch": 5.824025812856789, "grad_norm": 0.0039399923731347425, "kl": 0.0806884765625, "learning_rate": 3.7835211995276765e-07, "loss": 8.071921183727682e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2930, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 74.645836353302, "completions/min_length": 21.875, "epoch": 5.826011417225118, "grad_norm": 0.00324747499578154, "kl": 0.07037353515625, "learning_rate": 3.780461395138991e-07, "loss": 7.030913548078388e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2931, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 68.27083587646484, "completions/min_length": 22.375, "epoch": 5.827997021593448, "grad_norm": 0.00431383145438088, "kl": 0.07781982421875, "learning_rate": 3.7774020762746745e-07, "loss": 7.780019222991541e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2932, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 72.47916889190674, "completions/min_length": 25.0, "epoch": 5.829982625961777, "grad_norm": 1.9090636097110216, "kl": 0.09674072265625, "learning_rate": 3.774343244152704e-07, "loss": 0.0058203041553497314, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.18335824459791183, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2933, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 79.52083492279053, "completions/min_length": 28.75, "epoch": 5.831968230330107, "grad_norm": 0.006316364041589908, "kl": 0.08758544921875, "learning_rate": 3.7712848999908676e-07, "loss": 8.758005424169824e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2934, "train_speed(iter/s)": 0.02267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 71.81250143051147, "completions/min_length": 22.125, "epoch": 5.833953834698436, "grad_norm": 0.0037255999544836727, "kl": 0.0833740234375, "learning_rate": 3.768227045006756e-07, "loss": 8.34009115351364e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2935, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 75.97916984558105, "completions/min_length": 25.5, "epoch": 5.835939439066766, "grad_norm": 0.0035334653910409583, "kl": 0.0794677734375, "learning_rate": 3.765169680417769e-07, "loss": 7.949472637847066e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2936, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 83.92708492279053, "completions/min_length": 28.875, "epoch": 5.837925043435096, "grad_norm": 0.003771749128448991, "kl": 0.0833740234375, "learning_rate": 3.7621128074411076e-07, "loss": 8.329372212756425e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2937, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.875, "completions/mean_length": 73.61458587646484, "completions/min_length": 22.5, "epoch": 5.839910647803425, "grad_norm": 0.022936256618794247, "kl": 0.1143798828125, "learning_rate": 3.759056427293778e-07, "loss": 0.00011428249854361638, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2938, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 70.61458587646484, "completions/min_length": 23.125, "epoch": 5.841896252171755, "grad_norm": 0.00970710425053875, "kl": 0.1025390625, "learning_rate": 3.756000541192591e-07, "loss": 0.00010258870315738022, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2939, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 70.95833587646484, "completions/min_length": 25.625, "epoch": 5.843881856540085, "grad_norm": 0.0061579946628700895, "kl": 0.0809326171875, "learning_rate": 3.752945150354159e-07, "loss": 8.097306999843568e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2940, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.75, "completions/mean_length": 80.84375190734863, "completions/min_length": 30.25, "epoch": 5.845867460908414, "grad_norm": 0.003636028911850159, "kl": 0.08221435546875, "learning_rate": 3.7498902559949006e-07, "loss": 8.218929724534974e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2941, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 79.06250238418579, "completions/min_length": 31.375, "epoch": 5.847853065276744, "grad_norm": 0.003120829864230285, "kl": 0.07861328125, "learning_rate": 3.7468358593310303e-07, "loss": 7.856819865992293e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2942, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 73.90625190734863, "completions/min_length": 25.875, "epoch": 5.849838669645074, "grad_norm": 0.003801615504914575, "kl": 0.0906982421875, "learning_rate": 3.743781961578573e-07, "loss": 9.066134225577116e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2943, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 72.88541841506958, "completions/min_length": 26.875, "epoch": 5.851824274013403, "grad_norm": 0.0036436166747744335, "kl": 0.08294677734375, "learning_rate": 3.7407285639533505e-07, "loss": 8.297446765936911e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2944, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 74.6041693687439, "completions/min_length": 21.25, "epoch": 5.853809878381733, "grad_norm": 1.5290246542422399, "kl": 0.09075927734375, "learning_rate": 3.737675667670983e-07, "loss": 0.01472728606313467, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2945, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 84.37500190734863, "completions/min_length": 31.0, "epoch": 5.855795482750062, "grad_norm": 0.005983407809149179, "kl": 0.09405517578125, "learning_rate": 3.7346232739468944e-07, "loss": 9.40181635087356e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2946, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 73.17708539962769, "completions/min_length": 21.0, "epoch": 5.857781087118392, "grad_norm": 0.003353061161646867, "kl": 0.08172607421875, "learning_rate": 3.731571383996308e-07, "loss": 8.175710536306724e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2947, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 62.468750953674316, "completions/min_length": 23.375, "epoch": 5.859766691486721, "grad_norm": 0.004986112007044284, "kl": 0.081817626953125, "learning_rate": 3.7285199990342465e-07, "loss": 8.168508065864444e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2948, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 70.25000190734863, "completions/min_length": 29.75, "epoch": 5.861752295855051, "grad_norm": 0.005495352875061472, "kl": 0.08514404296875, "learning_rate": 3.725469120275532e-07, "loss": 8.517235983163118e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2949, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 68.47916889190674, "completions/min_length": 20.875, "epoch": 5.863737900223381, "grad_norm": 0.006157837253065416, "kl": 0.083221435546875, "learning_rate": 3.7224187489347844e-07, "loss": 8.318398613482714e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2950, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 65.63541889190674, "completions/min_length": 24.0, "epoch": 5.86572350459171, "grad_norm": 0.006483468559431403, "kl": 0.09039306640625, "learning_rate": 3.7193688862264214e-07, "loss": 9.046150080394e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2951, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 77.72916984558105, "completions/min_length": 20.875, "epoch": 5.86770910896004, "grad_norm": 0.007216988320940811, "kl": 0.0992431640625, "learning_rate": 3.7163195333646594e-07, "loss": 9.906500781653449e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2952, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 67.7916693687439, "completions/min_length": 23.875, "epoch": 5.86969471332837, "grad_norm": 0.006972280389571689, "kl": 0.089508056640625, "learning_rate": 3.7132706915635083e-07, "loss": 8.944849105319008e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2953, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 80.79167032241821, "completions/min_length": 28.0, "epoch": 5.871680317696699, "grad_norm": 0.005931778356708559, "kl": 0.089874267578125, "learning_rate": 3.710222362036779e-07, "loss": 9.001302532851696e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2954, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 68.46875190734863, "completions/min_length": 19.375, "epoch": 5.873665922065029, "grad_norm": 0.005969014872074029, "kl": 0.0859375, "learning_rate": 3.707174545998076e-07, "loss": 8.594428072683513e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2955, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 71.67708492279053, "completions/min_length": 23.125, "epoch": 5.875651526433359, "grad_norm": 0.09805706996430967, "kl": 0.27642822265625, "learning_rate": 3.7041272446608007e-07, "loss": 0.000276578008197248, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2956, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.875, "completions/mean_length": 81.66666984558105, "completions/min_length": 30.125, "epoch": 5.877637130801688, "grad_norm": 0.006089357713340864, "kl": 0.08837890625, "learning_rate": 3.701080459238148e-07, "loss": 8.834289474179968e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2957, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.875, "completions/mean_length": 79.95833587646484, "completions/min_length": 22.375, "epoch": 5.879622735170018, "grad_norm": 0.0048544631225078535, "kl": 0.09759521484375, "learning_rate": 3.6980341909431103e-07, "loss": 9.758198575582355e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2958, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 78.30208492279053, "completions/min_length": 19.625, "epoch": 5.881608339538347, "grad_norm": 0.004803929641272386, "kl": 0.11053466796875, "learning_rate": 3.69498844098847e-07, "loss": 0.00011050906323362142, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2959, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 77.79166984558105, "completions/min_length": 26.5, "epoch": 5.883593943906677, "grad_norm": 0.009623782732732174, "kl": 0.086151123046875, "learning_rate": 3.6919432105868053e-07, "loss": 8.609489304944873e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2960, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 70.57291889190674, "completions/min_length": 21.625, "epoch": 5.885579548275006, "grad_norm": 0.005301205789896645, "kl": 0.0797119140625, "learning_rate": 3.688898500950489e-07, "loss": 7.974475738592446e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2961, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 76.63541889190674, "completions/min_length": 31.0, "epoch": 5.887565152643336, "grad_norm": 0.003450906596375903, "kl": 0.08489990234375, "learning_rate": 3.6858543132916806e-07, "loss": 8.487096056342125e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2962, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 67.30208539962769, "completions/min_length": 23.375, "epoch": 5.889550757011666, "grad_norm": 0.005052504895692179, "kl": 0.071685791015625, "learning_rate": 3.6828106488223427e-07, "loss": 7.170936441980302e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2963, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 73.06250143051147, "completions/min_length": 24.875, "epoch": 5.891536361379995, "grad_norm": 0.005036433030709688, "kl": 0.08502197265625, "learning_rate": 3.67976750875422e-07, "loss": 8.504463767167181e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2964, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 61.791667461395264, "completions/min_length": 19.75, "epoch": 5.893521965748325, "grad_norm": 0.00408712473934292, "kl": 0.078704833984375, "learning_rate": 3.6767248942988514e-07, "loss": 7.877701864344999e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2965, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 71.03125286102295, "completions/min_length": 27.625, "epoch": 5.895507570116655, "grad_norm": 0.00573599659279665, "kl": 0.086181640625, "learning_rate": 3.6736828066675664e-07, "loss": 8.620007429271936e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2966, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 73.78125143051147, "completions/min_length": 22.25, "epoch": 5.897493174484984, "grad_norm": 0.005248226277860616, "kl": 0.0999755859375, "learning_rate": 3.6706412470714856e-07, "loss": 0.00010005956573877484, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2967, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 75.28125190734863, "completions/min_length": 28.375, "epoch": 5.899478778853314, "grad_norm": 0.004118834077966699, "kl": 0.07537841796875, "learning_rate": 3.667600216721519e-07, "loss": 7.537403871538118e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2968, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.625, "completions/mean_length": 77.16666841506958, "completions/min_length": 28.25, "epoch": 5.901464383221644, "grad_norm": 0.003818959198151138, "kl": 0.0887451171875, "learning_rate": 3.6645597168283636e-07, "loss": 8.871472527971491e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2969, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.875, "completions/mean_length": 76.39583492279053, "completions/min_length": 27.5, "epoch": 5.903449987589973, "grad_norm": 0.007421889798294304, "kl": 0.11346435546875, "learning_rate": 3.661519748602511e-07, "loss": 0.00011346233077347279, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2970, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 63.843751430511475, "completions/min_length": 26.0, "epoch": 5.905435591958303, "grad_norm": 1.3520482868020578, "kl": 0.088348388671875, "learning_rate": 3.6584803132542356e-07, "loss": 0.016340874135494232, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2971, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 69.927086353302, "completions/min_length": 30.375, "epoch": 5.907421196326632, "grad_norm": 0.007912083235969224, "kl": 0.09619140625, "learning_rate": 3.655441411993603e-07, "loss": 9.617401519790292e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2972, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 68.7916669845581, "completions/min_length": 21.0, "epoch": 5.909406800694962, "grad_norm": 0.00868182488605808, "kl": 0.09735107421875, "learning_rate": 3.652403046030462e-07, "loss": 9.739540837472305e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2973, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.875, "completions/mean_length": 76.89583492279053, "completions/min_length": 17.5, "epoch": 5.911392405063291, "grad_norm": 0.008532889612779303, "kl": 0.10479736328125, "learning_rate": 3.649365216574453e-07, "loss": 0.00010475765157025307, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2974, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 69.51041889190674, "completions/min_length": 20.125, "epoch": 5.913378009431621, "grad_norm": 0.003483703631857241, "kl": 0.08673095703125, "learning_rate": 3.646327924835e-07, "loss": 8.664555934956297e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2975, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 82.14583683013916, "completions/min_length": 24.125, "epoch": 5.915363613799951, "grad_norm": 0.004752179572459591, "kl": 0.07733154296875, "learning_rate": 3.6432911720213124e-07, "loss": 7.731119694653898e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2976, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 69.20833444595337, "completions/min_length": 18.625, "epoch": 5.91734921816828, "grad_norm": 0.5853186624109498, "kl": 0.20550537109375, "learning_rate": 3.6402549593423893e-07, "loss": 0.016102951020002365, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2977, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 87.23958587646484, "completions/min_length": 33.5, "epoch": 5.91933482253661, "grad_norm": 0.7611127935842638, "kl": 0.11114501953125, "learning_rate": 3.6372192880070097e-07, "loss": 0.0026620272547006607, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2978, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.625, "completions/mean_length": 85.13541984558105, "completions/min_length": 26.75, "epoch": 5.9213204269049395, "grad_norm": 0.005794860863805745, "kl": 0.08740234375, "learning_rate": 3.6341841592237407e-07, "loss": 8.740430348552763e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2979, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 76.7291693687439, "completions/min_length": 31.0, "epoch": 5.923306031273269, "grad_norm": 1.21158157663013, "kl": 0.097900390625, "learning_rate": 3.6311495742009304e-07, "loss": 0.002060196129605174, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2980, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 77.25000143051147, "completions/min_length": 26.875, "epoch": 5.9252916356415986, "grad_norm": 0.004366902761875255, "kl": 0.0908203125, "learning_rate": 3.628115534146714e-07, "loss": 9.082164615392685e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2981, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 74.98958587646484, "completions/min_length": 27.375, "epoch": 5.9272772400099285, "grad_norm": 0.0034984088361081018, "kl": 0.09613037109375, "learning_rate": 3.6250820402690053e-07, "loss": 9.61034675128758e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2982, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 88.87500286102295, "completions/min_length": 22.875, "epoch": 5.929262844378258, "grad_norm": 0.9122087914842973, "kl": 0.1951904296875, "learning_rate": 3.622049093775501e-07, "loss": 0.010865895077586174, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2983, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 74.1354193687439, "completions/min_length": 25.25, "epoch": 5.9312484487465875, "grad_norm": 0.004693587594116673, "kl": 0.096893310546875, "learning_rate": 3.619016695873689e-07, "loss": 9.692844469100237e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2984, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 76.0729193687439, "completions/min_length": 16.0, "epoch": 5.933234053114917, "grad_norm": 0.004897663390481458, "kl": 0.09503173828125, "learning_rate": 3.6159848477708255e-07, "loss": 9.503068577032536e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2985, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 83.36458587646484, "completions/min_length": 27.125, "epoch": 5.9352196574832465, "grad_norm": 0.004857511985454761, "kl": 0.080780029296875, "learning_rate": 3.612953550673957e-07, "loss": 8.082823478616774e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2986, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.875, "completions/mean_length": 75.15625143051147, "completions/min_length": 25.375, "epoch": 5.937205261851576, "grad_norm": 0.011694055729766768, "kl": 0.079345703125, "learning_rate": 3.6099228057899055e-07, "loss": 7.924922101665288e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2987, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 70.23958587646484, "completions/min_length": 22.25, "epoch": 5.9391908662199056, "grad_norm": 0.00624293029874596, "kl": 0.10369873046875, "learning_rate": 3.6068926143252774e-07, "loss": 0.00010362219472881407, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2988, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 67.33333587646484, "completions/min_length": 21.625, "epoch": 5.9411764705882355, "grad_norm": 0.004475921118529464, "kl": 0.074798583984375, "learning_rate": 3.603862977486456e-07, "loss": 7.480620843125507e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2989, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 72.81250238418579, "completions/min_length": 17.375, "epoch": 5.943162074956565, "grad_norm": 0.0050260122735072315, "kl": 0.1019287109375, "learning_rate": 3.6008338964796013e-07, "loss": 0.00010196936636930332, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2990, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 71.89583492279053, "completions/min_length": 24.875, "epoch": 5.9451476793248945, "grad_norm": 0.005172651937816761, "kl": 0.11724853515625, "learning_rate": 3.59780537251066e-07, "loss": 0.00011721058399416506, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2991, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 74.13541984558105, "completions/min_length": 23.375, "epoch": 5.9471332836932245, "grad_norm": 0.005079963609127899, "kl": 0.10150146484375, "learning_rate": 3.594777406785351e-07, "loss": 0.00010150492744287476, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2992, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 80.89583587646484, "completions/min_length": 24.875, "epoch": 5.9491188880615535, "grad_norm": 0.00582672905249592, "kl": 0.09539794921875, "learning_rate": 3.5917500005091704e-07, "loss": 9.530916577205062e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2993, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 77.36458492279053, "completions/min_length": 23.75, "epoch": 5.9511044924298835, "grad_norm": 0.005135815814550793, "kl": 0.080596923828125, "learning_rate": 3.5887231548873935e-07, "loss": 8.065038127824664e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2994, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 73.51041793823242, "completions/min_length": 19.625, "epoch": 5.953090096798213, "grad_norm": 0.004943173991657411, "kl": 0.09637451171875, "learning_rate": 3.5856968711250735e-07, "loss": 9.636885806685314e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2995, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 81.71875286102295, "completions/min_length": 25.25, "epoch": 5.9550757011665425, "grad_norm": 0.8318914149732651, "kl": 0.10394287109375, "learning_rate": 3.5826711504270376e-07, "loss": 0.00010397534060757607, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2996, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 68.93750095367432, "completions/min_length": 22.5, "epoch": 5.9570613055348725, "grad_norm": 0.8835777121408408, "kl": 0.177490234375, "learning_rate": 3.5796459939978893e-07, "loss": 0.003880441188812256, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2997, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 69.79166889190674, "completions/min_length": 21.375, "epoch": 5.9590469099032015, "grad_norm": 0.00442838891107234, "kl": 0.08392333984375, "learning_rate": 3.5766214030420095e-07, "loss": 8.392141171498224e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2998, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 70.45833492279053, "completions/min_length": 19.625, "epoch": 5.9610325142715315, "grad_norm": 1.039928644425254, "kl": 0.10028076171875, "learning_rate": 3.573597378763552e-07, "loss": -0.0047088852152228355, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2999, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 68.16666841506958, "completions/min_length": 22.0, "epoch": 5.9630181186398605, "grad_norm": 2.005029702131463, "kl": 0.10845947265625, "learning_rate": 3.5705739223664455e-07, "loss": -0.011583349667489529, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3000, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.875, "completions/mean_length": 76.35416841506958, "completions/min_length": 23.5, "epoch": 5.9650037230081905, "grad_norm": 1.3729627300662692, "kl": 0.08941650390625, "learning_rate": 3.5675510350543933e-07, "loss": 0.0010114660253748298, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3001, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 69.68750286102295, "completions/min_length": 16.25, "epoch": 5.96698932737652, "grad_norm": 0.0037574323421503265, "kl": 0.09136962890625, "learning_rate": 3.564528718030869e-07, "loss": 9.133273124461994e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3002, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 69.65625333786011, "completions/min_length": 21.875, "epoch": 5.9689749317448495, "grad_norm": 1.9937825834877494, "kl": 0.09912109375, "learning_rate": 3.561506972499123e-07, "loss": 9.93019639281556e-05, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3003, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 52.59375190734863, "completions/min_length": 16.25, "epoch": 5.9709605361131795, "grad_norm": 0.0052650178946769405, "kl": 0.077423095703125, "learning_rate": 3.5584857996621766e-07, "loss": 7.736920088063926e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3004, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 61.44791793823242, "completions/min_length": 21.375, "epoch": 5.972946140481509, "grad_norm": 1.8411907169207193, "kl": 0.1083984375, "learning_rate": 3.5554652007228236e-07, "loss": -0.00985028874129057, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.306039284914732, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3005, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 70.37500286102295, "completions/min_length": 19.875, "epoch": 5.9749317448498385, "grad_norm": 0.005944787733696136, "kl": 0.10211181640625, "learning_rate": 3.552445176883629e-07, "loss": 0.00010209568426944315, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3006, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 70.83333492279053, "completions/min_length": 24.375, "epoch": 5.976917349218168, "grad_norm": 0.004626703184411186, "kl": 0.1005859375, "learning_rate": 3.5494257293469285e-07, "loss": 0.00010056763130705804, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3007, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 67.75000190734863, "completions/min_length": 23.875, "epoch": 5.978902953586498, "grad_norm": 0.004506350776103028, "kl": 0.094818115234375, "learning_rate": 3.546406859314829e-07, "loss": 9.477618732489645e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3008, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 67.96875190734863, "completions/min_length": 18.0, "epoch": 5.980888557954827, "grad_norm": 0.00680046156960817, "kl": 0.08392333984375, "learning_rate": 3.5433885679892075e-07, "loss": 8.388960122829303e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3009, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 70.0104193687439, "completions/min_length": 20.875, "epoch": 5.982874162323157, "grad_norm": 0.004347740317199421, "kl": 0.0872802734375, "learning_rate": 3.5403708565717086e-07, "loss": 8.725229417905211e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3010, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 73.93750190734863, "completions/min_length": 24.125, "epoch": 5.9848597666914864, "grad_norm": 1.0430656049182414, "kl": 0.2037353515625, "learning_rate": 3.5373537262637465e-07, "loss": -0.008169452659785748, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3011, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 71.73958444595337, "completions/min_length": 27.625, "epoch": 5.986845371059816, "grad_norm": 0.016795101580157106, "kl": 0.13067626953125, "learning_rate": 3.5343371782665105e-07, "loss": 0.00013058530748821795, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3012, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 73.42708587646484, "completions/min_length": 22.875, "epoch": 5.9888309754281455, "grad_norm": 0.8903363707388529, "kl": 0.11309814453125, "learning_rate": 3.531321213780949e-07, "loss": 0.005973357707262039, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3013, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 63.94791841506958, "completions/min_length": 15.5, "epoch": 5.990816579796475, "grad_norm": 0.007654375298453226, "kl": 0.09576416015625, "learning_rate": 3.528305834007782e-07, "loss": 9.577290620654821e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3014, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 66.73958587646484, "completions/min_length": 20.375, "epoch": 5.992802184164805, "grad_norm": 1.5989800342398168, "kl": 0.1014404296875, "learning_rate": 3.525291040147498e-07, "loss": -0.00199575605802238, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3015, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 64.54166889190674, "completions/min_length": 17.0, "epoch": 5.994787788533134, "grad_norm": 0.00716979784356222, "kl": 0.1171875, "learning_rate": 3.522276833400349e-07, "loss": 0.00011689918756019324, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3016, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 70.18750190734863, "completions/min_length": 22.875, "epoch": 5.996773392901464, "grad_norm": 1.0281974600293744, "kl": 0.10400390625, "learning_rate": 3.519263214966355e-07, "loss": -0.0014040371170267463, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.10518955811858177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3017, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 58.92708444595337, "completions/min_length": 21.375, "epoch": 5.998758997269794, "grad_norm": 0.007848128392484928, "kl": 0.0992431640625, "learning_rate": 3.5162501860453044e-07, "loss": 9.92728746496141e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3018, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 69.36458396911621, "completions/min_length": 17.375, "epoch": 6.00198560436833, "grad_norm": 0.007164685475978047, "kl": 0.091064453125, "learning_rate": 3.513237747836747e-07, "loss": 9.108962694881484e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3019, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 66.86458587646484, "completions/min_length": 19.75, "epoch": 6.003971208736659, "grad_norm": 0.7847226339114398, "kl": 0.08758544921875, "learning_rate": 3.510225901539998e-07, "loss": -0.005176226608455181, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3020, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 71.91666889190674, "completions/min_length": 26.5, "epoch": 6.005956813104989, "grad_norm": 0.013268198316585754, "kl": 0.093505859375, "learning_rate": 3.507214648354141e-07, "loss": 9.360029071103781e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3021, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 68.18750047683716, "completions/min_length": 24.25, "epoch": 6.007942417473318, "grad_norm": 0.0062959331303120945, "kl": 0.095458984375, "learning_rate": 3.504203989478015e-07, "loss": 9.544835484120995e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3022, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 73.75000095367432, "completions/min_length": 19.75, "epoch": 6.009928021841648, "grad_norm": 2.4589892143615204, "kl": 0.11370849609375, "learning_rate": 3.501193926110231e-07, "loss": -0.0061433217488229275, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.30885962024331093, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3023, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 77.90625143051147, "completions/min_length": 22.75, "epoch": 6.011913626209978, "grad_norm": 0.0074195218885132735, "kl": 0.08648681640625, "learning_rate": 3.4981844594491577e-07, "loss": 8.649392839288339e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3024, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 72.75000333786011, "completions/min_length": 19.875, "epoch": 6.013899230578307, "grad_norm": 0.008399255612897923, "kl": 0.10888671875, "learning_rate": 3.49517559069293e-07, "loss": 0.00010886974632740021, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3025, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 80.30208587646484, "completions/min_length": 28.875, "epoch": 6.015884834946637, "grad_norm": 0.006814297355306778, "kl": 0.107666015625, "learning_rate": 3.492167321039442e-07, "loss": 0.00010780902812257409, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3026, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 66.27083539962769, "completions/min_length": 18.625, "epoch": 6.017870439314967, "grad_norm": 0.010580291487204947, "kl": 0.098388671875, "learning_rate": 3.4891596516863505e-07, "loss": 9.841322753345594e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3027, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 60.020836353302, "completions/min_length": 22.375, "epoch": 6.019856043683296, "grad_norm": 0.006138909046486966, "kl": 0.07861328125, "learning_rate": 3.486152583831072e-07, "loss": 7.861968333600089e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3028, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 59.708335876464844, "completions/min_length": 17.125, "epoch": 6.021841648051626, "grad_norm": 0.0073077468221807215, "kl": 0.104248046875, "learning_rate": 3.4831461186707854e-07, "loss": 0.00010423504863865674, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3029, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 70.6041693687439, "completions/min_length": 26.375, "epoch": 6.023827252419955, "grad_norm": 0.0047598606273438785, "kl": 0.0849609375, "learning_rate": 3.4801402574024284e-07, "loss": 8.498937677359208e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3030, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 67.70833492279053, "completions/min_length": 20.125, "epoch": 6.025812856788285, "grad_norm": 0.005005653858880377, "kl": 0.103240966796875, "learning_rate": 3.477135001222695e-07, "loss": 0.00010328226198907942, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3031, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 80.33333539962769, "completions/min_length": 26.375, "epoch": 6.027798461156615, "grad_norm": 0.003975941244766172, "kl": 0.10101318359375, "learning_rate": 3.4741303513280493e-07, "loss": 0.00010101804218720645, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3032, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 61.270835399627686, "completions/min_length": 21.0, "epoch": 6.029784065524944, "grad_norm": 0.006204868818693939, "kl": 0.0948486328125, "learning_rate": 3.4711263089147015e-07, "loss": 9.481459710514173e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3033, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 66.60416984558105, "completions/min_length": 21.5, "epoch": 6.031769669893274, "grad_norm": 0.006210913413401623, "kl": 0.09979248046875, "learning_rate": 3.4681228751786255e-07, "loss": 9.9724973551929e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3034, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 60.59375238418579, "completions/min_length": 15.875, "epoch": 6.033755274261603, "grad_norm": 0.690022699096484, "kl": 0.07293701171875, "learning_rate": 3.4651200513155535e-07, "loss": -0.005853736307471991, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3035, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 76.83333396911621, "completions/min_length": 21.875, "epoch": 6.035740878629933, "grad_norm": 0.009130938690803664, "kl": 0.100341796875, "learning_rate": 3.462117838520974e-07, "loss": 0.00010030520934378728, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3036, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 60.73958492279053, "completions/min_length": 20.375, "epoch": 6.037726482998263, "grad_norm": 0.02838405887534334, "kl": 0.08935546875, "learning_rate": 3.45911623799013e-07, "loss": 8.938732207752764e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3037, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.875, "completions/mean_length": 84.92708683013916, "completions/min_length": 26.625, "epoch": 6.039712087366592, "grad_norm": 0.0037929333154969602, "kl": 0.10345458984375, "learning_rate": 3.4561152509180234e-07, "loss": 0.0001035341338138096, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3038, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 72.96875190734863, "completions/min_length": 20.875, "epoch": 6.041697691734922, "grad_norm": 1.2769924363734104, "kl": 0.097747802734375, "learning_rate": 3.4531148784994135e-07, "loss": 0.014284975826740265, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3039, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 66.60416841506958, "completions/min_length": 23.25, "epoch": 6.043683296103252, "grad_norm": 0.00956282812315992, "kl": 0.10498046875, "learning_rate": 3.450115121928812e-07, "loss": 0.00010505445970920846, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3040, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.625, "completions/mean_length": 75.11458444595337, "completions/min_length": 17.5, "epoch": 6.045668900471581, "grad_norm": 0.631342415016975, "kl": 0.10052490234375, "learning_rate": 3.447115982400485e-07, "loss": -0.0039808619767427444, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3041, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.625, "completions/mean_length": 73.12500238418579, "completions/min_length": 19.375, "epoch": 6.047654504839911, "grad_norm": 0.7969507650453275, "kl": 0.09503173828125, "learning_rate": 3.4441174611084536e-07, "loss": 0.0029293957632035017, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3042, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 68.47916746139526, "completions/min_length": 16.625, "epoch": 6.04964010920824, "grad_norm": 0.009191736156857703, "kl": 0.10650634765625, "learning_rate": 3.4411195592464936e-07, "loss": 0.0001064623793354258, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3043, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 61.40625047683716, "completions/min_length": 21.625, "epoch": 6.05162571357657, "grad_norm": 0.775572556156759, "kl": 0.098602294921875, "learning_rate": 3.438122278008134e-07, "loss": 0.0008362072403542697, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3044, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 68.94791793823242, "completions/min_length": 18.25, "epoch": 6.0536113179449, "grad_norm": 0.004270799095794153, "kl": 0.082427978515625, "learning_rate": 3.435125618586656e-07, "loss": 8.24808594188653e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3045, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 66.41666841506958, "completions/min_length": 18.75, "epoch": 6.055596922313229, "grad_norm": 0.0076512697958888536, "kl": 0.0960693359375, "learning_rate": 3.4321295821750943e-07, "loss": 9.598436736268923e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3046, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 52.29166841506958, "completions/min_length": 14.5, "epoch": 6.057582526681559, "grad_norm": 0.00961010614499657, "kl": 0.082244873046875, "learning_rate": 3.4291341699662357e-07, "loss": 8.218455332098529e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3047, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 60.395835399627686, "completions/min_length": 21.5, "epoch": 6.059568131049888, "grad_norm": 0.009199954859576568, "kl": 0.1094970703125, "learning_rate": 3.4261393831526165e-07, "loss": 0.00010940534411929548, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3048, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.125, "completions/mean_length": 62.28125047683716, "completions/min_length": 24.0, "epoch": 6.061553735418218, "grad_norm": 0.004586109928473245, "kl": 0.08966064453125, "learning_rate": 3.423145222926527e-07, "loss": 8.965361485024914e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3049, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 66.05208444595337, "completions/min_length": 21.375, "epoch": 6.063539339786548, "grad_norm": 0.007509344100748538, "kl": 0.0994873046875, "learning_rate": 3.4201516904800044e-07, "loss": 9.94074362097308e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3050, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 71.677086353302, "completions/min_length": 22.875, "epoch": 6.065524944154877, "grad_norm": 0.009464178818630315, "kl": 0.09075927734375, "learning_rate": 3.417158787004838e-07, "loss": 9.082347969524562e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3051, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 69.39583396911621, "completions/min_length": 25.625, "epoch": 6.067510548523207, "grad_norm": 2.1232579850193622, "kl": 0.087371826171875, "learning_rate": 3.4141665136925657e-07, "loss": 0.0032394803129136562, "memory(GiB)": 94.21, "reward": 1.7916666865348816, "reward_std": 0.08330589532852173, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.27966488897800446, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3052, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 57.94791793823242, "completions/min_length": 18.25, "epoch": 6.069496152891537, "grad_norm": 0.003931791342280391, "kl": 0.075592041015625, "learning_rate": 3.411174871734479e-07, "loss": 7.559516234323382e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3053, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 69.96875238418579, "completions/min_length": 23.75, "epoch": 6.071481757259866, "grad_norm": 0.004388787428620289, "kl": 0.0919189453125, "learning_rate": 3.4081838623216117e-07, "loss": 9.189383126795292e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3054, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 61.927085399627686, "completions/min_length": 22.5, "epoch": 6.073467361628196, "grad_norm": 0.005364773065299665, "kl": 0.08087158203125, "learning_rate": 3.4051934866447495e-07, "loss": 8.09452249086462e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3055, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 67.520836353302, "completions/min_length": 23.625, "epoch": 6.075452965996525, "grad_norm": 0.004018455230183852, "kl": 0.12127685546875, "learning_rate": 3.402203745894425e-07, "loss": 0.00012133817654103041, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3056, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.375, "completions/mean_length": 72.90625238418579, "completions/min_length": 25.25, "epoch": 6.077438570364855, "grad_norm": 0.009318553599931523, "kl": 0.11895751953125, "learning_rate": 3.3992146412609166e-07, "loss": 0.00011895672651007771, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3057, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 68.70833492279053, "completions/min_length": 25.75, "epoch": 6.079424174733185, "grad_norm": 0.005132911519764411, "kl": 0.08837890625, "learning_rate": 3.396226173934253e-07, "loss": 8.842172974254936e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3058, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 58.114585876464844, "completions/min_length": 24.125, "epoch": 6.081409779101514, "grad_norm": 0.004618095086480621, "kl": 0.0836181640625, "learning_rate": 3.393238345104202e-07, "loss": 8.366788824787363e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3059, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 66.42708587646484, "completions/min_length": 24.5, "epoch": 6.083395383469844, "grad_norm": 0.0051910155184405745, "kl": 0.094024658203125, "learning_rate": 3.3902511559602876e-07, "loss": 9.40771060413681e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3060, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 65.84375238418579, "completions/min_length": 29.25, "epoch": 6.085380987838173, "grad_norm": 1.414046222039776, "kl": 0.1005859375, "learning_rate": 3.387264607691772e-07, "loss": 0.01586427353322506, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3061, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 69.10416841506958, "completions/min_length": 25.25, "epoch": 6.087366592206503, "grad_norm": 2.696646432201644, "kl": 0.109222412109375, "learning_rate": 3.3842787014876635e-07, "loss": -0.003164414083585143, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3062, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 64.55208444595337, "completions/min_length": 23.75, "epoch": 6.089352196574833, "grad_norm": 0.003736785452553901, "kl": 0.09490966796875, "learning_rate": 3.3812934385367143e-07, "loss": 9.490475349593908e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3063, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 64.17708539962769, "completions/min_length": 16.125, "epoch": 6.091337800943162, "grad_norm": 0.008472093439139752, "kl": 0.10333251953125, "learning_rate": 3.3783088200274214e-07, "loss": 0.00010339477739762515, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3064, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 72.69791793823242, "completions/min_length": 21.375, "epoch": 6.093323405311492, "grad_norm": 0.004180293151649204, "kl": 0.10955810546875, "learning_rate": 3.375324847148027e-07, "loss": 0.00010954905883409083, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3065, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 59.34375190734863, "completions/min_length": 25.875, "epoch": 6.095309009679822, "grad_norm": 0.005980884123344347, "kl": 0.0999755859375, "learning_rate": 3.372341521086511e-07, "loss": 9.992992272600532e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3066, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 73.40625286102295, "completions/min_length": 23.375, "epoch": 6.097294614048151, "grad_norm": 0.0056832568749368645, "kl": 0.11175537109375, "learning_rate": 3.369358843030603e-07, "loss": 0.00011180696310475469, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3067, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 66.25000143051147, "completions/min_length": 23.875, "epoch": 6.099280218416481, "grad_norm": 0.0066111654280472144, "kl": 0.084564208984375, "learning_rate": 3.3663768141677693e-07, "loss": 8.459092350676656e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3068, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 58.90625190734863, "completions/min_length": 22.5, "epoch": 6.10126582278481, "grad_norm": 0.004695928888639055, "kl": 0.08880615234375, "learning_rate": 3.36339543568522e-07, "loss": 8.8829779997468e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3069, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 70.20833444595337, "completions/min_length": 25.875, "epoch": 6.10325142715314, "grad_norm": 0.0070060813514598655, "kl": 0.0997314453125, "learning_rate": 3.360414708769904e-07, "loss": 9.97364113572985e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3070, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 59.44791889190674, "completions/min_length": 21.25, "epoch": 6.10523703152147, "grad_norm": 0.004990590257159498, "kl": 0.08837890625, "learning_rate": 3.357434634608513e-07, "loss": 8.825818076729774e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3071, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 64.71875143051147, "completions/min_length": 29.625, "epoch": 6.107222635889799, "grad_norm": 0.004008167002824629, "kl": 0.0865478515625, "learning_rate": 3.354455214387479e-07, "loss": 8.653854456497356e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3072, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 61.395835876464844, "completions/min_length": 23.75, "epoch": 6.109208240258129, "grad_norm": 0.005212367280413786, "kl": 0.080078125, "learning_rate": 3.35147644929297e-07, "loss": 7.999087392818183e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3073, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 66.22916841506958, "completions/min_length": 25.75, "epoch": 6.111193844626458, "grad_norm": 0.004117133300155993, "kl": 0.09332275390625, "learning_rate": 3.3484983405109e-07, "loss": 9.326735744252801e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3074, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 61.16666841506958, "completions/min_length": 19.75, "epoch": 6.113179448994788, "grad_norm": 0.00493495322908631, "kl": 0.093658447265625, "learning_rate": 3.345520889226916e-07, "loss": 9.373845387017354e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3075, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 72.72916889190674, "completions/min_length": 29.625, "epoch": 6.115165053363118, "grad_norm": 0.00420416598356941, "kl": 0.08868408203125, "learning_rate": 3.3425440966264046e-07, "loss": 8.86603957042098e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3076, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.875, "completions/mean_length": 55.572917461395264, "completions/min_length": 21.25, "epoch": 6.117150657731447, "grad_norm": 0.00451635606535441, "kl": 0.090179443359375, "learning_rate": 3.3395679638944905e-07, "loss": 9.020412107929587e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3077, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 67.18750190734863, "completions/min_length": 24.75, "epoch": 6.119136262099777, "grad_norm": 0.0036372871808990986, "kl": 0.09808349609375, "learning_rate": 3.336592492216038e-07, "loss": 9.809157199924812e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3078, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 66.12500238418579, "completions/min_length": 27.5, "epoch": 6.121121866468107, "grad_norm": 1.8198851418740167, "kl": 0.09490966796875, "learning_rate": 3.333617682775642e-07, "loss": -0.0024086367338895798, "memory(GiB)": 94.21, "reward": 1.7812500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.7812500055879354, "rewards/CineAccuracyORM/std": 0.1783013790845871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3079, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 62.29166793823242, "completions/min_length": 19.0, "epoch": 6.123107470836436, "grad_norm": 0.0047328098487623635, "kl": 0.085968017578125, "learning_rate": 3.3306435367576374e-07, "loss": 8.594350947532803e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3080, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 72.0416693687439, "completions/min_length": 27.875, "epoch": 6.125093075204766, "grad_norm": 0.003855597711700682, "kl": 0.0885009765625, "learning_rate": 3.327670055346101e-07, "loss": 8.844790136208758e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3081, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 69.87500190734863, "completions/min_length": 26.5, "epoch": 6.127078679573095, "grad_norm": 0.007619012137971114, "kl": 0.1019287109375, "learning_rate": 3.324697239724834e-07, "loss": 0.00010188139276579022, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3082, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 66.06250286102295, "completions/min_length": 21.75, "epoch": 6.129064283941425, "grad_norm": 0.004541770081438353, "kl": 0.09552001953125, "learning_rate": 3.321725091077381e-07, "loss": 9.55801151576452e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3083, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 55.97916889190674, "completions/min_length": 22.875, "epoch": 6.131049888309755, "grad_norm": 0.006358819020512164, "kl": 0.096405029296875, "learning_rate": 3.318753610587015e-07, "loss": 9.647863771533594e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3084, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 59.87500190734863, "completions/min_length": 29.625, "epoch": 6.133035492678084, "grad_norm": 0.0046408929976012115, "kl": 0.094482421875, "learning_rate": 3.315782799436747e-07, "loss": 9.442068403586745e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3085, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 62.59375190734863, "completions/min_length": 26.875, "epoch": 6.135021097046414, "grad_norm": 0.8790198056599116, "kl": 0.081817626953125, "learning_rate": 3.312812658809323e-07, "loss": 0.008015388622879982, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3086, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 59.57291793823242, "completions/min_length": 25.375, "epoch": 6.137006701414743, "grad_norm": 0.006116939070145271, "kl": 0.071868896484375, "learning_rate": 3.3098431898872124e-07, "loss": 7.179949170676991e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3087, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 65.22916889190674, "completions/min_length": 29.375, "epoch": 6.138992305783073, "grad_norm": 0.005717984469833963, "kl": 0.10540771484375, "learning_rate": 3.3068743938526323e-07, "loss": 0.00010539943468756974, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3088, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 58.63541793823242, "completions/min_length": 24.375, "epoch": 6.140977910151403, "grad_norm": 0.004018240263837148, "kl": 0.090789794921875, "learning_rate": 3.3039062718875206e-07, "loss": 9.088363003684208e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3089, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 65.47916889190674, "completions/min_length": 25.0, "epoch": 6.142963514519732, "grad_norm": 0.006415860515539578, "kl": 0.0791015625, "learning_rate": 3.3009388251735487e-07, "loss": 7.90932826930657e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3090, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 76.72916793823242, "completions/min_length": 24.375, "epoch": 6.144949118888062, "grad_norm": 0.003787808274003879, "kl": 0.08837890625, "learning_rate": 3.297972054892122e-07, "loss": 8.840192458592355e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3091, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 72.80208587646484, "completions/min_length": 26.125, "epoch": 6.146934723256392, "grad_norm": 0.01912692166296288, "kl": 0.10272216796875, "learning_rate": 3.2950059622243744e-07, "loss": 0.0001027283287839964, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3092, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 63.239585876464844, "completions/min_length": 21.75, "epoch": 6.148920327624721, "grad_norm": 1.686119681058139, "kl": 0.08721923828125, "learning_rate": 3.29204054835117e-07, "loss": 0.00876564346253872, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3093, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 56.28125238418579, "completions/min_length": 22.125, "epoch": 6.150905931993051, "grad_norm": 0.004917477590137736, "kl": 0.080841064453125, "learning_rate": 3.2890758144531054e-07, "loss": 8.092878124443814e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3094, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 69.83333587646484, "completions/min_length": 22.875, "epoch": 6.15289153636138, "grad_norm": 0.003942004129715165, "kl": 0.0992431640625, "learning_rate": 3.2861117617105037e-07, "loss": 9.917528223013505e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3095, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 69.36458587646484, "completions/min_length": 27.5, "epoch": 6.15487714072971, "grad_norm": 0.004561870749420421, "kl": 0.09515380859375, "learning_rate": 3.2831483913034173e-07, "loss": 9.505756315775216e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3096, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 61.114585399627686, "completions/min_length": 18.5, "epoch": 6.1568627450980395, "grad_norm": 1.054855954888787, "kl": 0.073089599609375, "learning_rate": 3.2801857044116276e-07, "loss": -0.009976135566830635, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3097, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 61.114585399627686, "completions/min_length": 24.625, "epoch": 6.158848349466369, "grad_norm": 0.0074671436207640425, "kl": 0.1092529296875, "learning_rate": 3.277223702214645e-07, "loss": 0.00010932209261227399, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3098, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 63.22916841506958, "completions/min_length": 22.125, "epoch": 6.160833953834699, "grad_norm": 1.1157098221300585, "kl": 0.10870361328125, "learning_rate": 3.2742623858917026e-07, "loss": -0.0015669962158426642, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3099, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 55.44791793823242, "completions/min_length": 21.625, "epoch": 6.162819558203028, "grad_norm": 0.004786355941594205, "kl": 0.082794189453125, "learning_rate": 3.2713017566217626e-07, "loss": 8.283949864562601e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3100, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 58.46875190734863, "completions/min_length": 24.0, "epoch": 6.164805162571358, "grad_norm": 0.006038470188776243, "kl": 0.08477783203125, "learning_rate": 3.268341815583522e-07, "loss": 8.474091009702533e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3101, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 74.73958492279053, "completions/min_length": 27.375, "epoch": 6.1667907669396875, "grad_norm": 0.006091407069440559, "kl": 0.090240478515625, "learning_rate": 3.26538256395539e-07, "loss": 9.038918506121263e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3102, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 60.750001430511475, "completions/min_length": 30.75, "epoch": 6.168776371308017, "grad_norm": 1.6498493795873581, "kl": 0.101470947265625, "learning_rate": 3.262424002915509e-07, "loss": 0.00010155017662327737, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3103, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 57.312501430511475, "completions/min_length": 25.5, "epoch": 6.1707619756763465, "grad_norm": 0.005529079432820773, "kl": 0.09033203125, "learning_rate": 3.259466133641748e-07, "loss": 9.037533891387284e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3104, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 61.27083444595337, "completions/min_length": 19.5, "epoch": 6.1727475800446765, "grad_norm": 0.006956608097374181, "kl": 0.105316162109375, "learning_rate": 3.256508957311695e-07, "loss": 0.00010536552872508764, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3105, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 65.64583492279053, "completions/min_length": 26.25, "epoch": 6.174733184413006, "grad_norm": 0.003881511516277086, "kl": 0.086669921875, "learning_rate": 3.253552475102668e-07, "loss": 8.665473433211446e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3106, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 63.58333444595337, "completions/min_length": 21.75, "epoch": 6.1767187887813355, "grad_norm": 0.0042699233008304804, "kl": 0.0902099609375, "learning_rate": 3.2505966881917e-07, "loss": 9.024595783557743e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3107, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 65.87500238418579, "completions/min_length": 23.75, "epoch": 6.178704393149665, "grad_norm": 0.9087596886124952, "kl": 0.09161376953125, "learning_rate": 3.247641597755559e-07, "loss": -0.005222773179411888, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3108, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 55.968750953674316, "completions/min_length": 22.875, "epoch": 6.1806899975179945, "grad_norm": 0.003960734513593447, "kl": 0.0855712890625, "learning_rate": 3.244687204970729e-07, "loss": 8.55414109537378e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3109, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 66.14583492279053, "completions/min_length": 19.75, "epoch": 6.1826756018863245, "grad_norm": 0.01626856857880739, "kl": 0.104248046875, "learning_rate": 3.2417335110134135e-07, "loss": 0.00010417350858915597, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3110, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 76.01041889190674, "completions/min_length": 27.375, "epoch": 6.1846612062546535, "grad_norm": 0.008821150174212686, "kl": 0.0928955078125, "learning_rate": 3.238780517059544e-07, "loss": 9.291309106629342e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3111, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 66.43750333786011, "completions/min_length": 26.125, "epoch": 6.1866468106229835, "grad_norm": 0.005613471856572807, "kl": 0.101318359375, "learning_rate": 3.235828224284769e-07, "loss": 0.00010137448407476768, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3112, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 62.27083492279053, "completions/min_length": 22.5, "epoch": 6.188632414991313, "grad_norm": 0.01502929158721786, "kl": 0.101165771484375, "learning_rate": 3.2328766338644594e-07, "loss": 0.00010128612484550104, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3113, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 66.73958587646484, "completions/min_length": 26.0, "epoch": 6.1906180193596425, "grad_norm": 0.022065479931969258, "kl": 0.0838623046875, "learning_rate": 3.229925746973706e-07, "loss": 8.390971197513863e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3114, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 58.16666793823242, "completions/min_length": 23.125, "epoch": 6.1926036237279725, "grad_norm": 0.0049344301030036525, "kl": 0.08892822265625, "learning_rate": 3.2269755647873214e-07, "loss": 8.899138629203662e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3115, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 71.38541889190674, "completions/min_length": 19.75, "epoch": 6.1945892280963015, "grad_norm": 0.008573771948103145, "kl": 0.0977783203125, "learning_rate": 3.2240260884798354e-07, "loss": 9.785166184883565e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3116, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 72.9791693687439, "completions/min_length": 26.0, "epoch": 6.1965748324646315, "grad_norm": 0.01391361123301328, "kl": 0.1021728515625, "learning_rate": 3.221077319225499e-07, "loss": 0.00010208313324255869, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3117, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 64.10416889190674, "completions/min_length": 26.625, "epoch": 6.198560436832961, "grad_norm": 0.004979520402488125, "kl": 0.108642578125, "learning_rate": 3.2181292581982775e-07, "loss": 0.00010855172149604186, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3118, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 64.13541841506958, "completions/min_length": 23.375, "epoch": 6.2005460412012905, "grad_norm": 0.003786666186186788, "kl": 0.078826904296875, "learning_rate": 3.215181906571858e-07, "loss": 7.883564830990508e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3119, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 63.541667461395264, "completions/min_length": 24.625, "epoch": 6.2025316455696204, "grad_norm": 0.008177802740061323, "kl": 0.077545166015625, "learning_rate": 3.2122352655196446e-07, "loss": 7.75569787947461e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3120, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 80.57291984558105, "completions/min_length": 35.375, "epoch": 6.2045172499379495, "grad_norm": 0.00405138316536812, "kl": 0.09844970703125, "learning_rate": 3.2092893362147564e-07, "loss": 9.840886195888743e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3121, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.625, "completions/mean_length": 58.60416793823242, "completions/min_length": 23.25, "epoch": 6.2065028543062795, "grad_norm": 0.0038341338556492975, "kl": 0.099761962890625, "learning_rate": 3.2063441198300333e-07, "loss": 9.983066411223263e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3122, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 68.07291889190674, "completions/min_length": 26.375, "epoch": 6.208488458674609, "grad_norm": 0.013338144597359672, "kl": 0.1317138671875, "learning_rate": 3.203399617538027e-07, "loss": 0.00013176453649066389, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3123, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 62.72916793823242, "completions/min_length": 23.75, "epoch": 6.2104740630429385, "grad_norm": 0.005667677148609116, "kl": 0.08251953125, "learning_rate": 3.2004558305110084e-07, "loss": 8.24426970211789e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3124, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 55.11458396911621, "completions/min_length": 23.125, "epoch": 6.212459667411268, "grad_norm": 0.003959666188874111, "kl": 0.08245849609375, "learning_rate": 3.197512759920962e-07, "loss": 8.23399459477514e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3125, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 62.04166841506958, "completions/min_length": 19.25, "epoch": 6.2144452717795975, "grad_norm": 0.003780881275547536, "kl": 0.084503173828125, "learning_rate": 3.194570406939585e-07, "loss": 8.46053590066731e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3126, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 71.05208587646484, "completions/min_length": 22.0, "epoch": 6.216430876147927, "grad_norm": 0.004401607326832969, "kl": 0.098480224609375, "learning_rate": 3.1916287727382925e-07, "loss": 9.85489969025366e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3127, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 71.27083539962769, "completions/min_length": 20.25, "epoch": 6.218416480516257, "grad_norm": 0.003568702121712919, "kl": 0.1007080078125, "learning_rate": 3.1886878584882086e-07, "loss": 0.00010078072955366224, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3128, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 64.01041841506958, "completions/min_length": 16.875, "epoch": 6.2204020848845865, "grad_norm": 0.0335251654545457, "kl": 0.1275634765625, "learning_rate": 3.1857476653601807e-07, "loss": 0.00012765468272846192, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3129, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 61.31250238418579, "completions/min_length": 22.625, "epoch": 6.222387689252916, "grad_norm": 0.003775006586297897, "kl": 0.081756591796875, "learning_rate": 3.1828081945247576e-07, "loss": 8.166694169631228e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3130, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 66.770836353302, "completions/min_length": 26.25, "epoch": 6.224373293621246, "grad_norm": 1.7876240371486267, "kl": 0.46697998046875, "learning_rate": 3.179869447152206e-07, "loss": -0.015796799212694168, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.30977265536785126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3131, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 63.13541841506958, "completions/min_length": 22.125, "epoch": 6.226358897989575, "grad_norm": 0.007546557296293014, "kl": 0.0853271484375, "learning_rate": 3.176931424412505e-07, "loss": 8.531093772035092e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3132, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 66.44791889190674, "completions/min_length": 29.375, "epoch": 6.228344502357905, "grad_norm": 1.149030383377944, "kl": 0.18267822265625, "learning_rate": 3.173994127475344e-07, "loss": 0.0008500342955812812, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3133, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 70.55208587646484, "completions/min_length": 24.75, "epoch": 6.230330106726234, "grad_norm": 1.023121400909042, "kl": 0.08843994140625, "learning_rate": 3.171057557510124e-07, "loss": -0.0018992971163243055, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3134, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 63.81250190734863, "completions/min_length": 23.75, "epoch": 6.232315711094564, "grad_norm": 0.004095120055570903, "kl": 0.092498779296875, "learning_rate": 3.168121715685953e-07, "loss": 9.244475950254127e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3135, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 65.36458492279053, "completions/min_length": 25.125, "epoch": 6.234301315462894, "grad_norm": 0.009297164469579195, "kl": 0.1077880859375, "learning_rate": 3.1651866031716565e-07, "loss": 0.00010797010327223688, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3136, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 71.65625190734863, "completions/min_length": 31.5, "epoch": 6.236286919831223, "grad_norm": 0.00600505478165713, "kl": 0.086944580078125, "learning_rate": 3.162252221135766e-07, "loss": 8.684572821948677e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3137, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 64.00000143051147, "completions/min_length": 22.5, "epoch": 6.238272524199553, "grad_norm": 1.5793968582535947, "kl": 0.108154296875, "learning_rate": 3.159318570746518e-07, "loss": -0.011972403153777122, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3138, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 57.156251430511475, "completions/min_length": 23.375, "epoch": 6.240258128567882, "grad_norm": 0.005971067252399188, "kl": 0.09368896484375, "learning_rate": 3.156385653171862e-07, "loss": 9.376247180625796e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3139, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 67.31250143051147, "completions/min_length": 24.0, "epoch": 6.242243732936212, "grad_norm": 0.005082649895943942, "kl": 0.0849609375, "learning_rate": 3.153453469579458e-07, "loss": 8.502871787641197e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3140, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 64.46875238418579, "completions/min_length": 19.75, "epoch": 6.244229337304542, "grad_norm": 0.004809543612420979, "kl": 0.09869384765625, "learning_rate": 3.150522021136668e-07, "loss": 9.877372940536588e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3141, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 65.13541793823242, "completions/min_length": 21.75, "epoch": 6.246214941672871, "grad_norm": 0.005891090358056983, "kl": 0.086822509765625, "learning_rate": 3.1475913090105646e-07, "loss": 8.68485658429563e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3142, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 69.25000238418579, "completions/min_length": 23.375, "epoch": 6.248200546041201, "grad_norm": 0.004289105541940191, "kl": 0.09405517578125, "learning_rate": 3.1446613343679285e-07, "loss": 9.39553719945252e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3143, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 65.57291793823242, "completions/min_length": 20.875, "epoch": 6.250186150409531, "grad_norm": 1.1534487605935733, "kl": 0.10833740234375, "learning_rate": 3.141732098375245e-07, "loss": 0.0024584531784057617, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3144, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 63.8541693687439, "completions/min_length": 26.125, "epoch": 6.25217175477786, "grad_norm": 0.006726557592677674, "kl": 0.092559814453125, "learning_rate": 3.138803602198704e-07, "loss": 9.255253098672256e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3145, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 73.19791889190674, "completions/min_length": 26.125, "epoch": 6.25415735914619, "grad_norm": 0.0037410068523653145, "kl": 0.093231201171875, "learning_rate": 3.1358758470042045e-07, "loss": 9.316120122093707e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3146, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 61.46875190734863, "completions/min_length": 23.25, "epoch": 6.256142963514519, "grad_norm": 0.005160159337807292, "kl": 0.073211669921875, "learning_rate": 3.1329488339573464e-07, "loss": 7.324671605601907e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3147, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 65.94791984558105, "completions/min_length": 21.5, "epoch": 6.258128567882849, "grad_norm": 0.004274866184939987, "kl": 0.0909423828125, "learning_rate": 3.130022564223436e-07, "loss": 9.085440979106352e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3148, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 65.01041841506958, "completions/min_length": 25.25, "epoch": 6.260114172251179, "grad_norm": 1.473045619044801, "kl": 0.083404541015625, "learning_rate": 3.127097038967483e-07, "loss": 8.341297507286072e-05, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3149, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 61.53125286102295, "completions/min_length": 28.625, "epoch": 6.262099776619508, "grad_norm": 0.016537414305204987, "kl": 0.08648681640625, "learning_rate": 3.124172259354206e-07, "loss": 8.64926478243433e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3150, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 62.45833444595337, "completions/min_length": 15.75, "epoch": 6.264085380987838, "grad_norm": 0.006032334743327232, "kl": 0.0772705078125, "learning_rate": 3.1212482265480177e-07, "loss": 7.726001786068082e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3151, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 76.48958492279053, "completions/min_length": 33.25, "epoch": 6.266070985356167, "grad_norm": 0.004036827760249734, "kl": 0.081878662109375, "learning_rate": 3.118324941713041e-07, "loss": 8.187924686353654e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3152, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 64.11458539962769, "completions/min_length": 22.0, "epoch": 6.268056589724497, "grad_norm": 0.005858453117168549, "kl": 0.084381103515625, "learning_rate": 3.1154024060130956e-07, "loss": 8.434015035163611e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3153, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.125, "completions/mean_length": 67.52083539962769, "completions/min_length": 26.0, "epoch": 6.270042194092827, "grad_norm": 0.003850758601447791, "kl": 0.101776123046875, "learning_rate": 3.1124806206117076e-07, "loss": 0.00010177911462960765, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3154, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 73.93750286102295, "completions/min_length": 25.25, "epoch": 6.272027798461156, "grad_norm": 0.793783841303115, "kl": 0.09716796875, "learning_rate": 3.1095595866721005e-07, "loss": -0.0011603438761085272, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3155, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 69.47916889190674, "completions/min_length": 29.5, "epoch": 6.274013402829486, "grad_norm": 0.005801707410151214, "kl": 0.08837890625, "learning_rate": 3.106639305357198e-07, "loss": 8.832634193822742e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3156, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 69.10416793823242, "completions/min_length": 21.125, "epoch": 6.275999007197816, "grad_norm": 0.003441453882323851, "kl": 0.075042724609375, "learning_rate": 3.103719777829633e-07, "loss": 7.503099914174527e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3157, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 70.81250286102295, "completions/min_length": 22.375, "epoch": 6.277984611566145, "grad_norm": 0.005261579875530533, "kl": 0.10870361328125, "learning_rate": 3.1008010052517263e-07, "loss": 0.00010861671034945175, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3158, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 64.79166841506958, "completions/min_length": 19.625, "epoch": 6.279970215934475, "grad_norm": 0.009419882186383302, "kl": 0.08233642578125, "learning_rate": 3.097882988785506e-07, "loss": 8.234622509917244e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3159, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 66.91666984558105, "completions/min_length": 24.25, "epoch": 6.281955820302804, "grad_norm": 0.004074886471241036, "kl": 0.09979248046875, "learning_rate": 3.094965729592697e-07, "loss": 9.988283272832632e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3160, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 64.37500190734863, "completions/min_length": 25.125, "epoch": 6.283941424671134, "grad_norm": 0.009121828539919732, "kl": 0.0975341796875, "learning_rate": 3.09204922883472e-07, "loss": 9.764295828063041e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3161, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 70.07291793823242, "completions/min_length": 29.875, "epoch": 6.285927029039464, "grad_norm": 0.006788249231196899, "kl": 0.0780029296875, "learning_rate": 3.089133487672698e-07, "loss": 7.801067840773612e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3162, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 73.71875286102295, "completions/min_length": 30.5, "epoch": 6.287912633407793, "grad_norm": 0.004614706270702021, "kl": 0.089111328125, "learning_rate": 3.0862185072674496e-07, "loss": 8.905110007617623e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3163, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 69.47916889190674, "completions/min_length": 23.875, "epoch": 6.289898237776123, "grad_norm": 0.005489914410934841, "kl": 0.07635498046875, "learning_rate": 3.0833042887794915e-07, "loss": 7.634704525116831e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3164, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 71.43750286102295, "completions/min_length": 17.875, "epoch": 6.291883842144452, "grad_norm": 0.015235608702993992, "kl": 0.0916748046875, "learning_rate": 3.080390833369036e-07, "loss": 9.160333866020665e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3165, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 68.17708396911621, "completions/min_length": 30.0, "epoch": 6.293869446512782, "grad_norm": 0.0037352158411413285, "kl": 0.0762939453125, "learning_rate": 3.0774781421959927e-07, "loss": 7.629330502822995e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3166, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 74.645836353302, "completions/min_length": 27.5, "epoch": 6.295855050881112, "grad_norm": 0.003958134861588333, "kl": 0.07147216796875, "learning_rate": 3.074566216419964e-07, "loss": 7.147947326302528e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3167, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 56.145835399627686, "completions/min_length": 20.0, "epoch": 6.297840655249441, "grad_norm": 0.003947371774646639, "kl": 0.074981689453125, "learning_rate": 3.07165505720025e-07, "loss": 7.496902981074527e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3168, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 72.45833444595337, "completions/min_length": 23.5, "epoch": 6.299826259617771, "grad_norm": 0.003002157482170515, "kl": 0.08294677734375, "learning_rate": 3.068744665695846e-07, "loss": 8.296724263345823e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3169, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 77.73958444595337, "completions/min_length": 23.0, "epoch": 6.301811863986101, "grad_norm": 0.005636080634464851, "kl": 0.091400146484375, "learning_rate": 3.0658350430654423e-07, "loss": 9.134468564298004e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3170, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 74.84375286102295, "completions/min_length": 27.875, "epoch": 6.30379746835443, "grad_norm": 0.004474416035252437, "kl": 0.08782958984375, "learning_rate": 3.0629261904674203e-07, "loss": 8.786410035099834e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3171, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 64.18750143051147, "completions/min_length": 20.125, "epoch": 6.30578307272276, "grad_norm": 0.0038355680455892317, "kl": 0.089324951171875, "learning_rate": 3.060018109059857e-07, "loss": 8.934485231293365e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3172, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 60.322917461395264, "completions/min_length": 23.75, "epoch": 6.307768677091089, "grad_norm": 0.00626091361326418, "kl": 0.090972900390625, "learning_rate": 3.057110800000522e-07, "loss": 9.085195779334754e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3173, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 66.78125190734863, "completions/min_length": 21.0, "epoch": 6.309754281459419, "grad_norm": 0.005614855591660481, "kl": 0.0816650390625, "learning_rate": 3.054204264446877e-07, "loss": 8.169886859832332e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3174, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 67.80208587646484, "completions/min_length": 22.625, "epoch": 6.311739885827749, "grad_norm": 1.3443157770217442, "kl": 0.0947265625, "learning_rate": 3.051298503556075e-07, "loss": 0.012410818599164486, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.833333333954215, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3175, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 65.90625238418579, "completions/min_length": 24.625, "epoch": 6.313725490196078, "grad_norm": 1.5680957958398518, "kl": 0.10076904296875, "learning_rate": 3.04839351848496e-07, "loss": -0.0017739187460392714, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3176, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 63.520835876464844, "completions/min_length": 20.5, "epoch": 6.315711094564408, "grad_norm": 0.003410419424258827, "kl": 0.076080322265625, "learning_rate": 3.0454893103900735e-07, "loss": 7.616783841513097e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3177, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 74.62500190734863, "completions/min_length": 29.625, "epoch": 6.317696698932737, "grad_norm": 0.007302252948029951, "kl": 0.081024169921875, "learning_rate": 3.042585880427639e-07, "loss": 8.102280116872862e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3178, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 61.16666841506958, "completions/min_length": 19.625, "epoch": 6.319682303301067, "grad_norm": 0.003403570888287035, "kl": 0.08074951171875, "learning_rate": 3.039683229753575e-07, "loss": 8.084949513431638e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3179, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 65.15625047683716, "completions/min_length": 19.75, "epoch": 6.321667907669397, "grad_norm": 2.1251951247506966, "kl": 0.081329345703125, "learning_rate": 3.0367813595234883e-07, "loss": 0.014659504406154156, "memory(GiB)": 94.21, "reward": 1.7500000149011612, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.2592903971672058, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3180, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 68.03125190734863, "completions/min_length": 24.125, "epoch": 6.323653512037726, "grad_norm": 0.004987353934230566, "kl": 0.07977294921875, "learning_rate": 3.033880270892676e-07, "loss": 7.975176413310692e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3181, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 65.39583539962769, "completions/min_length": 25.5, "epoch": 6.325639116406056, "grad_norm": 0.003433414165108702, "kl": 0.072967529296875, "learning_rate": 3.0309799650161227e-07, "loss": 7.288772030733526e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3182, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 67.39583587646484, "completions/min_length": 18.875, "epoch": 6.327624720774386, "grad_norm": 0.004526580003160603, "kl": 0.09686279296875, "learning_rate": 3.0280804430485017e-07, "loss": 9.676550689619035e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3183, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 65.78125190734863, "completions/min_length": 22.75, "epoch": 6.329610325142715, "grad_norm": 0.0033851830381136086, "kl": 0.06414794921875, "learning_rate": 3.0251817061441776e-07, "loss": 6.408228364307433e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3184, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 55.895835399627686, "completions/min_length": 18.625, "epoch": 6.331595929511045, "grad_norm": 0.006131008702751606, "kl": 0.077880859375, "learning_rate": 3.0222837554571967e-07, "loss": 7.786209607729688e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3185, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.625, "completions/mean_length": 75.28125190734863, "completions/min_length": 26.375, "epoch": 6.333581533879374, "grad_norm": 0.7290514364235661, "kl": 0.1021728515625, "learning_rate": 3.0193865921412963e-07, "loss": 0.01342483889311552, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3186, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 64.42708444595337, "completions/min_length": 20.75, "epoch": 6.335567138247704, "grad_norm": 0.00534200322926987, "kl": 0.0909423828125, "learning_rate": 3.0164902173498986e-07, "loss": 9.094739652937278e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3187, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 67.45833444595337, "completions/min_length": 27.75, "epoch": 6.337552742616034, "grad_norm": 0.004052765995189815, "kl": 0.076995849609375, "learning_rate": 3.0135946322361114e-07, "loss": 7.689397898502648e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3188, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.875, "completions/mean_length": 58.239585399627686, "completions/min_length": 22.75, "epoch": 6.339538346984363, "grad_norm": 0.007492461380118997, "kl": 0.093170166015625, "learning_rate": 3.0106998379527296e-07, "loss": 9.316718205809593e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3189, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 62.71875238418579, "completions/min_length": 27.0, "epoch": 6.341523951352693, "grad_norm": 0.006452160772387257, "kl": 0.086334228515625, "learning_rate": 3.0078058356522325e-07, "loss": 8.632532262708992e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3190, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 65.06250238418579, "completions/min_length": 22.125, "epoch": 6.343509555721022, "grad_norm": 1.2068679914908, "kl": 0.092987060546875, "learning_rate": 3.0049126264867846e-07, "loss": 0.001519040553830564, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3191, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 76.177086353302, "completions/min_length": 25.875, "epoch": 6.345495160089352, "grad_norm": 0.007136411488001945, "kl": 0.09564208984375, "learning_rate": 3.0020202116082347e-07, "loss": 9.564954962115735e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3192, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 61.91666841506958, "completions/min_length": 19.125, "epoch": 6.347480764457682, "grad_norm": 1.3304155625428375, "kl": 0.097320556640625, "learning_rate": 2.999128592168114e-07, "loss": 0.006263755261898041, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3193, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 66.34375143051147, "completions/min_length": 25.375, "epoch": 6.349466368826011, "grad_norm": 0.0112759389636487, "kl": 0.096405029296875, "learning_rate": 2.99623776931764e-07, "loss": 9.63092315942049e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3194, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 67.67708539962769, "completions/min_length": 23.875, "epoch": 6.351451973194341, "grad_norm": 0.005925655658320405, "kl": 0.0809326171875, "learning_rate": 2.9933477442077084e-07, "loss": 8.095103839877993e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3195, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 65.17708444595337, "completions/min_length": 26.25, "epoch": 6.353437577562671, "grad_norm": 0.004298581437508858, "kl": 0.0694580078125, "learning_rate": 2.990458517988901e-07, "loss": 6.947157817194238e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3196, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 66.36458444595337, "completions/min_length": 23.75, "epoch": 6.355423181931, "grad_norm": 0.0031663668112983856, "kl": 0.07598876953125, "learning_rate": 2.9875700918114786e-07, "loss": 7.596309296786785e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3197, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 74.50000286102295, "completions/min_length": 37.25, "epoch": 6.35740878629933, "grad_norm": 1.2698018778047133, "kl": 0.08929443359375, "learning_rate": 2.9846824668253887e-07, "loss": 0.0009202770888805389, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3198, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 57.73958444595337, "completions/min_length": 23.5, "epoch": 6.359394390667659, "grad_norm": 1.9044026818443813, "kl": 0.09881591796875, "learning_rate": 2.981795644180255e-07, "loss": 0.007204392924904823, "memory(GiB)": 94.21, "reward": 1.9062500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.9062500074505806, "rewards/CineAccuracyORM/std": 0.15001969039440155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3199, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 66.93750238418579, "completions/min_length": 23.375, "epoch": 6.361379995035989, "grad_norm": 0.0040278264469864634, "kl": 0.089141845703125, "learning_rate": 2.978909625025383e-07, "loss": 8.910940960049629e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3200, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.375, "completions/mean_length": 65.3854193687439, "completions/min_length": 22.0, "epoch": 6.363365599404319, "grad_norm": 0.006368258610943689, "kl": 0.09259033203125, "learning_rate": 2.9760244105097585e-07, "loss": 9.261471859645098e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3201, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 61.60416889190674, "completions/min_length": 23.5, "epoch": 6.365351203772648, "grad_norm": 0.008213145947016, "kl": 0.10150146484375, "learning_rate": 2.9731400017820484e-07, "loss": 0.00010158667282667011, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3202, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 64.3229193687439, "completions/min_length": 19.875, "epoch": 6.367336808140978, "grad_norm": 0.005862015906315552, "kl": 0.08489990234375, "learning_rate": 2.970256399990596e-07, "loss": 8.489063475281e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3203, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 66.53125238418579, "completions/min_length": 22.75, "epoch": 6.369322412509307, "grad_norm": 0.00636125243233647, "kl": 0.0858154296875, "learning_rate": 2.9673736062834233e-07, "loss": 8.585414616391063e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3204, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 61.18750190734863, "completions/min_length": 27.875, "epoch": 6.371308016877637, "grad_norm": 0.9202924284098827, "kl": 0.2208251953125, "learning_rate": 2.964491621808235e-07, "loss": -0.009702635928988457, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3205, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 68.16666889190674, "completions/min_length": 24.375, "epoch": 6.373293621245967, "grad_norm": 0.006685737713924595, "kl": 0.11236572265625, "learning_rate": 2.96161044771241e-07, "loss": 0.00011227602226426825, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3206, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 59.81250238418579, "completions/min_length": 20.875, "epoch": 6.375279225614296, "grad_norm": 0.007893269074993735, "kl": 0.09259033203125, "learning_rate": 2.9587300851430053e-07, "loss": 9.266478446079418e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3207, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 64.14583539962769, "completions/min_length": 25.0, "epoch": 6.377264829982626, "grad_norm": 0.005524452127413556, "kl": 0.084869384765625, "learning_rate": 2.955850535246753e-07, "loss": 8.495587098877877e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3208, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 66.31250190734863, "completions/min_length": 22.25, "epoch": 6.379250434350956, "grad_norm": 0.005304418706715149, "kl": 0.09014892578125, "learning_rate": 2.9529717991700654e-07, "loss": 9.00080194696784e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3209, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 54.312500953674316, "completions/min_length": 25.0, "epoch": 6.381236038719285, "grad_norm": 1.6934375785685654, "kl": 0.083740234375, "learning_rate": 2.950093878059027e-07, "loss": 0.001064905314706266, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3210, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 65.84375143051147, "completions/min_length": 28.125, "epoch": 6.383221643087615, "grad_norm": 0.00813484045008924, "kl": 0.09820556640625, "learning_rate": 2.947216773059401e-07, "loss": 9.817527461564168e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3211, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 61.98958444595337, "completions/min_length": 25.5, "epoch": 6.385207247455944, "grad_norm": 0.006731930220970469, "kl": 0.108154296875, "learning_rate": 2.944340485316624e-07, "loss": 0.000108105901745148, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3212, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 61.916666984558105, "completions/min_length": 24.375, "epoch": 6.387192851824274, "grad_norm": 0.00810795712192299, "kl": 0.083831787109375, "learning_rate": 2.9414650159758083e-07, "loss": 8.381686348002404e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3213, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 60.427085399627686, "completions/min_length": 20.125, "epoch": 6.389178456192604, "grad_norm": 0.006882810262558977, "kl": 0.1248779296875, "learning_rate": 2.93859036618174e-07, "loss": 0.00012484054605010897, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3214, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 62.645835399627686, "completions/min_length": 24.125, "epoch": 6.391164060560933, "grad_norm": 0.0048933111916164085, "kl": 0.089141845703125, "learning_rate": 2.9357165370788763e-07, "loss": 8.914059435483068e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3215, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 63.218750953674316, "completions/min_length": 27.375, "epoch": 6.393149664929263, "grad_norm": 0.007082944969145969, "kl": 0.11102294921875, "learning_rate": 2.932843529811352e-07, "loss": 0.00011099546100012958, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3216, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 67.46875286102295, "completions/min_length": 27.125, "epoch": 6.395135269297592, "grad_norm": 0.006149838922129725, "kl": 0.0863037109375, "learning_rate": 2.9299713455229706e-07, "loss": 8.632345998194069e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3217, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 68.36458539962769, "completions/min_length": 21.75, "epoch": 6.397120873665922, "grad_norm": 0.007126567103740635, "kl": 0.0880126953125, "learning_rate": 2.9270999853572115e-07, "loss": 8.799279748927802e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3218, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 64.82291746139526, "completions/min_length": 23.0, "epoch": 6.399106478034252, "grad_norm": 0.0063088092896884706, "kl": 0.1097412109375, "learning_rate": 2.9242294504572253e-07, "loss": 0.0001097028361982666, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3219, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 59.14583492279053, "completions/min_length": 25.5, "epoch": 6.401092082402581, "grad_norm": 0.007119185629899003, "kl": 0.07855224609375, "learning_rate": 2.9213597419658314e-07, "loss": 7.84529693191871e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3220, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 71.6666693687439, "completions/min_length": 26.25, "epoch": 6.403077686770911, "grad_norm": 0.005209946067006763, "kl": 0.095458984375, "learning_rate": 2.918490861025523e-07, "loss": 9.556749137118459e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3221, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 67.04166889190674, "completions/min_length": 23.875, "epoch": 6.405063291139241, "grad_norm": 0.007251241966328845, "kl": 0.08367919921875, "learning_rate": 2.9156228087784653e-07, "loss": 8.368380804313347e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3222, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 54.20833492279053, "completions/min_length": 16.625, "epoch": 6.40704889550757, "grad_norm": 0.00458563815799741, "kl": 0.09002685546875, "learning_rate": 2.912755586366485e-07, "loss": 9.006025356939062e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3223, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 68.020836353302, "completions/min_length": 24.125, "epoch": 6.4090344998759, "grad_norm": 0.0045176839309874805, "kl": 0.084991455078125, "learning_rate": 2.909889194931091e-07, "loss": 8.502715354552492e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3224, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 58.33333444595337, "completions/min_length": 28.0, "epoch": 6.411020104244229, "grad_norm": 0.9724183104346384, "kl": 0.089080810546875, "learning_rate": 2.90702363561345e-07, "loss": 0.006806063931435347, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3225, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 60.083335876464844, "completions/min_length": 24.5, "epoch": 6.413005708612559, "grad_norm": 0.006325012243000106, "kl": 0.08892822265625, "learning_rate": 2.904158909554405e-07, "loss": 8.890218305168673e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3226, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 69.28125238418579, "completions/min_length": 24.375, "epoch": 6.414991312980889, "grad_norm": 0.003616058769709026, "kl": 0.106201171875, "learning_rate": 2.901295017894466e-07, "loss": 0.000106100516859442, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3227, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 65.02083539962769, "completions/min_length": 22.375, "epoch": 6.416976917349218, "grad_norm": 0.007624530511207928, "kl": 0.09033203125, "learning_rate": 2.8984319617738083e-07, "loss": 9.03638283489272e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3228, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 70.2291693687439, "completions/min_length": 25.25, "epoch": 6.418962521717548, "grad_norm": 0.005822088922609145, "kl": 0.108123779296875, "learning_rate": 2.8955697423322723e-07, "loss": 0.00010817102156579494, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3229, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 68.06250190734863, "completions/min_length": 26.0, "epoch": 6.420948126085877, "grad_norm": 0.003536410411496393, "kl": 0.095703125, "learning_rate": 2.892708360709374e-07, "loss": 9.572417184244841e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3230, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 65.38541984558105, "completions/min_length": 26.625, "epoch": 6.422933730454207, "grad_norm": 0.005904712799171937, "kl": 0.103790283203125, "learning_rate": 2.8898478180442853e-07, "loss": 0.00010377258149674162, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3231, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 67.53125238418579, "completions/min_length": 23.75, "epoch": 6.424919334822537, "grad_norm": 0.004280373173076877, "kl": 0.0972900390625, "learning_rate": 2.8869881154758527e-07, "loss": 9.732063335832208e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3232, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 62.18750238418579, "completions/min_length": 22.75, "epoch": 6.426904939190866, "grad_norm": 0.00457266950693342, "kl": 0.08758544921875, "learning_rate": 2.8841292541425856e-07, "loss": 8.750433335080743e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3233, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 68.23958587646484, "completions/min_length": 23.125, "epoch": 6.428890543559196, "grad_norm": 0.004023876628912124, "kl": 0.085906982421875, "learning_rate": 2.881271235182655e-07, "loss": 8.594644896220416e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3234, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.625, "completions/mean_length": 59.60416889190674, "completions/min_length": 27.125, "epoch": 6.430876147927526, "grad_norm": 0.003938395012429837, "kl": 0.08551025390625, "learning_rate": 2.8784140597339023e-07, "loss": 8.552710642106831e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3235, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 54.687501430511475, "completions/min_length": 19.0, "epoch": 6.432861752295855, "grad_norm": 0.006045316535094096, "kl": 0.07733154296875, "learning_rate": 2.875557728933826e-07, "loss": 7.739596185274422e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3236, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 63.375001430511475, "completions/min_length": 21.25, "epoch": 6.434847356664185, "grad_norm": 0.007648025007786075, "kl": 0.0816650390625, "learning_rate": 2.872702243919598e-07, "loss": 8.164472819771618e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3237, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 61.427085876464844, "completions/min_length": 26.375, "epoch": 6.436832961032514, "grad_norm": 0.014048365116325687, "kl": 0.08197021484375, "learning_rate": 2.869847605828042e-07, "loss": 8.192491804948077e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3238, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 59.770835399627686, "completions/min_length": 20.625, "epoch": 6.438818565400844, "grad_norm": 0.007596259507577925, "kl": 0.10968017578125, "learning_rate": 2.866993815795653e-07, "loss": 0.00010956473124679178, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3239, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 66.35416889190674, "completions/min_length": 26.5, "epoch": 6.440804169769174, "grad_norm": 0.004828051474948282, "kl": 0.0782470703125, "learning_rate": 2.8641408749585884e-07, "loss": 7.840296166250482e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3240, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 60.52083492279053, "completions/min_length": 27.625, "epoch": 6.442789774137503, "grad_norm": 0.005629559002850129, "kl": 0.07391357421875, "learning_rate": 2.8612887844526615e-07, "loss": 7.402936171274632e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3241, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 67.14583492279053, "completions/min_length": 24.125, "epoch": 6.444775378505833, "grad_norm": 0.0039585856233579336, "kl": 0.09979248046875, "learning_rate": 2.8584375454133503e-07, "loss": 9.972216503228992e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3242, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 68.18750143051147, "completions/min_length": 22.625, "epoch": 6.446760982874162, "grad_norm": 0.005487321365180753, "kl": 0.09295654296875, "learning_rate": 2.855587158975796e-07, "loss": 9.301453246735036e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3243, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 57.97916841506958, "completions/min_length": 19.875, "epoch": 6.448746587242492, "grad_norm": 0.005562677396413367, "kl": 0.071624755859375, "learning_rate": 2.852737626274797e-07, "loss": 7.15503265382722e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3244, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 64.71875095367432, "completions/min_length": 19.5, "epoch": 6.450732191610822, "grad_norm": 0.004315901498022904, "kl": 0.08074951171875, "learning_rate": 2.849888948444812e-07, "loss": 8.06492316769436e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3245, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 67.78125190734863, "completions/min_length": 24.625, "epoch": 6.452717795979151, "grad_norm": 0.004295211493529179, "kl": 0.0836181640625, "learning_rate": 2.847041126619964e-07, "loss": 8.36151884868741e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3246, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 62.770835399627686, "completions/min_length": 26.125, "epoch": 6.454703400347481, "grad_norm": 0.003492843423629508, "kl": 0.087158203125, "learning_rate": 2.844194161934028e-07, "loss": 8.721269841771573e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3247, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 68.71875333786011, "completions/min_length": 31.5, "epoch": 6.456689004715811, "grad_norm": 0.0046016102960333245, "kl": 0.1064453125, "learning_rate": 2.841348055520445e-07, "loss": 0.00010640530672390014, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3248, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 56.51041841506958, "completions/min_length": 15.0, "epoch": 6.45867460908414, "grad_norm": 0.0033017773213425985, "kl": 0.07391357421875, "learning_rate": 2.8385028085123084e-07, "loss": 7.382655167020857e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3249, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 66.73958587646484, "completions/min_length": 25.25, "epoch": 6.46066021345247, "grad_norm": 0.007200043131407117, "kl": 0.087799072265625, "learning_rate": 2.8356584220423706e-07, "loss": 8.77869242685847e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3250, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 66.81250238418579, "completions/min_length": 25.125, "epoch": 6.462645817820799, "grad_norm": 0.003764279457579972, "kl": 0.0916748046875, "learning_rate": 2.8328148972430463e-07, "loss": 9.156690066447482e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3251, "train_speed(iter/s)": 0.022647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 64.89583444595337, "completions/min_length": 24.125, "epoch": 6.464631422189129, "grad_norm": 0.011476157212154402, "kl": 0.10333251953125, "learning_rate": 2.8299722352463994e-07, "loss": 0.00010348884097766131, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3252, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 66.72916793823242, "completions/min_length": 21.25, "epoch": 6.466617026557459, "grad_norm": 0.003595420393572264, "kl": 0.100616455078125, "learning_rate": 2.8271304371841565e-07, "loss": 0.0001006111124297604, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3253, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 53.354166984558105, "completions/min_length": 16.375, "epoch": 6.468602630925788, "grad_norm": 0.004320378633937529, "kl": 0.10955810546875, "learning_rate": 2.8242895041877004e-07, "loss": 0.00010947669215966016, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3254, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 62.46875190734863, "completions/min_length": 21.0, "epoch": 6.470588235294118, "grad_norm": 0.004914148393736551, "kl": 0.09356689453125, "learning_rate": 2.821449437388062e-07, "loss": 9.368818427901715e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3255, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 64.03125143051147, "completions/min_length": 26.875, "epoch": 6.472573839662447, "grad_norm": 0.005483349875600164, "kl": 0.0869140625, "learning_rate": 2.8186102379159376e-07, "loss": 8.685316424816847e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3256, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 60.83333444595337, "completions/min_length": 25.875, "epoch": 6.474559444030777, "grad_norm": 0.003428552988963893, "kl": 0.07391357421875, "learning_rate": 2.8157719069016703e-07, "loss": 7.392094994429499e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3257, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 63.562501430511475, "completions/min_length": 21.0, "epoch": 6.476545048399107, "grad_norm": 0.0043699131111029485, "kl": 0.081634521484375, "learning_rate": 2.81293444547526e-07, "loss": 8.163400343619287e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3258, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 58.54166793823242, "completions/min_length": 19.25, "epoch": 6.478530652767436, "grad_norm": 0.0088400725748611, "kl": 0.0804443359375, "learning_rate": 2.810097854766361e-07, "loss": 8.042525587370619e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3259, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 61.00000238418579, "completions/min_length": 25.125, "epoch": 6.480516257135766, "grad_norm": 0.003855738016349987, "kl": 0.0684814453125, "learning_rate": 2.8072621359042837e-07, "loss": 6.852075603092089e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3260, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 70.75000143051147, "completions/min_length": 26.25, "epoch": 6.482501861504096, "grad_norm": 1.1249144863329308, "kl": 0.08453369140625, "learning_rate": 2.8044272900179835e-07, "loss": 0.012785809114575386, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3261, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 56.91666841506958, "completions/min_length": 25.5, "epoch": 6.484487465872425, "grad_norm": 1.853775272476962, "kl": 0.103271484375, "learning_rate": 2.801593318236077e-07, "loss": 0.0030850651673972607, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3262, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 63.07291841506958, "completions/min_length": 14.75, "epoch": 6.486473070240755, "grad_norm": 0.016466420466611575, "kl": 0.0888671875, "learning_rate": 2.7987602216868255e-07, "loss": 8.875381899997592e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3263, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 68.770836353302, "completions/min_length": 22.25, "epoch": 6.488458674609084, "grad_norm": 0.005128073103485684, "kl": 0.1048583984375, "learning_rate": 2.79592800149815e-07, "loss": 0.00010473666770849377, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3264, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 59.25000238418579, "completions/min_length": 22.375, "epoch": 6.490444278977414, "grad_norm": 0.08252447953572653, "kl": 0.21771240234375, "learning_rate": 2.7930966587976143e-07, "loss": 0.00021706405095756054, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3265, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 67.020836353302, "completions/min_length": 25.25, "epoch": 6.492429883345744, "grad_norm": 0.0030416034477067193, "kl": 0.0877685546875, "learning_rate": 2.7902661947124333e-07, "loss": 8.780603820923716e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3266, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 65.71875238418579, "completions/min_length": 26.125, "epoch": 6.494415487714073, "grad_norm": 0.0034813193820282407, "kl": 0.07928466796875, "learning_rate": 2.787436610369483e-07, "loss": 7.926626858534291e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3267, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 63.06250190734863, "completions/min_length": 24.875, "epoch": 6.496401092082403, "grad_norm": 0.010940672892437874, "kl": 0.084136962890625, "learning_rate": 2.784607906895278e-07, "loss": 8.40155262267217e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3268, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 59.79166793823242, "completions/min_length": 19.375, "epoch": 6.498386696450732, "grad_norm": 0.0033375829341551135, "kl": 0.083587646484375, "learning_rate": 2.7817800854159815e-07, "loss": 8.355158934136853e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3269, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 58.10416793823242, "completions/min_length": 23.5, "epoch": 6.500372300819062, "grad_norm": 2.0072254655143227, "kl": 0.069976806640625, "learning_rate": 2.778953147057416e-07, "loss": 7.003545761108398e-05, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393530294299126, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3270, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 59.91666841506958, "completions/min_length": 23.5, "epoch": 6.502357905187392, "grad_norm": 0.004254714512047353, "kl": 0.074066162109375, "learning_rate": 2.776127092945041e-07, "loss": 7.399563764920458e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3271, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 58.60416889190674, "completions/min_length": 24.25, "epoch": 6.504343509555721, "grad_norm": 2.4980405785880406, "kl": 0.0784912109375, "learning_rate": 2.7733019242039735e-07, "loss": 7.837265729904175e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921442165970802, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3272, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 70.5729193687439, "completions/min_length": 25.25, "epoch": 6.506329113924051, "grad_norm": 0.004838239962596815, "kl": 0.0928955078125, "learning_rate": 2.770477641958968e-07, "loss": 9.30547685129568e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3273, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 60.302085399627686, "completions/min_length": 15.0, "epoch": 6.508314718292381, "grad_norm": 0.004417981794306718, "kl": 0.0765380859375, "learning_rate": 2.767654247334436e-07, "loss": 7.65023214626126e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3274, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 68.36458492279053, "completions/min_length": 21.625, "epoch": 6.51030032266071, "grad_norm": 0.9052182285975585, "kl": 0.093994140625, "learning_rate": 2.7648317414544315e-07, "loss": 0.0064375679939985275, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3275, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 64.27083444595337, "completions/min_length": 26.125, "epoch": 6.51228592702904, "grad_norm": 0.004279239580923891, "kl": 0.078399658203125, "learning_rate": 2.762010125442651e-07, "loss": 7.841864135116339e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3276, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 55.447916984558105, "completions/min_length": 20.0, "epoch": 6.514271531397369, "grad_norm": 0.008940178048317862, "kl": 0.105072021484375, "learning_rate": 2.7591894004224436e-07, "loss": 0.0001050223654601723, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3277, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 59.67708492279053, "completions/min_length": 20.125, "epoch": 6.516257135765699, "grad_norm": 0.008766169059350301, "kl": 0.0819091796875, "learning_rate": 2.756369567516799e-07, "loss": 8.19417618913576e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3278, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 59.54166841506958, "completions/min_length": 22.375, "epoch": 6.518242740134029, "grad_norm": 0.003470903820932684, "kl": 0.080810546875, "learning_rate": 2.753550627848349e-07, "loss": 8.078533574007452e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3279, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 65.11458492279053, "completions/min_length": 21.375, "epoch": 6.520228344502358, "grad_norm": 0.006296316365484303, "kl": 0.093658447265625, "learning_rate": 2.7507325825393783e-07, "loss": 9.377703827340156e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3280, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 51.83333396911621, "completions/min_length": 19.0, "epoch": 6.522213948870688, "grad_norm": 0.008882777556686077, "kl": 0.07305908203125, "learning_rate": 2.7479154327118117e-07, "loss": 7.30404062778689e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3281, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 63.56250238418579, "completions/min_length": 26.0, "epoch": 6.524199553239017, "grad_norm": 0.0064920878592217976, "kl": 0.094879150390625, "learning_rate": 2.7450991794872127e-07, "loss": 9.492039680480957e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3282, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.625, "completions/mean_length": 67.36458539962769, "completions/min_length": 22.25, "epoch": 6.526185157607347, "grad_norm": 0.00742882390242665, "kl": 0.10125732421875, "learning_rate": 2.742283823986795e-07, "loss": 0.00010122793901246041, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3283, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.875, "completions/mean_length": 70.04166984558105, "completions/min_length": 23.25, "epoch": 6.528170761975677, "grad_norm": 0.0037649788742524547, "kl": 0.082366943359375, "learning_rate": 2.7394693673314094e-07, "loss": 8.222565520554781e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3284, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 60.46875047683716, "completions/min_length": 22.75, "epoch": 6.530156366344006, "grad_norm": 0.003939096813068536, "kl": 0.100494384765625, "learning_rate": 2.736655810641555e-07, "loss": 0.0001004367513814941, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3285, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 66.92708492279053, "completions/min_length": 22.0, "epoch": 6.532141970712336, "grad_norm": 1.5327424197118356, "kl": 0.27783203125, "learning_rate": 2.733843155037366e-07, "loss": -0.003415369428694248, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3286, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 61.8854193687439, "completions/min_length": 14.625, "epoch": 6.5341275750806656, "grad_norm": 0.00946429346113309, "kl": 0.10809326171875, "learning_rate": 2.7310314016386163e-07, "loss": 0.00010801161988638341, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3287, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 67.29166746139526, "completions/min_length": 22.0, "epoch": 6.536113179448995, "grad_norm": 0.8902188756007005, "kl": 0.091064453125, "learning_rate": 2.728220551564734e-07, "loss": -0.015449434518814087, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3288, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 63.031251430511475, "completions/min_length": 17.875, "epoch": 6.538098783817325, "grad_norm": 0.007049569568722197, "kl": 0.0780029296875, "learning_rate": 2.7254106059347746e-07, "loss": 7.805964560247958e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3289, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 61.10416841506958, "completions/min_length": 20.0, "epoch": 6.540084388185654, "grad_norm": 1.1789705562562038, "kl": 0.0927734375, "learning_rate": 2.722601565867435e-07, "loss": 0.0015071779489517212, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3290, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 69.81250238418579, "completions/min_length": 23.625, "epoch": 6.542069992553984, "grad_norm": 2.562191279643471, "kl": 0.10760498046875, "learning_rate": 2.719793432481058e-07, "loss": -0.00011565785098355263, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.057790376245975494, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3291, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 59.697917461395264, "completions/min_length": 22.875, "epoch": 6.5440555969223135, "grad_norm": 1.4417215365683687, "kl": 0.094573974609375, "learning_rate": 2.716986206893618e-07, "loss": 0.00494935130700469, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3292, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 52.02083396911621, "completions/min_length": 17.75, "epoch": 6.546041201290643, "grad_norm": 0.007345456597503264, "kl": 0.10198974609375, "learning_rate": 2.714179890222734e-07, "loss": 0.00010200271208304912, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3293, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 69.58333492279053, "completions/min_length": 20.5, "epoch": 6.5480268056589725, "grad_norm": 1.4443127585743225, "kl": 0.12353515625, "learning_rate": 2.711374483585658e-07, "loss": -0.00837962981313467, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3294, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 70.57291793823242, "completions/min_length": 24.875, "epoch": 6.550012410027302, "grad_norm": 1.4237256159871454, "kl": 0.081573486328125, "learning_rate": 2.7085699880992845e-07, "loss": 0.007895395159721375, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3295, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 57.56250238418579, "completions/min_length": 19.75, "epoch": 6.551998014395632, "grad_norm": 0.006446528284835301, "kl": 0.087432861328125, "learning_rate": 2.7057664048801445e-07, "loss": 8.73694516485557e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3296, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 60.94791889190674, "completions/min_length": 20.875, "epoch": 6.5539836187639615, "grad_norm": 0.004534005863694354, "kl": 0.07562255859375, "learning_rate": 2.7029637350444026e-07, "loss": 7.55086075514555e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3297, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 56.93750190734863, "completions/min_length": 19.5, "epoch": 6.555969223132291, "grad_norm": 0.006758925072616362, "kl": 0.073638916015625, "learning_rate": 2.700161979707859e-07, "loss": 7.370335515588522e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3298, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 59.80208444595337, "completions/min_length": 23.125, "epoch": 6.5579548275006205, "grad_norm": 0.005869538046115024, "kl": 0.112152099609375, "learning_rate": 2.6973611399859564e-07, "loss": 0.00011218262807233259, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3299, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 57.75000286102295, "completions/min_length": 18.625, "epoch": 6.5599404318689505, "grad_norm": 0.00731370251179893, "kl": 0.08160400390625, "learning_rate": 2.694561216993766e-07, "loss": 8.1530241004657e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3300, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 53.28125190734863, "completions/min_length": 20.5, "epoch": 6.5619260362372795, "grad_norm": 1.879212668413089, "kl": 0.0811767578125, "learning_rate": 2.691762211845997e-07, "loss": -0.0052892486564815044, "memory(GiB)": 94.21, "reward": 1.9479166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3301, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 68.54166793823242, "completions/min_length": 23.125, "epoch": 6.5639116406056095, "grad_norm": 0.006342730881970545, "kl": 0.093017578125, "learning_rate": 2.6889641256569973e-07, "loss": 9.30671812966466e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3302, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 57.97916841506958, "completions/min_length": 18.875, "epoch": 6.565897244973939, "grad_norm": 0.005479099147503255, "kl": 0.09716796875, "learning_rate": 2.686166959540739e-07, "loss": 9.717326611280441e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3303, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 69.11458444595337, "completions/min_length": 24.75, "epoch": 6.5678828493422685, "grad_norm": 0.004939207796209544, "kl": 0.075286865234375, "learning_rate": 2.6833707146108386e-07, "loss": 7.521603401983157e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3304, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 67.20833539962769, "completions/min_length": 23.375, "epoch": 6.5698684537105985, "grad_norm": 0.2797970189304334, "kl": 0.197479248046875, "learning_rate": 2.68057539198054e-07, "loss": 0.00019732053624466062, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3305, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.875, "completions/mean_length": 68.28125238418579, "completions/min_length": 23.25, "epoch": 6.5718540580789275, "grad_norm": 0.00428974557960698, "kl": 0.081756591796875, "learning_rate": 2.677780992762716e-07, "loss": 8.18474218249321e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3306, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 69.63541889190674, "completions/min_length": 25.875, "epoch": 6.5738396624472575, "grad_norm": 0.00429191783697208, "kl": 0.07598876953125, "learning_rate": 2.674987518069883e-07, "loss": 7.596724026370794e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3307, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 60.42708492279053, "completions/min_length": 23.25, "epoch": 6.5758252668155865, "grad_norm": 0.003882740737015487, "kl": 0.10540771484375, "learning_rate": 2.6721949690141776e-07, "loss": 0.00010550117440288886, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3308, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 65.63541889190674, "completions/min_length": 27.125, "epoch": 6.5778108711839165, "grad_norm": 0.003647339858864909, "kl": 0.080902099609375, "learning_rate": 2.669403346707376e-07, "loss": 8.098404214251786e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3309, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 61.28125190734863, "completions/min_length": 24.625, "epoch": 6.5797964755522464, "grad_norm": 0.005204546559226765, "kl": 0.088958740234375, "learning_rate": 2.666612652260885e-07, "loss": 8.892134064808488e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3310, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 59.02083396911621, "completions/min_length": 24.125, "epoch": 6.5817820799205755, "grad_norm": 1.922672390197199, "kl": 0.0869140625, "learning_rate": 2.6638228867857347e-07, "loss": 0.00024019306874834, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3311, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 60.656251430511475, "completions/min_length": 19.5, "epoch": 6.5837676842889055, "grad_norm": 0.0032595686262476106, "kl": 0.0914306640625, "learning_rate": 2.661034051392595e-07, "loss": 9.139993926510215e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3312, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 61.520835399627686, "completions/min_length": 15.125, "epoch": 6.585753288657235, "grad_norm": 0.006045836019464943, "kl": 0.082916259765625, "learning_rate": 2.658246147191756e-07, "loss": 8.286805677926168e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3313, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 65.15625238418579, "completions/min_length": 18.625, "epoch": 6.5877388930255645, "grad_norm": 0.0038154608461361413, "kl": 0.11724853515625, "learning_rate": 2.6554591752931455e-07, "loss": 0.00011719104077201337, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3314, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 58.11458492279053, "completions/min_length": 20.75, "epoch": 6.589724497393894, "grad_norm": 0.005446723226356524, "kl": 0.078704833984375, "learning_rate": 2.652673136806317e-07, "loss": 7.86786840762943e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3315, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 59.54166793823242, "completions/min_length": 19.375, "epoch": 6.5917101017622235, "grad_norm": 0.0034012921118688104, "kl": 0.087188720703125, "learning_rate": 2.649888032840448e-07, "loss": 8.71704105520621e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3316, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 75.50000286102295, "completions/min_length": 22.375, "epoch": 6.5936957061305534, "grad_norm": 0.0036895044989605275, "kl": 0.0858154296875, "learning_rate": 2.647103864504353e-07, "loss": 8.579499990446493e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3317, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.625, "completions/mean_length": 56.3229193687439, "completions/min_length": 19.125, "epoch": 6.595681310498883, "grad_norm": 0.004282274991072171, "kl": 0.079315185546875, "learning_rate": 2.644320632906466e-07, "loss": 7.92255304986611e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3318, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 66.55208539962769, "completions/min_length": 19.0, "epoch": 6.5976669148672125, "grad_norm": 0.0031926142548439115, "kl": 0.079437255859375, "learning_rate": 2.6415383391548494e-07, "loss": 7.944124809000641e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3319, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 56.48958444595337, "completions/min_length": 20.625, "epoch": 6.599652519235542, "grad_norm": 0.003399086594998678, "kl": 0.092681884765625, "learning_rate": 2.638756984357198e-07, "loss": 9.269505244446918e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3320, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 64.33333539962769, "completions/min_length": 22.875, "epoch": 6.6016381236038715, "grad_norm": 0.0038492704191697984, "kl": 0.08599853515625, "learning_rate": 2.635976569620823e-07, "loss": 8.592945232521743e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3321, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 62.04166793823242, "completions/min_length": 21.5, "epoch": 6.603623727972201, "grad_norm": 0.004115888856919297, "kl": 0.072998046875, "learning_rate": 2.6331970960526704e-07, "loss": 7.300818833755329e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3322, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 62.57291841506958, "completions/min_length": 19.0, "epoch": 6.605609332340531, "grad_norm": 0.0071639383081339245, "kl": 0.090667724609375, "learning_rate": 2.6304185647593105e-07, "loss": 9.062886238098145e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3323, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 71.81250238418579, "completions/min_length": 18.5, "epoch": 6.6075949367088604, "grad_norm": 0.0039402091299118825, "kl": 0.091644287109375, "learning_rate": 2.62764097684693e-07, "loss": 9.170065459329635e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3324, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 77.41666889190674, "completions/min_length": 25.875, "epoch": 6.60958054107719, "grad_norm": 0.008417197344632511, "kl": 0.1055908203125, "learning_rate": 2.6248643334213513e-07, "loss": 0.00010556657798588276, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3325, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 62.44791841506958, "completions/min_length": 29.875, "epoch": 6.61156614544552, "grad_norm": 0.0036738559349019147, "kl": 0.075714111328125, "learning_rate": 2.6220886355880126e-07, "loss": 7.577511132694781e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3326, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.375, "completions/mean_length": 63.07291841506958, "completions/min_length": 21.375, "epoch": 6.613551749813849, "grad_norm": 0.004403329476001133, "kl": 0.086395263671875, "learning_rate": 2.619313884451978e-07, "loss": 8.63686072989367e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3327, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 62.645835399627686, "completions/min_length": 19.75, "epoch": 6.615537354182179, "grad_norm": 0.003608955352380806, "kl": 0.0836181640625, "learning_rate": 2.6165400811179363e-07, "loss": 8.356331090908498e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3328, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 72.13541889190674, "completions/min_length": 24.75, "epoch": 6.617522958550508, "grad_norm": 0.0028169912923870478, "kl": 0.074249267578125, "learning_rate": 2.6137672266901986e-07, "loss": 7.409360841847956e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3329, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 66.3854193687439, "completions/min_length": 16.75, "epoch": 6.619508562918838, "grad_norm": 0.003873397827965879, "kl": 0.0819091796875, "learning_rate": 2.610995322272696e-07, "loss": 8.192495442926884e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3330, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 63.708335399627686, "completions/min_length": 20.5, "epoch": 6.621494167287168, "grad_norm": 2.7306234425268365, "kl": 0.093902587890625, "learning_rate": 2.6082243689689854e-07, "loss": -0.010180055163800716, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3331, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 59.91666793823242, "completions/min_length": 16.375, "epoch": 6.623479771655497, "grad_norm": 0.056881296270132796, "kl": 0.103759765625, "learning_rate": 2.605454367882238e-07, "loss": 0.00010373154509579763, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3332, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 67.44791889190674, "completions/min_length": 18.0, "epoch": 6.625465376023827, "grad_norm": 1.6922141995416082, "kl": 0.09228515625, "learning_rate": 2.6026853201152553e-07, "loss": -0.009174630045890808, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666679084301, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3333, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 61.89583396911621, "completions/min_length": 25.25, "epoch": 6.627450980392156, "grad_norm": 1.4010296660145436, "kl": 0.098602294921875, "learning_rate": 2.599917226770453e-07, "loss": -0.0017798136686906219, "memory(GiB)": 94.21, "reward": 1.6458333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3334, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 57.98958444595337, "completions/min_length": 15.75, "epoch": 6.629436584760486, "grad_norm": 0.8062404108832326, "kl": 0.08477783203125, "learning_rate": 2.5971500889498623e-07, "loss": -0.012669868767261505, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3335, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 72.43750190734863, "completions/min_length": 21.375, "epoch": 6.631422189128816, "grad_norm": 0.005339388316280159, "kl": 0.07958984375, "learning_rate": 2.5943839077551487e-07, "loss": 7.962968084029853e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3336, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 55.05208492279053, "completions/min_length": 18.0, "epoch": 6.633407793497145, "grad_norm": 0.007697588192921968, "kl": 0.092041015625, "learning_rate": 2.5916186842875855e-07, "loss": 9.184511145576835e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3337, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 68.23958587646484, "completions/min_length": 20.125, "epoch": 6.635393397865475, "grad_norm": 0.004779643424237663, "kl": 0.0899658203125, "learning_rate": 2.5888544196480625e-07, "loss": 9.000241698231548e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3338, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 69.427086353302, "completions/min_length": 25.375, "epoch": 6.637379002233805, "grad_norm": 0.004951351674302059, "kl": 0.07647705078125, "learning_rate": 2.586091114937099e-07, "loss": 7.65608056099154e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3339, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.375, "completions/mean_length": 56.82291793823242, "completions/min_length": 26.625, "epoch": 6.639364606602134, "grad_norm": 0.00418829974208351, "kl": 0.100006103515625, "learning_rate": 2.5833287712548197e-07, "loss": 0.00010003871284425259, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3340, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 61.41666889190674, "completions/min_length": 22.5, "epoch": 6.641350210970464, "grad_norm": 2.106885645935163, "kl": 0.084716796875, "learning_rate": 2.580567389700978e-07, "loss": 0.002495020627975464, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.05653337761759758, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3341, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 66.79166841506958, "completions/min_length": 22.125, "epoch": 6.643335815338793, "grad_norm": 0.005040883730130993, "kl": 0.095458984375, "learning_rate": 2.577806971374934e-07, "loss": 9.541861800244078e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3342, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.375, "completions/mean_length": 53.52083468437195, "completions/min_length": 23.25, "epoch": 6.645321419707123, "grad_norm": 0.013046118876608146, "kl": 0.084259033203125, "learning_rate": 2.575047517375671e-07, "loss": 8.429917215835303e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3343, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 56.44791793823242, "completions/min_length": 19.0, "epoch": 6.647307024075453, "grad_norm": 0.9421490556089949, "kl": 0.08135986328125, "learning_rate": 2.5722890288017906e-07, "loss": -0.00587873300537467, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3344, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 60.55208396911621, "completions/min_length": 24.75, "epoch": 6.649292628443782, "grad_norm": 0.006296653356111127, "kl": 0.072509765625, "learning_rate": 2.5695315067515014e-07, "loss": 7.25698919268325e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3345, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 60.864586353302, "completions/min_length": 19.75, "epoch": 6.651278232812112, "grad_norm": 0.004095511979683474, "kl": 0.08154296875, "learning_rate": 2.566774952322631e-07, "loss": 8.152994996635243e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3346, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 61.65625190734863, "completions/min_length": 18.0, "epoch": 6.653263837180441, "grad_norm": 0.005288149977675071, "kl": 0.0723876953125, "learning_rate": 2.5640193666126277e-07, "loss": 7.223733700811863e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3347, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 62.88541889190674, "completions/min_length": 23.25, "epoch": 6.655249441548771, "grad_norm": 0.00449370621506626, "kl": 0.08258056640625, "learning_rate": 2.5612647507185426e-07, "loss": 8.245091157732531e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3348, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.625, "completions/mean_length": 65.17708587646484, "completions/min_length": 22.125, "epoch": 6.657235045917101, "grad_norm": 0.9276794784041164, "kl": 0.090484619140625, "learning_rate": 2.558511105737051e-07, "loss": -0.01506928913295269, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3349, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 63.906251430511475, "completions/min_length": 14.375, "epoch": 6.65922065028543, "grad_norm": 0.004896746325986625, "kl": 0.077728271484375, "learning_rate": 2.555758432764439e-07, "loss": 7.765968621242791e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3350, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 58.36458492279053, "completions/min_length": 21.375, "epoch": 6.66120625465376, "grad_norm": 0.003631481854431352, "kl": 0.07855224609375, "learning_rate": 2.553006732896601e-07, "loss": 7.859925972297788e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3351, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 60.75000238418579, "completions/min_length": 24.75, "epoch": 6.66319185902209, "grad_norm": 0.004907615047471998, "kl": 0.0843505859375, "learning_rate": 2.550256007229051e-07, "loss": 8.43419911689125e-05, "memory(GiB)": 94.21, "reward": 1.4375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4375, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3352, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 57.64583444595337, "completions/min_length": 24.0, "epoch": 6.665177463390419, "grad_norm": 0.0037962988035943708, "kl": 0.08966064453125, "learning_rate": 2.547506256856907e-07, "loss": 8.961845742305741e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3353, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.125, "completions/mean_length": 58.63541889190674, "completions/min_length": 22.25, "epoch": 6.667163067758749, "grad_norm": 0.004024698613476663, "kl": 0.063323974609375, "learning_rate": 2.5447574828749094e-07, "loss": 6.329286406980827e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3354, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 62.4479193687439, "completions/min_length": 21.875, "epoch": 6.669148672127078, "grad_norm": 0.005221276344265532, "kl": 0.078155517578125, "learning_rate": 2.5420096863774e-07, "loss": 7.819536403985694e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3355, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 64.05208492279053, "completions/min_length": 24.375, "epoch": 6.671134276495408, "grad_norm": 0.0036717399578703115, "kl": 0.0992431640625, "learning_rate": 2.5392628684583326e-07, "loss": 9.928403596859425e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3356, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 63.87500190734863, "completions/min_length": 24.5, "epoch": 6.673119880863738, "grad_norm": 0.9452003011181197, "kl": 0.073333740234375, "learning_rate": 2.536517030211281e-07, "loss": 0.0017337091267108917, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3357, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 63.145835399627686, "completions/min_length": 19.25, "epoch": 6.675105485232067, "grad_norm": 0.0036780416918719954, "kl": 0.065460205078125, "learning_rate": 2.5337721727294183e-07, "loss": 6.55170006211847e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3358, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 57.572917461395264, "completions/min_length": 19.125, "epoch": 6.677091089600397, "grad_norm": 0.0041497013124343314, "kl": 0.0875244140625, "learning_rate": 2.531028297105529e-07, "loss": 8.741334022488445e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3359, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 68.39583587646484, "completions/min_length": 20.375, "epoch": 6.679076693968726, "grad_norm": 0.003483883629828705, "kl": 0.0755615234375, "learning_rate": 2.528285404432013e-07, "loss": 7.548509893240407e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3360, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 66.55208539962769, "completions/min_length": 22.0, "epoch": 6.681062298337056, "grad_norm": 0.0046859575805675395, "kl": 0.067352294921875, "learning_rate": 2.52554349580087e-07, "loss": 6.732526526320726e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3361, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 58.687500953674316, "completions/min_length": 17.0, "epoch": 6.683047902705386, "grad_norm": 0.007134520061754884, "kl": 0.10400390625, "learning_rate": 2.522802572303716e-07, "loss": 0.00010390947863925248, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3362, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 63.635419845581055, "completions/min_length": 19.0, "epoch": 6.685033507073715, "grad_norm": 0.00442518071558769, "kl": 0.102996826171875, "learning_rate": 2.520062635031768e-07, "loss": 0.00010302726877853274, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3363, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 57.01041841506958, "completions/min_length": 26.5, "epoch": 6.687019111442045, "grad_norm": 2.2339347833199987, "kl": 0.09173583984375, "learning_rate": 2.517323685075855e-07, "loss": 0.0046552326530218124, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3364, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 67.09375190734863, "completions/min_length": 26.25, "epoch": 6.689004715810375, "grad_norm": 0.005528153582910281, "kl": 0.08135986328125, "learning_rate": 2.514585723526414e-07, "loss": 8.139129204209894e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3365, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 56.156250953674316, "completions/min_length": 20.625, "epoch": 6.690990320178704, "grad_norm": 0.006064638235015257, "kl": 0.072509765625, "learning_rate": 2.5118487514734843e-07, "loss": 7.242577703436837e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3366, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 69.78125381469727, "completions/min_length": 23.375, "epoch": 6.692975924547034, "grad_norm": 0.0037611521765138862, "kl": 0.09722900390625, "learning_rate": 2.5091127700067094e-07, "loss": 9.72005509538576e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3367, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 57.66666841506958, "completions/min_length": 21.625, "epoch": 6.694961528915364, "grad_norm": 0.006795767843164217, "kl": 0.06707763671875, "learning_rate": 2.5063777802153477e-07, "loss": 6.716744974255562e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3368, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 64.2916693687439, "completions/min_length": 27.625, "epoch": 6.696947133283693, "grad_norm": 0.00515757239988111, "kl": 0.08392333984375, "learning_rate": 2.503643783188251e-07, "loss": 8.38606501929462e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3369, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 68.114586353302, "completions/min_length": 18.875, "epoch": 6.698932737652023, "grad_norm": 1.3370124218827615, "kl": 0.10272216796875, "learning_rate": 2.5009107800138864e-07, "loss": -0.0020703524351119995, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3370, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 67.21875238418579, "completions/min_length": 20.875, "epoch": 6.700918342020352, "grad_norm": 0.005385096448298331, "kl": 0.09234619140625, "learning_rate": 2.4981787717803206e-07, "loss": 9.226000838680193e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3371, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 67.89583539962769, "completions/min_length": 23.25, "epoch": 6.702903946388682, "grad_norm": 0.003069569510925218, "kl": 0.074554443359375, "learning_rate": 2.4954477595752215e-07, "loss": 7.460590859409422e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3372, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 62.45833492279053, "completions/min_length": 20.875, "epoch": 6.704889550757011, "grad_norm": 0.0031521617109663107, "kl": 0.076873779296875, "learning_rate": 2.492717744485868e-07, "loss": 7.6853517384734e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3373, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 60.375000953674316, "completions/min_length": 24.0, "epoch": 6.706875155125341, "grad_norm": 0.008846982970644401, "kl": 0.0711669921875, "learning_rate": 2.4899887275991344e-07, "loss": 7.109522266546264e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3374, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 71.54166889190674, "completions/min_length": 28.75, "epoch": 6.708860759493671, "grad_norm": 0.0032348623832891216, "kl": 0.09027099609375, "learning_rate": 2.4872607100014984e-07, "loss": 9.030763612827286e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3375, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 67.41666793823242, "completions/min_length": 20.0, "epoch": 6.710846363862, "grad_norm": 0.004185812365160031, "kl": 0.0830078125, "learning_rate": 2.484533692779047e-07, "loss": 8.305534720420837e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3376, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 60.91666793823242, "completions/min_length": 21.25, "epoch": 6.71283196823033, "grad_norm": 0.0035172018483580576, "kl": 0.09246826171875, "learning_rate": 2.48180767701746e-07, "loss": 9.239085193257779e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3377, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 69.02083539962769, "completions/min_length": 20.0, "epoch": 6.71481757259866, "grad_norm": 0.003868128395804147, "kl": 0.11822509765625, "learning_rate": 2.479082663802024e-07, "loss": 0.0001183371277875267, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3378, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 58.19791841506958, "completions/min_length": 21.625, "epoch": 6.716803176966989, "grad_norm": 0.005296618629163508, "kl": 0.065673828125, "learning_rate": 2.476358654217627e-07, "loss": 6.562774069607258e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3379, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.875, "completions/mean_length": 57.000001430511475, "completions/min_length": 22.0, "epoch": 6.718788781335319, "grad_norm": 0.003523144574405198, "kl": 0.085205078125, "learning_rate": 2.4736356493487516e-07, "loss": 8.507106394972652e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3380, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 71.76041889190674, "completions/min_length": 22.875, "epoch": 6.720774385703649, "grad_norm": 0.0031552549941445437, "kl": 0.0738525390625, "learning_rate": 2.4709136502794875e-07, "loss": 7.378502778010443e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3381, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 51.98958396911621, "completions/min_length": 20.125, "epoch": 6.722759990071978, "grad_norm": 0.006089569665799314, "kl": 0.074676513671875, "learning_rate": 2.4681926580935196e-07, "loss": 7.465011003660038e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3382, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 60.33333444595337, "completions/min_length": 25.875, "epoch": 6.724745594440308, "grad_norm": 0.008489593534347837, "kl": 0.07373046875, "learning_rate": 2.4654726738741294e-07, "loss": 7.368314254563302e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3383, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 65.91666793823242, "completions/min_length": 20.625, "epoch": 6.726731198808637, "grad_norm": 0.004086129068409202, "kl": 0.079833984375, "learning_rate": 2.462753698704207e-07, "loss": 7.98147520981729e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3384, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 71.9166693687439, "completions/min_length": 23.0, "epoch": 6.728716803176967, "grad_norm": 0.003876205627064784, "kl": 0.06903076171875, "learning_rate": 2.4600357336662317e-07, "loss": 6.896528066135943e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3385, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 61.15625190734863, "completions/min_length": 23.125, "epoch": 6.730702407545296, "grad_norm": 0.011433983871249859, "kl": 0.112091064453125, "learning_rate": 2.4573187798422814e-07, "loss": 0.0001121356908697635, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3386, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 64.36458396911621, "completions/min_length": 26.125, "epoch": 6.732688011913626, "grad_norm": 0.0036052930835619276, "kl": 0.067901611328125, "learning_rate": 2.454602838314037e-07, "loss": 6.791093619540334e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3387, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 66.645836353302, "completions/min_length": 22.5, "epoch": 6.734673616281956, "grad_norm": 0.0029979367074048175, "kl": 0.084320068359375, "learning_rate": 2.4518879101627695e-07, "loss": 8.429453009739518e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3388, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 65.08333539962769, "completions/min_length": 26.375, "epoch": 6.736659220650285, "grad_norm": 1.275504371865935, "kl": 0.086669921875, "learning_rate": 2.449173996469353e-07, "loss": 0.005607105791568756, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3389, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 58.13541793823242, "completions/min_length": 23.25, "epoch": 6.738644825018615, "grad_norm": 0.004657554891619219, "kl": 0.079132080078125, "learning_rate": 2.4464610983142507e-07, "loss": 7.919049676274881e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3390, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 63.281251430511475, "completions/min_length": 20.875, "epoch": 6.740630429386945, "grad_norm": 0.03311615389314626, "kl": 0.072235107421875, "learning_rate": 2.443749216777528e-07, "loss": 7.219881808850914e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3391, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 71.06250143051147, "completions/min_length": 21.125, "epoch": 6.742616033755274, "grad_norm": 0.00642804698533361, "kl": 0.1016845703125, "learning_rate": 2.441038352938844e-07, "loss": 0.0001016240130411461, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3392, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.125, "completions/mean_length": 71.6354193687439, "completions/min_length": 27.375, "epoch": 6.744601638123604, "grad_norm": 0.004152023186907524, "kl": 0.091552734375, "learning_rate": 2.4383285078774487e-07, "loss": 9.157097520073876e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3393, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 64.78125190734863, "completions/min_length": 25.875, "epoch": 6.746587242491934, "grad_norm": 0.00548445749648638, "kl": 0.09490966796875, "learning_rate": 2.4356196826721913e-07, "loss": 9.482722089160234e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3394, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 64.34375095367432, "completions/min_length": 24.75, "epoch": 6.748572846860263, "grad_norm": 1.9910135055442078, "kl": 0.13055419921875, "learning_rate": 2.4329118784015125e-07, "loss": -0.006759150885045528, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3395, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 59.80208492279053, "completions/min_length": 23.125, "epoch": 6.750558451228593, "grad_norm": 0.8985845256202774, "kl": 0.0772705078125, "learning_rate": 2.4302050961434443e-07, "loss": 0.003503883257508278, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3396, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 70.57291889190674, "completions/min_length": 21.875, "epoch": 6.752544055596922, "grad_norm": 0.003441792077707185, "kl": 0.076263427734375, "learning_rate": 2.427499336975618e-07, "loss": 7.62996933190152e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3397, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 64.82291793823242, "completions/min_length": 25.125, "epoch": 6.754529659965252, "grad_norm": 0.004492297001124366, "kl": 0.0665283203125, "learning_rate": 2.424794601975254e-07, "loss": 6.658166239503771e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3398, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 68.22916889190674, "completions/min_length": 26.75, "epoch": 6.756515264333581, "grad_norm": 1.105991332780493, "kl": 0.0843505859375, "learning_rate": 2.4220908922191625e-07, "loss": -0.012495587579905987, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3399, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.875, "completions/mean_length": 49.833335399627686, "completions/min_length": 18.25, "epoch": 6.758500868701911, "grad_norm": 1.2618422887222154, "kl": 0.09228515625, "learning_rate": 2.4193882087837514e-07, "loss": 0.0025725262239575386, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3400, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 72.08333587646484, "completions/min_length": 24.875, "epoch": 6.760486473070241, "grad_norm": 0.004245976249590136, "kl": 0.091400146484375, "learning_rate": 2.416686552745013e-07, "loss": 9.141544433077797e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3401, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 65.00000190734863, "completions/min_length": 24.5, "epoch": 6.76247207743857, "grad_norm": 0.003321200138939367, "kl": 0.064697265625, "learning_rate": 2.413985925178538e-07, "loss": 6.467117054853588e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3402, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 65.91666841506958, "completions/min_length": 21.25, "epoch": 6.7644576818069, "grad_norm": 0.004712657343597449, "kl": 0.09051513671875, "learning_rate": 2.411286327159503e-07, "loss": 9.06178611330688e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3403, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 63.04166889190674, "completions/min_length": 24.375, "epoch": 6.76644328617523, "grad_norm": 0.14265477761747114, "kl": 0.110382080078125, "learning_rate": 2.4085877597626704e-07, "loss": 0.00011040962999686599, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3404, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 66.48958444595337, "completions/min_length": 25.75, "epoch": 6.768428890543559, "grad_norm": 0.007692668588512472, "kl": 0.09771728515625, "learning_rate": 2.4058902240624056e-07, "loss": 9.764514834387228e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3405, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.625, "completions/mean_length": 55.45833444595337, "completions/min_length": 20.375, "epoch": 6.770414494911889, "grad_norm": 0.008334446388096196, "kl": 0.116455078125, "learning_rate": 2.403193721132652e-07, "loss": 0.00011653061665128917, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3406, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 59.50000190734863, "completions/min_length": 19.875, "epoch": 6.772400099280219, "grad_norm": 0.006609193410780301, "kl": 0.076904296875, "learning_rate": 2.400498252046942e-07, "loss": 7.69533944549039e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3407, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 60.020835399627686, "completions/min_length": 26.375, "epoch": 6.774385703648548, "grad_norm": 0.007581474157891919, "kl": 0.08856201171875, "learning_rate": 2.3978038178784043e-07, "loss": 8.853618055582047e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3408, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.875, "completions/mean_length": 55.11458492279053, "completions/min_length": 20.5, "epoch": 6.776371308016878, "grad_norm": 0.008603655093382775, "kl": 0.08026123046875, "learning_rate": 2.395110419699746e-07, "loss": 8.022796828299761e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3409, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 65.614586353302, "completions/min_length": 25.25, "epoch": 6.778356912385207, "grad_norm": 0.007818040047445268, "kl": 0.09033203125, "learning_rate": 2.3924180585832707e-07, "loss": 9.022171434480697e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3410, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 65.0729193687439, "completions/min_length": 26.5, "epoch": 6.780342516753537, "grad_norm": 0.005496575624380883, "kl": 0.0943603515625, "learning_rate": 2.3897267356008617e-07, "loss": 9.42759943427518e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3411, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 61.48958492279053, "completions/min_length": 22.0, "epoch": 6.782328121121866, "grad_norm": 0.006980595328709287, "kl": 0.08282470703125, "learning_rate": 2.3870364518239925e-07, "loss": 8.279483154183254e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3412, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 59.83333492279053, "completions/min_length": 26.0, "epoch": 6.784313725490196, "grad_norm": 0.00396278425089312, "kl": 0.0894775390625, "learning_rate": 2.384347208323726e-07, "loss": 8.951907511800528e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3413, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 64.20833539962769, "completions/min_length": 20.5, "epoch": 6.786299329858526, "grad_norm": 0.007165150411159889, "kl": 0.080841064453125, "learning_rate": 2.381659006170705e-07, "loss": 8.075873483903706e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3414, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 70.61458587646484, "completions/min_length": 21.375, "epoch": 6.788284934226855, "grad_norm": 0.9550250805260357, "kl": 0.10546875, "learning_rate": 2.3789718464351577e-07, "loss": 0.009096808731555939, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3415, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 64.48958539962769, "completions/min_length": 20.5, "epoch": 6.790270538595185, "grad_norm": 0.004040958340010559, "kl": 0.07293701171875, "learning_rate": 2.3762857301869045e-07, "loss": 7.294019451364875e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3416, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 66.59375143051147, "completions/min_length": 20.625, "epoch": 6.792256142963515, "grad_norm": 0.004940515684547405, "kl": 0.091949462890625, "learning_rate": 2.373600658495341e-07, "loss": 9.194164158543572e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3417, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 66.29166984558105, "completions/min_length": 22.75, "epoch": 6.794241747331844, "grad_norm": 1.5329658316527761, "kl": 0.1015625, "learning_rate": 2.3709166324294545e-07, "loss": -0.004124955274164677, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3418, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.625, "completions/mean_length": 58.69791841506958, "completions/min_length": 21.75, "epoch": 6.796227351700174, "grad_norm": 0.005483250933679256, "kl": 0.07818603515625, "learning_rate": 2.3682336530578139e-07, "loss": 7.810754323145375e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3419, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 66.32291841506958, "completions/min_length": 23.0, "epoch": 6.798212956068504, "grad_norm": 1.1923667757231087, "kl": 0.115966796875, "learning_rate": 2.3655517214485677e-07, "loss": 0.014420520514249802, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3420, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.875, "completions/mean_length": 61.42708444595337, "completions/min_length": 23.5, "epoch": 6.800198560436833, "grad_norm": 0.10072646462925985, "kl": 0.32781982421875, "learning_rate": 2.3628708386694536e-07, "loss": 0.0003270826709922403, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3421, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 63.6666693687439, "completions/min_length": 23.25, "epoch": 6.802184164805163, "grad_norm": 0.00380862300248481, "kl": 0.0791015625, "learning_rate": 2.360191005787786e-07, "loss": 7.902109064161777e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3422, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 62.88541793823242, "completions/min_length": 25.0, "epoch": 6.804169769173492, "grad_norm": 0.0051991632425972874, "kl": 0.086761474609375, "learning_rate": 2.3575122238704627e-07, "loss": 8.665939094498754e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3423, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 64.36458539962769, "completions/min_length": 20.875, "epoch": 6.806155373541822, "grad_norm": 0.00412328354140257, "kl": 0.076751708984375, "learning_rate": 2.3548344939839666e-07, "loss": 7.666759483981878e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3424, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 59.10416889190674, "completions/min_length": 21.75, "epoch": 6.808140977910151, "grad_norm": 0.00577668054951878, "kl": 0.0816650390625, "learning_rate": 2.3521578171943562e-07, "loss": 8.16772153484635e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3425, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.625, "completions/mean_length": 60.71875190734863, "completions/min_length": 24.5, "epoch": 6.810126582278481, "grad_norm": 0.003848301691714587, "kl": 0.0677490234375, "learning_rate": 2.3494821945672754e-07, "loss": 6.783010030630976e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3426, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 60.927085399627686, "completions/min_length": 24.625, "epoch": 6.812112186646811, "grad_norm": 0.0033841753454069427, "kl": 0.09063720703125, "learning_rate": 2.3468076271679487e-07, "loss": 9.07600624486804e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3427, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 64.58333492279053, "completions/min_length": 26.625, "epoch": 6.81409779101514, "grad_norm": 0.0039693970757664065, "kl": 0.09820556640625, "learning_rate": 2.3441341160611749e-07, "loss": 9.82606434263289e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3428, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 65.32291841506958, "completions/min_length": 21.75, "epoch": 6.81608339538347, "grad_norm": 0.003440035934574871, "kl": 0.0855712890625, "learning_rate": 2.3414616623113386e-07, "loss": 8.55953257996589e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3429, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 56.833335399627686, "completions/min_length": 19.5, "epoch": 6.8180689997518, "grad_norm": 0.004359568331895638, "kl": 0.062957763671875, "learning_rate": 2.3387902669824e-07, "loss": 6.287854921538383e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3430, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.125, "completions/mean_length": 57.656251430511475, "completions/min_length": 22.25, "epoch": 6.820054604120129, "grad_norm": 0.09725043894259304, "kl": 0.183380126953125, "learning_rate": 2.3361199311378965e-07, "loss": 0.00018321051902603358, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3431, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 62.062501430511475, "completions/min_length": 23.125, "epoch": 6.822040208488459, "grad_norm": 1.58439091359758, "kl": 0.0914306640625, "learning_rate": 2.3334506558409473e-07, "loss": 0.007666006684303284, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.05974817834794521, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.316736813634634, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3432, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 62.05208492279053, "completions/min_length": 17.875, "epoch": 6.824025812856789, "grad_norm": 0.006249547464504909, "kl": 0.083221435546875, "learning_rate": 2.3307824421542489e-07, "loss": 8.323766815010458e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3433, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 69.34375143051147, "completions/min_length": 19.375, "epoch": 6.826011417225118, "grad_norm": 1.917976116834742, "kl": 0.16632080078125, "learning_rate": 2.3281152911400742e-07, "loss": 0.005980097688734531, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3434, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 64.05208587646484, "completions/min_length": 26.75, "epoch": 6.827997021593448, "grad_norm": 0.00301577880458901, "kl": 0.077239990234375, "learning_rate": 2.325449203860273e-07, "loss": 7.725731120444834e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3435, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 62.13541889190674, "completions/min_length": 21.5, "epoch": 6.829982625961777, "grad_norm": 0.8332981422966501, "kl": 0.075958251953125, "learning_rate": 2.3227841813762688e-07, "loss": -0.01034325361251831, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3436, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 64.14583444595337, "completions/min_length": 25.0, "epoch": 6.831968230330107, "grad_norm": 0.003770732457586283, "kl": 0.08013916015625, "learning_rate": 2.3201202247490676e-07, "loss": 8.001473906915635e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3437, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 81.0416693687439, "completions/min_length": 31.625, "epoch": 6.833953834698436, "grad_norm": 0.004250726128683241, "kl": 0.088409423828125, "learning_rate": 2.317457335039244e-07, "loss": 8.844550757203251e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3438, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 57.23958444595337, "completions/min_length": 22.125, "epoch": 6.835939439066766, "grad_norm": 0.0031637760279484115, "kl": 0.060302734375, "learning_rate": 2.3147955133069537e-07, "loss": 6.025177935953252e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3439, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 67.40625095367432, "completions/min_length": 19.875, "epoch": 6.837925043435096, "grad_norm": 1.4640450952701392, "kl": 0.09423828125, "learning_rate": 2.3121347606119257e-07, "loss": 0.023468755185604095, "memory(GiB)": 94.21, "reward": 1.8125000149011612, "reward_std": 0.05103103443980217, "rewards/CineAccuracyORM/mean": 0.8125000027939677, "rewards/CineAccuracyORM/std": 0.13744790107011795, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3440, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.625, "completions/mean_length": 53.66666793823242, "completions/min_length": 20.875, "epoch": 6.839910647803425, "grad_norm": 0.003385809205604886, "kl": 0.088287353515625, "learning_rate": 2.3094750780134587e-07, "loss": 8.831812010612339e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3441, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 63.66666841506958, "completions/min_length": 17.25, "epoch": 6.841896252171755, "grad_norm": 0.006235937234724296, "kl": 0.11151123046875, "learning_rate": 2.3068164665704336e-07, "loss": 0.00011143732990603894, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3442, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 66.81250190734863, "completions/min_length": 26.125, "epoch": 6.843881856540085, "grad_norm": 0.0034485157487708735, "kl": 0.067840576171875, "learning_rate": 2.304158927341298e-07, "loss": 6.788223254261538e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3443, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.375, "completions/mean_length": 74.22916793823242, "completions/min_length": 25.75, "epoch": 6.845867460908414, "grad_norm": 0.008456224291448381, "kl": 0.0745849609375, "learning_rate": 2.3015024613840738e-07, "loss": 7.461795030394569e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3444, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 69.8229193687439, "completions/min_length": 27.25, "epoch": 6.847853065276744, "grad_norm": 0.005448665479385711, "kl": 0.081878662109375, "learning_rate": 2.29884706975636e-07, "loss": 8.166871702997014e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3445, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 66.75000286102295, "completions/min_length": 19.125, "epoch": 6.849838669645074, "grad_norm": 0.011120099062653334, "kl": 0.096038818359375, "learning_rate": 2.2961927535153215e-07, "loss": 9.604525985196233e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3446, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 65.6666693687439, "completions/min_length": 23.75, "epoch": 6.851824274013403, "grad_norm": 0.02400694885030784, "kl": 0.13018798828125, "learning_rate": 2.2935395137176994e-07, "loss": 0.0001302231685258448, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3447, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 60.645835399627686, "completions/min_length": 22.75, "epoch": 6.853809878381733, "grad_norm": 0.021029487979905625, "kl": 0.12274169921875, "learning_rate": 2.2908873514198073e-07, "loss": 0.00012257686466909945, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3448, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 53.29166793823242, "completions/min_length": 20.0, "epoch": 6.855795482750062, "grad_norm": 0.008970391235218558, "kl": 0.086761474609375, "learning_rate": 2.2882362676775242e-07, "loss": 8.674825949128717e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3449, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 60.60416841506958, "completions/min_length": 28.125, "epoch": 6.857781087118392, "grad_norm": 1.5560489122166241, "kl": 0.090179443359375, "learning_rate": 2.285586263546307e-07, "loss": 0.002466946840286255, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3450, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 67.76041841506958, "completions/min_length": 32.125, "epoch": 6.859766691486721, "grad_norm": 0.0033272374367833354, "kl": 0.08209228515625, "learning_rate": 2.2829373400811763e-07, "loss": 8.206719940062612e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3451, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.625, "completions/mean_length": 60.68750190734863, "completions/min_length": 25.375, "epoch": 6.861752295855051, "grad_norm": 0.0037930915863598385, "kl": 0.080841064453125, "learning_rate": 2.280289498336724e-07, "loss": 8.07654723757878e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3452, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 69.62500238418579, "completions/min_length": 24.75, "epoch": 6.863737900223381, "grad_norm": 0.0033053027537494734, "kl": 0.084625244140625, "learning_rate": 2.2776427393671143e-07, "loss": 8.468855958199129e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3453, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 76.04166889190674, "completions/min_length": 25.875, "epoch": 6.86572350459171, "grad_norm": 1.8740541670066135, "kl": 0.086456298828125, "learning_rate": 2.2749970642260796e-07, "loss": 0.0014206450432538986, "memory(GiB)": 94.21, "reward": 1.7291666865348816, "reward_std": 0.06454972177743912, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.3189288526773453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3454, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 56.69791889190674, "completions/min_length": 22.125, "epoch": 6.86770910896004, "grad_norm": 0.005013546322175355, "kl": 0.082611083984375, "learning_rate": 2.272352473966917e-07, "loss": 8.264806092483923e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3455, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 61.927085399627686, "completions/min_length": 26.25, "epoch": 6.86969471332837, "grad_norm": 0.003636777975726669, "kl": 0.08807373046875, "learning_rate": 2.2697089696424976e-07, "loss": 8.801998774288222e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3456, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 70.16666889190674, "completions/min_length": 29.0, "epoch": 6.871680317696699, "grad_norm": 0.0049014740364062516, "kl": 0.08245849609375, "learning_rate": 2.267066552305253e-07, "loss": 8.244160562753677e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3457, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 60.78125190734863, "completions/min_length": 22.25, "epoch": 6.873665922065029, "grad_norm": 0.005104505422028492, "kl": 0.077484130859375, "learning_rate": 2.2644252230071898e-07, "loss": 7.759316940791905e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3458, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 66.48958492279053, "completions/min_length": 24.875, "epoch": 6.875651526433359, "grad_norm": 0.0050558672239010985, "kl": 0.074737548828125, "learning_rate": 2.2617849827998736e-07, "loss": 7.47351732570678e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3459, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.625, "completions/mean_length": 68.00000143051147, "completions/min_length": 24.25, "epoch": 6.877637130801688, "grad_norm": 0.004046435436120138, "kl": 0.08929443359375, "learning_rate": 2.259145832734443e-07, "loss": 8.930674812290817e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3460, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 62.21875190734863, "completions/min_length": 21.125, "epoch": 6.879622735170018, "grad_norm": 0.005786762414426067, "kl": 0.084991455078125, "learning_rate": 2.2565077738616023e-07, "loss": 8.511268970323727e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3461, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 63.68750190734863, "completions/min_length": 22.875, "epoch": 6.881608339538347, "grad_norm": 0.003251246710402099, "kl": 0.0687255859375, "learning_rate": 2.2538708072316153e-07, "loss": 6.87915162416175e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3462, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 71.8229193687439, "completions/min_length": 26.875, "epoch": 6.883593943906677, "grad_norm": 0.005648669361979694, "kl": 0.0877685546875, "learning_rate": 2.2512349338943148e-07, "loss": 8.774442540016025e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3463, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 62.875001430511475, "completions/min_length": 27.75, "epoch": 6.885579548275006, "grad_norm": 0.005085901688629966, "kl": 0.08203125, "learning_rate": 2.2486001548991014e-07, "loss": 8.193984103854746e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3464, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 62.468750953674316, "completions/min_length": 20.0, "epoch": 6.887565152643336, "grad_norm": 0.0036000547692858933, "kl": 0.0677490234375, "learning_rate": 2.2459664712949323e-07, "loss": 6.767272134311497e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3465, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 62.26041841506958, "completions/min_length": 19.75, "epoch": 6.889550757011666, "grad_norm": 0.005018476686333123, "kl": 0.078277587890625, "learning_rate": 2.2433338841303363e-07, "loss": 7.828741945559159e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3466, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 65.7604193687439, "completions/min_length": 23.875, "epoch": 6.891536361379995, "grad_norm": 0.0054465563280586445, "kl": 0.0760498046875, "learning_rate": 2.2407023944534032e-07, "loss": 7.601312245242298e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3467, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.125, "completions/mean_length": 57.42708444595337, "completions/min_length": 27.0, "epoch": 6.893521965748325, "grad_norm": 0.003232636385927136, "kl": 0.061798095703125, "learning_rate": 2.2380720033117829e-07, "loss": 6.178372859722003e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3468, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 58.13541793823242, "completions/min_length": 18.125, "epoch": 6.895507570116655, "grad_norm": 1.7874585035156132, "kl": 0.094512939453125, "learning_rate": 2.235442711752693e-07, "loss": -0.011597169563174248, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3469, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 56.22916793823242, "completions/min_length": 21.25, "epoch": 6.897493174484984, "grad_norm": 0.004961953164904006, "kl": 0.094879150390625, "learning_rate": 2.2328145208229094e-07, "loss": 9.486427006777376e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3470, "train_speed(iter/s)": 0.022665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 59.218751430511475, "completions/min_length": 17.5, "epoch": 6.899478778853314, "grad_norm": 0.00332144047900622, "kl": 0.0723876953125, "learning_rate": 2.2301874315687692e-07, "loss": 7.2396345785819e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3471, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 61.47916889190674, "completions/min_length": 25.625, "epoch": 6.901464383221644, "grad_norm": 0.030713156031456686, "kl": 0.10504150390625, "learning_rate": 2.2275614450361758e-07, "loss": 0.00010503574594622478, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3472, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 63.72916889190674, "completions/min_length": 22.0, "epoch": 6.903449987589973, "grad_norm": 0.0063864484231935265, "kl": 0.091461181640625, "learning_rate": 2.2249365622705851e-07, "loss": 9.151537960860878e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3473, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.125, "completions/mean_length": 68.54166889190674, "completions/min_length": 19.875, "epoch": 6.905435591958303, "grad_norm": 0.003606612700822254, "kl": 0.0810546875, "learning_rate": 2.222312784317027e-07, "loss": 8.102629362838343e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3474, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 69.96875238418579, "completions/min_length": 26.125, "epoch": 6.907421196326632, "grad_norm": 0.00931615978106732, "kl": 0.078948974609375, "learning_rate": 2.2196901122200795e-07, "loss": 7.907981489552185e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3475, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 66.13541841506958, "completions/min_length": 27.25, "epoch": 6.909406800694962, "grad_norm": 0.003297868054470474, "kl": 0.085235595703125, "learning_rate": 2.217068547023882e-07, "loss": 8.517040259903297e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3476, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 73.78125143051147, "completions/min_length": 31.75, "epoch": 6.911392405063291, "grad_norm": 0.003026304146279524, "kl": 0.06988525390625, "learning_rate": 2.2144480897721402e-07, "loss": 6.988673703745008e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3477, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 71.03125190734863, "completions/min_length": 26.5, "epoch": 6.913378009431621, "grad_norm": 0.005728868994121281, "kl": 0.0775146484375, "learning_rate": 2.2118287415081098e-07, "loss": 7.752554665785283e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3478, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 60.000001430511475, "completions/min_length": 26.875, "epoch": 6.915363613799951, "grad_norm": 0.003766331257942425, "kl": 0.060638427734375, "learning_rate": 2.209210503274614e-07, "loss": 6.05880341026932e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3479, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 70.92708492279053, "completions/min_length": 32.625, "epoch": 6.91734921816828, "grad_norm": 1.9026025174427164, "kl": 0.08837890625, "learning_rate": 2.2065933761140243e-07, "loss": 0.004589976742863655, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3480, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 62.61458492279053, "completions/min_length": 24.125, "epoch": 6.91933482253661, "grad_norm": 0.003322244130724308, "kl": 0.085693359375, "learning_rate": 2.2039773610682772e-07, "loss": 8.572596561862156e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3481, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 64.06250095367432, "completions/min_length": 23.625, "epoch": 6.9213204269049395, "grad_norm": 0.005178868877266053, "kl": 0.088104248046875, "learning_rate": 2.2013624591788667e-07, "loss": 8.814716420602053e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3482, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 68.00000286102295, "completions/min_length": 28.125, "epoch": 6.923306031273269, "grad_norm": 3.061341271111363, "kl": 0.095367431640625, "learning_rate": 2.1987486714868382e-07, "loss": -0.009309722110629082, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3483, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.875, "completions/mean_length": 58.062501430511475, "completions/min_length": 21.25, "epoch": 6.9252916356415986, "grad_norm": 1.6338134999234435, "kl": 0.088165283203125, "learning_rate": 2.1961359990327948e-07, "loss": 0.0071833315305411816, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3484, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.375, "completions/mean_length": 63.07291889190674, "completions/min_length": 30.375, "epoch": 6.9272772400099285, "grad_norm": 0.004369369790120132, "kl": 0.072235107421875, "learning_rate": 2.1935244428569017e-07, "loss": 7.217198435682803e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3485, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 62.645835399627686, "completions/min_length": 20.0, "epoch": 6.929262844378258, "grad_norm": 0.00444703640837691, "kl": 0.081939697265625, "learning_rate": 2.190914003998871e-07, "loss": 8.190786320483312e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3486, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.625, "completions/mean_length": 59.51041793823242, "completions/min_length": 22.375, "epoch": 6.9312484487465875, "grad_norm": 0.0037834384347517516, "kl": 0.06689453125, "learning_rate": 2.1883046834979757e-07, "loss": 6.698662764392793e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3487, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 78.9791693687439, "completions/min_length": 30.0, "epoch": 6.933234053114917, "grad_norm": 0.0033281949718922317, "kl": 0.0738525390625, "learning_rate": 2.1856964823930446e-07, "loss": 7.383064075838774e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3488, "train_speed(iter/s)": 0.022666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 59.895835161209106, "completions/min_length": 24.375, "epoch": 6.9352196574832465, "grad_norm": 0.004177736602361458, "kl": 0.0599365234375, "learning_rate": 2.183089401722454e-07, "loss": 5.99289451201912e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3489, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 63.40625286102295, "completions/min_length": 22.125, "epoch": 6.937205261851576, "grad_norm": 0.00326064519813375, "kl": 0.071380615234375, "learning_rate": 2.180483442524142e-07, "loss": 7.141270907595754e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3490, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 63.48958492279053, "completions/min_length": 22.375, "epoch": 6.9391908662199056, "grad_norm": 0.005937030871098344, "kl": 0.07696533203125, "learning_rate": 2.1778786058355952e-07, "loss": 7.694762462051585e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3491, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.125, "completions/mean_length": 55.770835399627686, "completions/min_length": 20.125, "epoch": 6.9411764705882355, "grad_norm": 0.00635906098039419, "kl": 0.063232421875, "learning_rate": 2.1752748926938524e-07, "loss": 6.31595539744012e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3492, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 63.395835399627686, "completions/min_length": 22.875, "epoch": 6.943162074956565, "grad_norm": 0.0035734480581738823, "kl": 0.075042724609375, "learning_rate": 2.1726723041355115e-07, "loss": 7.499953790102154e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3493, "train_speed(iter/s)": 0.022667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.125, "completions/mean_length": 55.92708492279053, "completions/min_length": 17.75, "epoch": 6.9451476793248945, "grad_norm": 0.003386932960175648, "kl": 0.095184326171875, "learning_rate": 2.170070841196715e-07, "loss": 9.50338362599723e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3494, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 68.77083492279053, "completions/min_length": 20.375, "epoch": 6.9471332836932245, "grad_norm": 0.017462602356197406, "kl": 0.0860595703125, "learning_rate": 2.1674705049131624e-07, "loss": 8.608737698523328e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3495, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 71.64583444595337, "completions/min_length": 27.25, "epoch": 6.9491188880615535, "grad_norm": 0.003342787719674488, "kl": 0.079345703125, "learning_rate": 2.1648712963201055e-07, "loss": 7.933162123663351e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3496, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 66.93750190734863, "completions/min_length": 25.125, "epoch": 6.9511044924298835, "grad_norm": 0.0047212923758240795, "kl": 0.0821533203125, "learning_rate": 2.1622732164523399e-07, "loss": 8.209255611291155e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3497, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 76.802086353302, "completions/min_length": 24.375, "epoch": 6.953090096798213, "grad_norm": 0.005209700574145823, "kl": 0.08551025390625, "learning_rate": 2.1596762663442213e-07, "loss": 8.537035319022834e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3498, "train_speed(iter/s)": 0.022669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 73.84375238418579, "completions/min_length": 31.125, "epoch": 6.9550757011665425, "grad_norm": 0.004903439730164932, "kl": 0.08502197265625, "learning_rate": 2.1570804470296494e-07, "loss": 8.500037802150473e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3499, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 62.28125190734863, "completions/min_length": 24.125, "epoch": 6.9570613055348725, "grad_norm": 0.00488732235933871, "kl": 0.073394775390625, "learning_rate": 2.154485759542073e-07, "loss": 7.344199548242614e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3500, "train_speed(iter/s)": 0.022668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 65.35416841506958, "completions/min_length": 18.25, "epoch": 6.9590469099032015, "grad_norm": 0.004979974472857077, "kl": 0.08349609375, "learning_rate": 2.1518922049144938e-07, "loss": 8.350598363904282e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3501, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 74.60416793823242, "completions/min_length": 24.25, "epoch": 6.9610325142715315, "grad_norm": 0.01165229206791968, "kl": 0.10406494140625, "learning_rate": 2.1492997841794647e-07, "loss": 0.00010403131454950199, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3502, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 69.91666889190674, "completions/min_length": 25.625, "epoch": 6.9630181186398605, "grad_norm": 0.005017248050714145, "kl": 0.072479248046875, "learning_rate": 2.1467084983690787e-07, "loss": 7.241721323225647e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3503, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.625, "completions/mean_length": 60.28125190734863, "completions/min_length": 24.0, "epoch": 6.9650037230081905, "grad_norm": 0.012709558011590925, "kl": 0.106689453125, "learning_rate": 2.1441183485149862e-07, "loss": 0.00010654113430064172, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3504, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 71.48958444595337, "completions/min_length": 27.25, "epoch": 6.96698932737652, "grad_norm": 2.257972941853662, "kl": 0.0977783203125, "learning_rate": 2.1415293356483777e-07, "loss": 0.00227789836935699, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.05974818021059036, "rewards/CineAccuracyORM/mean": 0.6666666669771075, "rewards/CineAccuracyORM/std": 0.28845512494444847, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3505, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 64.84375095367432, "completions/min_length": 23.375, "epoch": 6.9689749317448495, "grad_norm": 0.7321872964269951, "kl": 0.09014892578125, "learning_rate": 2.1389414607999977e-07, "loss": 0.006467541214078665, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3506, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 72.82291889190674, "completions/min_length": 23.5, "epoch": 6.9709605361131795, "grad_norm": 0.0035710598634169696, "kl": 0.068572998046875, "learning_rate": 2.1363547250001335e-07, "loss": 6.851096986792982e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3507, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 63.23958492279053, "completions/min_length": 27.375, "epoch": 6.972946140481509, "grad_norm": 1.7366125750443138, "kl": 0.090667724609375, "learning_rate": 2.1337691292786159e-07, "loss": 0.002294144593179226, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3508, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 60.000001668930054, "completions/min_length": 19.25, "epoch": 6.9749317448498385, "grad_norm": 0.004224070629503523, "kl": 0.08380126953125, "learning_rate": 2.1311846746648322e-07, "loss": 8.378517668461427e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3509, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.875, "completions/mean_length": 62.69791889190674, "completions/min_length": 18.875, "epoch": 6.976917349218168, "grad_norm": 0.005020541254357614, "kl": 0.102081298828125, "learning_rate": 2.128601362187706e-07, "loss": 0.00010203639976680279, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3510, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 62.12500190734863, "completions/min_length": 27.25, "epoch": 6.978902953586498, "grad_norm": 1.7979416665445698, "kl": 0.0810546875, "learning_rate": 2.1260191928757077e-07, "loss": -0.0007692854851484299, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3511, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 64.51041793823242, "completions/min_length": 22.75, "epoch": 6.980888557954827, "grad_norm": 0.005936664651624782, "kl": 0.07293701171875, "learning_rate": 2.123438167756857e-07, "loss": 7.291975634871051e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3512, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 55.750001430511475, "completions/min_length": 17.75, "epoch": 6.982874162323157, "grad_norm": 0.004799489362140963, "kl": 0.075469970703125, "learning_rate": 2.1208582878587123e-07, "loss": 7.53152635297738e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3513, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 80.37500381469727, "completions/min_length": 28.875, "epoch": 6.9848597666914864, "grad_norm": 0.003107792031337539, "kl": 0.073760986328125, "learning_rate": 2.1182795542083813e-07, "loss": 7.378715963568538e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3514, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 63.13541841506958, "completions/min_length": 26.125, "epoch": 6.986845371059816, "grad_norm": 0.0033922022698946635, "kl": 0.07904052734375, "learning_rate": 2.115701967832511e-07, "loss": 7.897923933342099e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3515, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 78.1041693687439, "completions/min_length": 26.875, "epoch": 6.9888309754281455, "grad_norm": 1.0260712899067936, "kl": 0.08111572265625, "learning_rate": 2.1131255297572936e-07, "loss": 0.012934553436934948, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3516, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.375, "completions/mean_length": 77.77083587646484, "completions/min_length": 28.0, "epoch": 6.990816579796475, "grad_norm": 0.008097382640221272, "kl": 0.0845947265625, "learning_rate": 2.1105502410084675e-07, "loss": 8.456048090010881e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3517, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 71.83333492279053, "completions/min_length": 28.0, "epoch": 6.992802184164805, "grad_norm": 0.00490636978181566, "kl": 0.09381103515625, "learning_rate": 2.1079761026113056e-07, "loss": 9.380362462252378e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3518, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 66.42708683013916, "completions/min_length": 23.125, "epoch": 6.994787788533134, "grad_norm": 0.005707926258265224, "kl": 0.070587158203125, "learning_rate": 2.1054031155906315e-07, "loss": 7.06023711245507e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3519, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 61.427085876464844, "completions/min_length": 21.625, "epoch": 6.996773392901464, "grad_norm": 0.003393516415305577, "kl": 0.07763671875, "learning_rate": 2.1028312809708037e-07, "loss": 7.768211071379483e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3520, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 60.14583444595337, "completions/min_length": 22.75, "epoch": 6.998758997269794, "grad_norm": 0.003576802054981375, "kl": 0.0765380859375, "learning_rate": 2.1002605997757238e-07, "loss": 7.656447996851057e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3521, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 77.39583683013916, "completions/min_length": 24.375, "epoch": 7.00198560436833, "grad_norm": 0.0031969466456915722, "kl": 0.069580078125, "learning_rate": 2.0976910730288354e-07, "loss": 6.961668987059966e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3522, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 66.08333492279053, "completions/min_length": 29.0, "epoch": 7.003971208736659, "grad_norm": 0.0037654768409044895, "kl": 0.09014892578125, "learning_rate": 2.0951227017531253e-07, "loss": 9.014253737404943e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3523, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 64.62500286102295, "completions/min_length": 26.75, "epoch": 7.005956813104989, "grad_norm": 0.003520291615816827, "kl": 0.0677490234375, "learning_rate": 2.0925554869711127e-07, "loss": 6.771342305000871e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3524, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 65.34375095367432, "completions/min_length": 24.125, "epoch": 7.007942417473318, "grad_norm": 0.003084352715649954, "kl": 0.07745361328125, "learning_rate": 2.089989429704863e-07, "loss": 7.758366700727493e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3525, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 64.02083587646484, "completions/min_length": 24.75, "epoch": 7.009928021841648, "grad_norm": 0.003513686127529461, "kl": 0.070587158203125, "learning_rate": 2.0874245309759768e-07, "loss": 7.050807471387088e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3526, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 65.85416841506958, "completions/min_length": 23.0, "epoch": 7.011913626209978, "grad_norm": 0.7406721199127559, "kl": 0.18603515625, "learning_rate": 2.0848607918055976e-07, "loss": 0.018216852098703384, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3527, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 69.85416793823242, "completions/min_length": 23.875, "epoch": 7.013899230578307, "grad_norm": 0.0032009757200067088, "kl": 0.075531005859375, "learning_rate": 2.0822982132144034e-07, "loss": 7.552739407401532e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3528, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 68.11458683013916, "completions/min_length": 26.0, "epoch": 7.015884834946637, "grad_norm": 0.004476144446104526, "kl": 0.089202880859375, "learning_rate": 2.079736796222607e-07, "loss": 8.9102795755025e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3529, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 65.07291841506958, "completions/min_length": 22.875, "epoch": 7.017870439314967, "grad_norm": 0.002994012045499423, "kl": 0.0665283203125, "learning_rate": 2.0771765418499715e-07, "loss": 6.65126062813215e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3530, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 65.40625143051147, "completions/min_length": 20.375, "epoch": 7.019856043683296, "grad_norm": 0.00294143413171213, "kl": 0.0889892578125, "learning_rate": 2.0746174511157844e-07, "loss": 8.884790440788493e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3531, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 55.97916793823242, "completions/min_length": 21.5, "epoch": 7.021841648051626, "grad_norm": 0.005378833640088181, "kl": 0.085113525390625, "learning_rate": 2.072059525038873e-07, "loss": 8.504880679538473e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3532, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 63.17708492279053, "completions/min_length": 19.125, "epoch": 7.023827252419955, "grad_norm": 0.003568418971912661, "kl": 0.086029052734375, "learning_rate": 2.0695027646376063e-07, "loss": 8.603769674664363e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3533, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 71.22916841506958, "completions/min_length": 17.375, "epoch": 7.025812856788285, "grad_norm": 0.0030669190865770107, "kl": 0.0733642578125, "learning_rate": 2.0669471709298804e-07, "loss": 7.332459790632129e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3534, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 71.43750286102295, "completions/min_length": 26.0, "epoch": 7.027798461156615, "grad_norm": 0.0028867647510709827, "kl": 0.08544921875, "learning_rate": 2.064392744933135e-07, "loss": 8.547432662453502e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3535, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 66.89583587646484, "completions/min_length": 28.0, "epoch": 7.029784065524944, "grad_norm": 0.0050638388537128185, "kl": 0.08343505859375, "learning_rate": 2.061839487664342e-07, "loss": 8.334654557984322e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3536, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 71.32291841506958, "completions/min_length": 26.5, "epoch": 7.031769669893274, "grad_norm": 0.012672089032473681, "kl": 0.11083984375, "learning_rate": 2.0592874001400056e-07, "loss": 0.00011071137123508379, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3537, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 73.90625143051147, "completions/min_length": 29.5, "epoch": 7.033755274261603, "grad_norm": 0.0031235418965348057, "kl": 0.0833740234375, "learning_rate": 2.0567364833761686e-07, "loss": 8.346197864739224e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3538, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 71.37500238418579, "completions/min_length": 21.75, "epoch": 7.035740878629933, "grad_norm": 0.0036628494077136425, "kl": 0.068145751953125, "learning_rate": 2.0541867383884042e-07, "loss": 6.81786477798596e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3539, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 63.437500953674316, "completions/min_length": 22.75, "epoch": 7.037726482998263, "grad_norm": 3.148342490849975, "kl": 0.08062744140625, "learning_rate": 2.0516381661918192e-07, "loss": 0.0010254066437482834, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3540, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.875, "completions/mean_length": 70.2916693687439, "completions/min_length": 27.125, "epoch": 7.039712087366592, "grad_norm": 0.0032683331730226613, "kl": 0.07977294921875, "learning_rate": 2.049090767801057e-07, "loss": 7.983069372130558e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3541, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 66.21875095367432, "completions/min_length": 26.75, "epoch": 7.041697691734922, "grad_norm": 0.0038616391656452896, "kl": 0.08355712890625, "learning_rate": 2.0465445442302885e-07, "loss": 8.349579002242535e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3542, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 77.90625190734863, "completions/min_length": 28.25, "epoch": 7.043683296103252, "grad_norm": 0.003226334948950208, "kl": 0.078582763671875, "learning_rate": 2.0439994964932217e-07, "loss": 7.855678995838389e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3543, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 64.80208539962769, "completions/min_length": 26.0, "epoch": 7.045668900471581, "grad_norm": 0.003889540449057347, "kl": 0.0782470703125, "learning_rate": 2.0414556256030952e-07, "loss": 7.817507139407098e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3544, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 62.83333444595337, "completions/min_length": 22.25, "epoch": 7.047654504839911, "grad_norm": 0.004836073333392053, "kl": 0.08123779296875, "learning_rate": 2.0389129325726756e-07, "loss": 8.123174484353513e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3545, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 71.12500190734863, "completions/min_length": 28.5, "epoch": 7.04964010920824, "grad_norm": 1.9903818126039807, "kl": 0.0830078125, "learning_rate": 2.0363714184142667e-07, "loss": -0.0016092360019683838, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3546, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 73.51041984558105, "completions/min_length": 23.5, "epoch": 7.05162571357657, "grad_norm": 1.0141092596339412, "kl": 0.113616943359375, "learning_rate": 2.0338310841396976e-07, "loss": -0.005444263573735952, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3547, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 61.833335399627686, "completions/min_length": 24.375, "epoch": 7.0536113179449, "grad_norm": 0.003813418030064665, "kl": 0.072845458984375, "learning_rate": 2.0312919307603283e-07, "loss": 7.275915413629264e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3548, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 66.67708492279053, "completions/min_length": 31.0, "epoch": 7.055596922313229, "grad_norm": 0.007228511181624242, "kl": 0.103515625, "learning_rate": 2.0287539592870519e-07, "loss": 0.00010354012192692608, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3549, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 64.39583492279053, "completions/min_length": 21.375, "epoch": 7.057582526681559, "grad_norm": 0.0037024744323373437, "kl": 0.0670166015625, "learning_rate": 2.0262171707302894e-07, "loss": 6.693199975416064e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3550, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 71.54166793823242, "completions/min_length": 27.875, "epoch": 7.059568131049888, "grad_norm": 0.0040026235516167534, "kl": 0.076690673828125, "learning_rate": 2.0236815660999884e-07, "loss": 7.67509700381197e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3551, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 63.08333492279053, "completions/min_length": 23.75, "epoch": 7.061553735418218, "grad_norm": 0.0072198913357872175, "kl": 0.08221435546875, "learning_rate": 2.0211471464056306e-07, "loss": 8.226436330005527e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3552, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 62.739585876464844, "completions/min_length": 21.125, "epoch": 7.063539339786548, "grad_norm": 0.0069164108224131085, "kl": 0.074493408203125, "learning_rate": 2.018613912656219e-07, "loss": 7.459659536834806e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3553, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 75.52083539962769, "completions/min_length": 27.25, "epoch": 7.065524944154877, "grad_norm": 0.005123172358202033, "kl": 0.067352294921875, "learning_rate": 2.0160818658602912e-07, "loss": 6.738472438883036e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3554, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 65.06250047683716, "completions/min_length": 21.75, "epoch": 7.067510548523207, "grad_norm": 0.006288664909237334, "kl": 0.063751220703125, "learning_rate": 2.013551007025906e-07, "loss": 6.374895747285336e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3555, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 66.52083444595337, "completions/min_length": 22.625, "epoch": 7.069496152891537, "grad_norm": 0.007216577441415266, "kl": 0.076904296875, "learning_rate": 2.0110213371606538e-07, "loss": 7.687686593271792e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3556, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 62.052085399627686, "completions/min_length": 20.875, "epoch": 7.071481757259866, "grad_norm": 0.007705580361005364, "kl": 0.077667236328125, "learning_rate": 2.008492857271652e-07, "loss": 7.767813804093748e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3557, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 64.31250238418579, "completions/min_length": 26.75, "epoch": 7.073467361628196, "grad_norm": 1.333871949930698, "kl": 0.09466552734375, "learning_rate": 2.0059655683655397e-07, "loss": 0.008863439783453941, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3558, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 62.833335399627686, "completions/min_length": 22.75, "epoch": 7.075452965996525, "grad_norm": 0.006352459488767535, "kl": 0.0841064453125, "learning_rate": 2.003439471448487e-07, "loss": 8.404585241805762e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3559, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 57.07291793823242, "completions/min_length": 18.125, "epoch": 7.077438570364855, "grad_norm": 0.005142236595226734, "kl": 0.099578857421875, "learning_rate": 2.0009145675261858e-07, "loss": 9.96140151983127e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3560, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 67.18750238418579, "completions/min_length": 23.125, "epoch": 7.079424174733185, "grad_norm": 0.005396894378942624, "kl": 0.0826416015625, "learning_rate": 1.9983908576038527e-07, "loss": 8.264069765573367e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3561, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 61.04166793823242, "completions/min_length": 20.0, "epoch": 7.081409779101514, "grad_norm": 0.0057154503947840244, "kl": 0.069976806640625, "learning_rate": 1.9958683426862332e-07, "loss": 6.998908065725118e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3562, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 71.26041841506958, "completions/min_length": 22.125, "epoch": 7.083395383469844, "grad_norm": 0.006523748080471988, "kl": 0.0814208984375, "learning_rate": 1.9933470237775923e-07, "loss": 8.142885053530335e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3563, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 70.40625190734863, "completions/min_length": 24.125, "epoch": 7.085380987838173, "grad_norm": 0.004770751718460034, "kl": 0.07879638671875, "learning_rate": 1.9908269018817215e-07, "loss": 7.872957939980552e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3564, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 62.19791793823242, "completions/min_length": 23.625, "epoch": 7.087366592206503, "grad_norm": 0.005293463431732053, "kl": 0.0784912109375, "learning_rate": 1.9883079780019374e-07, "loss": 7.849084067856893e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3565, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 73.81250333786011, "completions/min_length": 22.0, "epoch": 7.089352196574833, "grad_norm": 0.0043115206827554165, "kl": 0.09637451171875, "learning_rate": 1.9857902531410735e-07, "loss": 9.627666440792382e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3566, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 67.93750238418579, "completions/min_length": 29.25, "epoch": 7.091337800943162, "grad_norm": 0.005312950164005668, "kl": 0.07513427734375, "learning_rate": 1.9832737283014938e-07, "loss": 7.514693425036967e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3567, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 61.989585399627686, "completions/min_length": 21.875, "epoch": 7.093323405311492, "grad_norm": 0.003709909475130948, "kl": 0.07904052734375, "learning_rate": 1.9807584044850784e-07, "loss": 7.900722266640514e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3568, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 70.00000190734863, "completions/min_length": 21.375, "epoch": 7.095309009679822, "grad_norm": 0.004269799730926922, "kl": 0.0792236328125, "learning_rate": 1.97824428269323e-07, "loss": 7.928709237603471e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3569, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 69.08333444595337, "completions/min_length": 24.0, "epoch": 7.097294614048151, "grad_norm": 0.025801014928430586, "kl": 0.102874755859375, "learning_rate": 1.9757313639268763e-07, "loss": 0.00010284609015798196, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3570, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 60.36458444595337, "completions/min_length": 17.75, "epoch": 7.099280218416481, "grad_norm": 0.006109804196207869, "kl": 0.06494140625, "learning_rate": 1.973219649186465e-07, "loss": 6.498794391518459e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3571, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 54.03125190734863, "completions/min_length": 21.5, "epoch": 7.10126582278481, "grad_norm": 0.003118831974520875, "kl": 0.069549560546875, "learning_rate": 1.97070913947196e-07, "loss": 6.952178227948025e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3572, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 84.84375333786011, "completions/min_length": 25.25, "epoch": 7.10325142715314, "grad_norm": 0.004979906408050475, "kl": 0.11053466796875, "learning_rate": 1.9681998357828522e-07, "loss": 0.00011044459824915975, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3573, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 66.333336353302, "completions/min_length": 30.75, "epoch": 7.10523703152147, "grad_norm": 0.0032481137081087972, "kl": 0.084136962890625, "learning_rate": 1.965691739118146e-07, "loss": 8.406926644966006e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3574, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 68.16666889190674, "completions/min_length": 23.125, "epoch": 7.107222635889799, "grad_norm": 0.004992303066333042, "kl": 0.07928466796875, "learning_rate": 1.963184850476372e-07, "loss": 7.927630213089287e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3575, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 62.43750190734863, "completions/min_length": 24.625, "epoch": 7.109208240258129, "grad_norm": 0.013000234501473869, "kl": 0.0938720703125, "learning_rate": 1.9606791708555736e-07, "loss": 9.381659037899226e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3576, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 61.583335399627686, "completions/min_length": 28.875, "epoch": 7.111193844626458, "grad_norm": 0.004466753708183528, "kl": 0.075164794921875, "learning_rate": 1.9581747012533117e-07, "loss": 7.516539335483685e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3577, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 73.95833587646484, "completions/min_length": 28.5, "epoch": 7.113179448994788, "grad_norm": 2.1816427754376555, "kl": 0.076507568359375, "learning_rate": 1.9556714426666772e-07, "loss": -0.014472301118075848, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3578, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 71.28125095367432, "completions/min_length": 24.25, "epoch": 7.115165053363118, "grad_norm": 0.004389491435964849, "kl": 0.091796875, "learning_rate": 1.953169396092267e-07, "loss": 9.177574975183234e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3579, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 69.53125286102295, "completions/min_length": 25.625, "epoch": 7.117150657731447, "grad_norm": 1.2283282246893905, "kl": 0.080108642578125, "learning_rate": 1.9506685625261965e-07, "loss": -0.01618211343884468, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3580, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 79.34375238418579, "completions/min_length": 30.0, "epoch": 7.119136262099777, "grad_norm": 0.0032686577537887223, "kl": 0.06640625, "learning_rate": 1.9481689429641058e-07, "loss": 6.644184031756595e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3581, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.625, "completions/mean_length": 75.42708587646484, "completions/min_length": 22.0, "epoch": 7.121121866468107, "grad_norm": 0.0036956789255020617, "kl": 0.09368896484375, "learning_rate": 1.9456705384011423e-07, "loss": 9.37871154746972e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3582, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 83.45833492279053, "completions/min_length": 30.5, "epoch": 7.123107470836436, "grad_norm": 0.003298404515633062, "kl": 0.081878662109375, "learning_rate": 1.943173349831978e-07, "loss": 8.184403122868389e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3583, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 72.58333683013916, "completions/min_length": 27.625, "epoch": 7.125093075204766, "grad_norm": 0.0029432985497142706, "kl": 0.08953857421875, "learning_rate": 1.940677378250794e-07, "loss": 8.96383571671322e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3584, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 63.375001430511475, "completions/min_length": 22.25, "epoch": 7.127078679573095, "grad_norm": 0.056145638678491885, "kl": 0.109130859375, "learning_rate": 1.9381826246512916e-07, "loss": 0.00010919794294750318, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3585, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 66.72916841506958, "completions/min_length": 22.875, "epoch": 7.129064283941425, "grad_norm": 0.00377531799194084, "kl": 0.081024169921875, "learning_rate": 1.935689090026687e-07, "loss": 8.096140663838014e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3586, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 69.07291889190674, "completions/min_length": 30.75, "epoch": 7.131049888309755, "grad_norm": 0.00352380794184653, "kl": 0.085845947265625, "learning_rate": 1.9331967753697077e-07, "loss": 8.580145367886871e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3587, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 64.92708492279053, "completions/min_length": 24.0, "epoch": 7.133035492678084, "grad_norm": 0.0032822665562121005, "kl": 0.08056640625, "learning_rate": 1.9307056816725954e-07, "loss": 8.053382771322504e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3588, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.375, "completions/mean_length": 78.81250286102295, "completions/min_length": 29.75, "epoch": 7.135021097046414, "grad_norm": 0.004317907003769507, "kl": 0.069122314453125, "learning_rate": 1.9282158099271117e-07, "loss": 6.904612382641062e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3589, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 60.94791889190674, "completions/min_length": 26.375, "epoch": 7.137006701414743, "grad_norm": 0.005277435573608788, "kl": 0.07647705078125, "learning_rate": 1.9257271611245245e-07, "loss": 7.652993372175843e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3590, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 63.666667461395264, "completions/min_length": 25.125, "epoch": 7.138992305783073, "grad_norm": 0.006943862837693607, "kl": 0.09051513671875, "learning_rate": 1.9232397362556192e-07, "loss": 9.048033825820312e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3591, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 68.520836353302, "completions/min_length": 23.25, "epoch": 7.140977910151403, "grad_norm": 0.9024465011530635, "kl": 0.06005859375, "learning_rate": 1.9207535363106947e-07, "loss": 0.012049295008182526, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3592, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 63.18750238418579, "completions/min_length": 21.25, "epoch": 7.142963514519732, "grad_norm": 0.0035971724577689664, "kl": 0.069122314453125, "learning_rate": 1.9182685622795565e-07, "loss": 6.906967610120773e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3593, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 71.89583539962769, "completions/min_length": 24.875, "epoch": 7.144949118888062, "grad_norm": 0.007377335249580746, "kl": 0.08782958984375, "learning_rate": 1.91578481515153e-07, "loss": 8.78995269886218e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3594, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 78.32291984558105, "completions/min_length": 25.5, "epoch": 7.146934723256392, "grad_norm": 0.0032559502457722178, "kl": 0.08148193359375, "learning_rate": 1.9133022959154443e-07, "loss": 8.146220352500677e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3595, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 62.29166841506958, "completions/min_length": 17.5, "epoch": 7.148920327624721, "grad_norm": 1.029303708454347, "kl": 0.07916259765625, "learning_rate": 1.9108210055596464e-07, "loss": -0.021941017359495163, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.31764985248446465, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3596, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 69.50000143051147, "completions/min_length": 24.375, "epoch": 7.150905931993051, "grad_norm": 0.0036447591427848907, "kl": 0.0963134765625, "learning_rate": 1.9083409450719896e-07, "loss": 9.627988038118929e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3597, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 74.17708539962769, "completions/min_length": 30.375, "epoch": 7.15289153636138, "grad_norm": 0.005512578834195529, "kl": 0.0830078125, "learning_rate": 1.9058621154398352e-07, "loss": 8.303741924464703e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3598, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 74.88541841506958, "completions/min_length": 30.0, "epoch": 7.15487714072971, "grad_norm": 0.004643165784310934, "kl": 0.07440185546875, "learning_rate": 1.9033845176500656e-07, "loss": 7.443320646416396e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3599, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 68.270836353302, "completions/min_length": 26.5, "epoch": 7.1568627450980395, "grad_norm": 0.003744574858599434, "kl": 0.08074951171875, "learning_rate": 1.900908152689062e-07, "loss": 8.071074262261391e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3600, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 70.28125238418579, "completions/min_length": 29.625, "epoch": 7.158848349466369, "grad_norm": 0.0050952511347941805, "kl": 0.0889892578125, "learning_rate": 1.898433021542716e-07, "loss": 8.900444663595408e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3601, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 65.11458492279053, "completions/min_length": 19.25, "epoch": 7.160833953834699, "grad_norm": 2.6681930964879954, "kl": 0.089599609375, "learning_rate": 1.895959125196433e-07, "loss": -0.007380373775959015, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.06650752201676369, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.18335824459791183, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3602, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 67.31250190734863, "completions/min_length": 23.625, "epoch": 7.162819558203028, "grad_norm": 1.160380406495555, "kl": 0.07415771484375, "learning_rate": 1.8934864646351223e-07, "loss": 0.013130133971571922, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3603, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 63.42708444595337, "completions/min_length": 24.25, "epoch": 7.164805162571358, "grad_norm": 0.6578778375472332, "kl": 0.067138671875, "learning_rate": 1.891015040843203e-07, "loss": 0.0071890633553266525, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3604, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 61.54166841506958, "completions/min_length": 19.0, "epoch": 7.1667907669396875, "grad_norm": 0.005028032023527505, "kl": 0.0787353515625, "learning_rate": 1.8885448548046045e-07, "loss": 7.874410948716104e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3605, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 70.88541793823242, "completions/min_length": 23.875, "epoch": 7.168776371308017, "grad_norm": 0.005638272695269193, "kl": 0.073272705078125, "learning_rate": 1.8860759075027567e-07, "loss": 7.32544285710901e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3606, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 78.85416841506958, "completions/min_length": 22.625, "epoch": 7.1707619756763465, "grad_norm": 0.0032251042552413293, "kl": 0.082733154296875, "learning_rate": 1.8836081999206032e-07, "loss": 8.284873911179602e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3607, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 66.76041793823242, "completions/min_length": 23.5, "epoch": 7.1727475800446765, "grad_norm": 0.04925859862028904, "kl": 0.13531494140625, "learning_rate": 1.8811417330405905e-07, "loss": 0.00013531502918340266, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3608, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 59.87500190734863, "completions/min_length": 20.375, "epoch": 7.174733184413006, "grad_norm": 1.1658615432916306, "kl": 0.093475341796875, "learning_rate": 1.8786765078446686e-07, "loss": 0.004339361097663641, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3609, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 69.66666841506958, "completions/min_length": 21.0, "epoch": 7.1767187887813355, "grad_norm": 0.0032043902690534155, "kl": 0.079010009765625, "learning_rate": 1.8762125253143014e-07, "loss": 7.893408474046737e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3610, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 73.98958539962769, "completions/min_length": 29.75, "epoch": 7.178704393149665, "grad_norm": 0.003570326702981123, "kl": 0.074798583984375, "learning_rate": 1.8737497864304486e-07, "loss": 7.483335502911359e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3611, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 66.38541984558105, "completions/min_length": 24.5, "epoch": 7.1806899975179945, "grad_norm": 0.003849065498481363, "kl": 0.075408935546875, "learning_rate": 1.8712882921735807e-07, "loss": 7.548542635049671e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3612, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 74.04166889190674, "completions/min_length": 23.375, "epoch": 7.1826756018863245, "grad_norm": 0.0032969798299527374, "kl": 0.09912109375, "learning_rate": 1.8688280435236732e-07, "loss": 9.90181797533296e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3613, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 71.05208492279053, "completions/min_length": 24.625, "epoch": 7.1846612062546535, "grad_norm": 3.1178019987571712, "kl": 0.07476806640625, "learning_rate": 1.8663690414602e-07, "loss": 0.006656696554273367, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3614, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 73.25000286102295, "completions/min_length": 22.5, "epoch": 7.1866468106229835, "grad_norm": 1.1595501212157437, "kl": 0.07952880859375, "learning_rate": 1.8639112869621466e-07, "loss": -0.0056605227291584015, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3615, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 59.34375286102295, "completions/min_length": 19.875, "epoch": 7.188632414991313, "grad_norm": 0.004380555733444782, "kl": 0.05828857421875, "learning_rate": 1.8614547810079945e-07, "loss": 5.830806912854314e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3616, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 65.88541889190674, "completions/min_length": 32.25, "epoch": 7.1906180193596425, "grad_norm": 1.06476297550661, "kl": 0.08282470703125, "learning_rate": 1.85899952457573e-07, "loss": 0.011797348968684673, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3617, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 75.71875238418579, "completions/min_length": 28.75, "epoch": 7.1926036237279725, "grad_norm": 0.7337227952823936, "kl": 0.070068359375, "learning_rate": 1.8565455186428454e-07, "loss": 0.0020629758946597576, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3618, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 66.44791793823242, "completions/min_length": 20.125, "epoch": 7.1945892280963015, "grad_norm": 0.0056943358350315994, "kl": 0.058319091796875, "learning_rate": 1.8540927641863342e-07, "loss": 5.823990795761347e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3619, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 77.20833587646484, "completions/min_length": 27.75, "epoch": 7.1965748324646315, "grad_norm": 0.006876746035764185, "kl": 0.0811767578125, "learning_rate": 1.8516412621826865e-07, "loss": 8.115197852021083e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3620, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 69.58333587646484, "completions/min_length": 28.75, "epoch": 7.198560436832961, "grad_norm": 0.004658252605430419, "kl": 0.083953857421875, "learning_rate": 1.849191013607902e-07, "loss": 8.403870015172288e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3621, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 66.36458492279053, "completions/min_length": 18.25, "epoch": 7.2005460412012905, "grad_norm": 1.4665064388892504, "kl": 0.07342529296875, "learning_rate": 1.846742019437472e-07, "loss": -0.017064182087779045, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3622, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 64.22916793823242, "completions/min_length": 17.625, "epoch": 7.2025316455696204, "grad_norm": 0.005372467996328005, "kl": 0.09332275390625, "learning_rate": 1.844294280646399e-07, "loss": 9.324972052127123e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3623, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 73.14583683013916, "completions/min_length": 27.75, "epoch": 7.2045172499379495, "grad_norm": 0.003615080561329921, "kl": 0.08953857421875, "learning_rate": 1.8418477982091767e-07, "loss": 8.955893281381577e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3624, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 67.70833492279053, "completions/min_length": 25.25, "epoch": 7.2065028543062795, "grad_norm": 0.0034238844445201888, "kl": 0.089599609375, "learning_rate": 1.8394025730997986e-07, "loss": 8.977434481494129e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3625, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 69.01041841506958, "completions/min_length": 23.875, "epoch": 7.208488458674609, "grad_norm": 0.0032246664842049536, "kl": 0.084564208984375, "learning_rate": 1.8369586062917692e-07, "loss": 8.454352791886777e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3626, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 72.35416746139526, "completions/min_length": 21.125, "epoch": 7.2104740630429385, "grad_norm": 0.0035702864863273835, "kl": 0.077392578125, "learning_rate": 1.8345158987580789e-07, "loss": 7.734754763077945e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3627, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 66.73958444595337, "completions/min_length": 24.375, "epoch": 7.212459667411268, "grad_norm": 0.004527331339223867, "kl": 0.07568359375, "learning_rate": 1.832074451471221e-07, "loss": 7.57636662456207e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3628, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 72.11458492279053, "completions/min_length": 26.875, "epoch": 7.2144452717795975, "grad_norm": 0.003252213307206212, "kl": 0.072357177734375, "learning_rate": 1.8296342654031915e-07, "loss": 7.23116536391899e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3629, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 67.13541841506958, "completions/min_length": 22.375, "epoch": 7.216430876147927, "grad_norm": 0.003263218079245263, "kl": 0.068878173828125, "learning_rate": 1.827195341525476e-07, "loss": 6.885667971801013e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3630, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 66.19791889190674, "completions/min_length": 22.25, "epoch": 7.218416480516257, "grad_norm": 0.005888469859037103, "kl": 0.0772705078125, "learning_rate": 1.824757680809067e-07, "loss": 7.722133887000382e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3631, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 72.91666793823242, "completions/min_length": 24.875, "epoch": 7.2204020848845865, "grad_norm": 0.0033011679554252817, "kl": 0.066375732421875, "learning_rate": 1.8223212842244445e-07, "loss": 6.642604421358556e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3632, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 68.00000238418579, "completions/min_length": 24.5, "epoch": 7.222387689252916, "grad_norm": 0.0030918208023705614, "kl": 0.064300537109375, "learning_rate": 1.8198861527415927e-07, "loss": 6.431159272324294e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3633, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.375, "completions/mean_length": 65.44791841506958, "completions/min_length": 17.375, "epoch": 7.224373293621246, "grad_norm": 0.003680694392114215, "kl": 0.062042236328125, "learning_rate": 1.8174522873299907e-07, "loss": 6.202972144819796e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3634, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.875, "completions/mean_length": 71.65625238418579, "completions/min_length": 21.0, "epoch": 7.226358897989575, "grad_norm": 0.0038020674881131256, "kl": 0.06915283203125, "learning_rate": 1.815019688958609e-07, "loss": 6.918206781847402e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3635, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 67.72916841506958, "completions/min_length": 20.5, "epoch": 7.228344502357905, "grad_norm": 1.85318553184896, "kl": 0.0838623046875, "learning_rate": 1.8125883585959207e-07, "loss": 0.0008341341163031757, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3636, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.625, "completions/mean_length": 58.85416841506958, "completions/min_length": 21.375, "epoch": 7.230330106726234, "grad_norm": 1.1640243228901448, "kl": 0.09576416015625, "learning_rate": 1.8101582972098883e-07, "loss": -0.0021224557422101498, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3637, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.375, "completions/mean_length": 59.97916841506958, "completions/min_length": 25.875, "epoch": 7.232315711094564, "grad_norm": 0.008255185801152087, "kl": 0.066986083984375, "learning_rate": 1.8077295057679694e-07, "loss": 6.694767944281921e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3638, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 63.13541889190674, "completions/min_length": 18.0, "epoch": 7.234301315462894, "grad_norm": 1.3654172547692, "kl": 0.107025146484375, "learning_rate": 1.8053019852371194e-07, "loss": 0.01162738911807537, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3639, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 67.54166889190674, "completions/min_length": 21.0, "epoch": 7.236286919831223, "grad_norm": 0.0032878012538459568, "kl": 0.0692138671875, "learning_rate": 1.8028757365837882e-07, "loss": 6.929754454176873e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3640, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 57.57291889190674, "completions/min_length": 20.125, "epoch": 7.238272524199553, "grad_norm": 0.0035414413192917907, "kl": 0.05938720703125, "learning_rate": 1.800450760773914e-07, "loss": 5.931744090048596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3641, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 71.13541984558105, "completions/min_length": 30.75, "epoch": 7.240258128567882, "grad_norm": 0.00784555685785729, "kl": 0.0799560546875, "learning_rate": 1.7980270587729336e-07, "loss": 7.995144551387057e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3642, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 68.97916889190674, "completions/min_length": 22.25, "epoch": 7.242243732936212, "grad_norm": 0.003570096861221876, "kl": 0.075531005859375, "learning_rate": 1.7956046315457723e-07, "loss": 7.559488585684448e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3643, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 71.63541841506958, "completions/min_length": 23.375, "epoch": 7.244229337304542, "grad_norm": 0.005837440928017228, "kl": 0.074310302734375, "learning_rate": 1.793183480056853e-07, "loss": 7.432702841470018e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3644, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 73.31250238418579, "completions/min_length": 25.875, "epoch": 7.246214941672871, "grad_norm": 0.004100565317778416, "kl": 0.069091796875, "learning_rate": 1.7907636052700864e-07, "loss": 6.91145978635177e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3645, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 71.68750238418579, "completions/min_length": 22.625, "epoch": 7.248200546041201, "grad_norm": 0.004179961518369871, "kl": 0.06793212890625, "learning_rate": 1.7883450081488732e-07, "loss": 6.79224613122642e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3646, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 60.000001430511475, "completions/min_length": 20.375, "epoch": 7.250186150409531, "grad_norm": 0.05588890825765668, "kl": 0.08642578125, "learning_rate": 1.785927689656115e-07, "loss": 8.647267532069236e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3647, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 67.72916889190674, "completions/min_length": 24.5, "epoch": 7.25217175477786, "grad_norm": 0.0038290025056919638, "kl": 0.085540771484375, "learning_rate": 1.783511650754194e-07, "loss": 8.555524982511997e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3648, "train_speed(iter/s)": 0.022664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 75.93750286102295, "completions/min_length": 17.375, "epoch": 7.25415735914619, "grad_norm": 0.0034253709040634895, "kl": 0.064971923828125, "learning_rate": 1.7810968924049863e-07, "loss": 6.494233821285889e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3649, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.375, "completions/mean_length": 65.02083539962769, "completions/min_length": 21.125, "epoch": 7.256142963514519, "grad_norm": 0.004813522845239572, "kl": 0.071014404296875, "learning_rate": 1.778683415569861e-07, "loss": 7.104119868017733e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3650, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 66.94791889190674, "completions/min_length": 27.375, "epoch": 7.258128567882849, "grad_norm": 0.003657625826420808, "kl": 0.068511962890625, "learning_rate": 1.7762712212096726e-07, "loss": 6.845461757620797e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3651, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 58.781251430511475, "completions/min_length": 18.0, "epoch": 7.260114172251179, "grad_norm": 0.004929471879582024, "kl": 0.067047119140625, "learning_rate": 1.7738603102847693e-07, "loss": 6.699098594253883e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3652, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 66.70833492279053, "completions/min_length": 17.875, "epoch": 7.262099776619508, "grad_norm": 0.007014026328482691, "kl": 0.08203125, "learning_rate": 1.771450683754984e-07, "loss": 8.207526843762025e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3653, "train_speed(iter/s)": 0.022663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 70.25000286102295, "completions/min_length": 26.125, "epoch": 7.264085380987838, "grad_norm": 0.3831553643252573, "kl": 0.164306640625, "learning_rate": 1.7690423425796418e-07, "loss": 0.00016419807798229158, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3654, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 70.87500190734863, "completions/min_length": 25.625, "epoch": 7.266070985356167, "grad_norm": 0.003026175210895782, "kl": 0.0753173828125, "learning_rate": 1.766635287717556e-07, "loss": 7.538365025538951e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3655, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.125, "completions/mean_length": 67.94791841506958, "completions/min_length": 19.125, "epoch": 7.268056589724497, "grad_norm": 0.003917943083694712, "kl": 0.0838623046875, "learning_rate": 1.7642295201270258e-07, "loss": 8.375368634006009e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3656, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 67.97916793823242, "completions/min_length": 25.5, "epoch": 7.270042194092827, "grad_norm": 0.004391567091387477, "kl": 0.0802001953125, "learning_rate": 1.761825040765836e-07, "loss": 8.012625039555132e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3657, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 77.44791889190674, "completions/min_length": 23.125, "epoch": 7.272027798461156, "grad_norm": 0.010286213826059558, "kl": 0.07977294921875, "learning_rate": 1.759421850591265e-07, "loss": 7.975217886269093e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3658, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 65.34375190734863, "completions/min_length": 22.5, "epoch": 7.274013402829486, "grad_norm": 1.1376665233089656, "kl": 0.067047119140625, "learning_rate": 1.757019950560071e-07, "loss": 0.008707335218787193, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3659, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 70.75000286102295, "completions/min_length": 29.625, "epoch": 7.275999007197816, "grad_norm": 0.0034317891252947782, "kl": 0.06640625, "learning_rate": 1.7546193416285028e-07, "loss": 6.636339094256982e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3660, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 64.38541793823242, "completions/min_length": 21.0, "epoch": 7.277984611566145, "grad_norm": 0.003994041936600481, "kl": 0.079681396484375, "learning_rate": 1.7522200247522962e-07, "loss": 7.963718235259876e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3661, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.375, "completions/mean_length": 73.69791793823242, "completions/min_length": 21.625, "epoch": 7.279970215934475, "grad_norm": 0.004547813220369437, "kl": 0.068115234375, "learning_rate": 1.749822000886667e-07, "loss": 6.820970156695694e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3662, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 65.25000238418579, "completions/min_length": 18.5, "epoch": 7.281955820302804, "grad_norm": 0.003709043515648076, "kl": 0.071563720703125, "learning_rate": 1.747425270986323e-07, "loss": 7.153738988563418e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3663, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 73.79166889190674, "completions/min_length": 24.75, "epoch": 7.283941424671134, "grad_norm": 0.007759282471757907, "kl": 0.093658447265625, "learning_rate": 1.7450298360054522e-07, "loss": 9.368751489091665e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3664, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 70.60416841506958, "completions/min_length": 22.5, "epoch": 7.285927029039464, "grad_norm": 0.004104329198069711, "kl": 0.0682373046875, "learning_rate": 1.7426356968977263e-07, "loss": 6.822431168984622e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3665, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 64.07291841506958, "completions/min_length": 20.375, "epoch": 7.287912633407793, "grad_norm": 0.006823326499259972, "kl": 0.08245849609375, "learning_rate": 1.7402428546163073e-07, "loss": 8.253773557953537e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3666, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.125, "completions/mean_length": 61.26041841506958, "completions/min_length": 16.875, "epoch": 7.289898237776123, "grad_norm": 0.0038679289246737803, "kl": 0.062408447265625, "learning_rate": 1.7378513101138327e-07, "loss": 6.239158392418176e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3667, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 60.36458444595337, "completions/min_length": 15.625, "epoch": 7.291883842144452, "grad_norm": 0.004286003661968799, "kl": 0.070831298828125, "learning_rate": 1.7354610643424295e-07, "loss": 7.08344450686127e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3668, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 66.72916793823242, "completions/min_length": 20.125, "epoch": 7.293869446512782, "grad_norm": 0.004885827089730887, "kl": 0.0672607421875, "learning_rate": 1.7330721182537072e-07, "loss": 6.717625365126878e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3669, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 75.37500238418579, "completions/min_length": 24.0, "epoch": 7.295855050881112, "grad_norm": 0.018599687850253455, "kl": 0.111846923828125, "learning_rate": 1.730684472798753e-07, "loss": 0.00011176713451277465, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3670, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 60.45833492279053, "completions/min_length": 22.0, "epoch": 7.297840655249441, "grad_norm": 0.003950746716331168, "kl": 0.073028564453125, "learning_rate": 1.7282981289281428e-07, "loss": 7.299071876332164e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3671, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 67.81250095367432, "completions/min_length": 18.375, "epoch": 7.299826259617771, "grad_norm": 0.00506439280920554, "kl": 0.06732177734375, "learning_rate": 1.7259130875919292e-07, "loss": 6.73301619826816e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3672, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 72.42708492279053, "completions/min_length": 22.25, "epoch": 7.301811863986101, "grad_norm": 0.0036809495612570146, "kl": 0.06866455078125, "learning_rate": 1.7235293497396463e-07, "loss": 6.871431833133101e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3673, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 70.23958539962769, "completions/min_length": 19.0, "epoch": 7.30379746835443, "grad_norm": 0.003067456714673495, "kl": 0.096466064453125, "learning_rate": 1.7211469163203142e-07, "loss": 9.651621803641319e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3674, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 60.66666889190674, "completions/min_length": 20.75, "epoch": 7.30578307272276, "grad_norm": 0.004575498811356828, "kl": 0.07110595703125, "learning_rate": 1.7187657882824285e-07, "loss": 7.108508725650609e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3675, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 67.41666889190674, "completions/min_length": 28.0, "epoch": 7.307768677091089, "grad_norm": 0.005539836408968533, "kl": 0.068817138671875, "learning_rate": 1.71638596657397e-07, "loss": 6.887719791848212e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3676, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 74.65625190734863, "completions/min_length": 27.25, "epoch": 7.309754281459419, "grad_norm": 0.21519201724514148, "kl": 0.322509765625, "learning_rate": 1.7140074521423942e-07, "loss": 0.0003220312064513564, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3677, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 66.895836353302, "completions/min_length": 25.375, "epoch": 7.311739885827749, "grad_norm": 0.004288689024352217, "kl": 0.066070556640625, "learning_rate": 1.7116302459346378e-07, "loss": 6.615303573198617e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3678, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 71.37500143051147, "completions/min_length": 28.625, "epoch": 7.313725490196078, "grad_norm": 0.002940887286837973, "kl": 0.07940673828125, "learning_rate": 1.7092543488971196e-07, "loss": 7.931856089271605e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3679, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 66.83333587646484, "completions/min_length": 23.0, "epoch": 7.315711094564408, "grad_norm": 0.00489847858574876, "kl": 0.07293701171875, "learning_rate": 1.7068797619757318e-07, "loss": 7.286130858119577e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3680, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 70.43750238418579, "completions/min_length": 25.0, "epoch": 7.317696698932737, "grad_norm": 0.7745185552654259, "kl": 0.096099853515625, "learning_rate": 1.704506486115851e-07, "loss": -0.0005042193224653602, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3681, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 74.50000238418579, "completions/min_length": 24.0, "epoch": 7.319682303301067, "grad_norm": 2.542847507760862, "kl": 0.386383056640625, "learning_rate": 1.7021345222623296e-07, "loss": 0.007674667984247208, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3682, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.75, "completions/mean_length": 75.01041841506958, "completions/min_length": 25.875, "epoch": 7.321667907669397, "grad_norm": 1.057283856004343, "kl": 0.073974609375, "learning_rate": 1.699763871359494e-07, "loss": -0.0048217372968792915, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3683, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 69.35416793823242, "completions/min_length": 22.75, "epoch": 7.323653512037726, "grad_norm": 0.004235557869708119, "kl": 0.08636474609375, "learning_rate": 1.697394534351154e-07, "loss": 8.627890929346904e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3684, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.125, "completions/mean_length": 79.67708539962769, "completions/min_length": 24.375, "epoch": 7.325639116406056, "grad_norm": 0.003045113123197206, "kl": 0.06195068359375, "learning_rate": 1.6950265121805925e-07, "loss": 6.197634502314031e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3685, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 67.5729193687439, "completions/min_length": 25.125, "epoch": 7.327624720774386, "grad_norm": 0.0036909093213039804, "kl": 0.0718994140625, "learning_rate": 1.6926598057905667e-07, "loss": 7.185227877926081e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3686, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 67.35416841506958, "completions/min_length": 20.0, "epoch": 7.329610325142715, "grad_norm": 2.269020247000388, "kl": 0.078460693359375, "learning_rate": 1.6902944161233156e-07, "loss": 0.008842803537845612, "memory(GiB)": 94.21, "reward": 1.6666666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.375051774084568, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3687, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 72.00000238418579, "completions/min_length": 19.0, "epoch": 7.331595929511045, "grad_norm": 0.00448452043183264, "kl": 0.070159912109375, "learning_rate": 1.6879303441205534e-07, "loss": 7.01812095940113e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3688, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 66.04166841506958, "completions/min_length": 26.25, "epoch": 7.333581533879374, "grad_norm": 0.005052600696867211, "kl": 0.073028564453125, "learning_rate": 1.685567590723463e-07, "loss": 7.30978135834448e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3689, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 68.95833539962769, "completions/min_length": 24.625, "epoch": 7.335567138247704, "grad_norm": 0.0033262025791730046, "kl": 0.0770263671875, "learning_rate": 1.6832061568727112e-07, "loss": 7.711358921369538e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3690, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 75.38541746139526, "completions/min_length": 26.625, "epoch": 7.337552742616034, "grad_norm": 0.004018029285821298, "kl": 0.063812255859375, "learning_rate": 1.6808460435084314e-07, "loss": 6.3762825448066e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3691, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 64.47916841506958, "completions/min_length": 26.25, "epoch": 7.339538346984363, "grad_norm": 0.937614273501965, "kl": 0.063751220703125, "learning_rate": 1.6784872515702397e-07, "loss": 0.0017140706768259406, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3692, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 80.00000333786011, "completions/min_length": 27.0, "epoch": 7.341523951352693, "grad_norm": 0.4813402208183356, "kl": 0.06744384765625, "learning_rate": 1.6761297819972188e-07, "loss": -0.02132234536111355, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3693, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 54.38541841506958, "completions/min_length": 18.125, "epoch": 7.343509555721022, "grad_norm": 0.004352288017120049, "kl": 0.080718994140625, "learning_rate": 1.6737736357279242e-07, "loss": 8.059882384259254e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3694, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.625, "completions/mean_length": 70.40625143051147, "completions/min_length": 18.625, "epoch": 7.345495160089352, "grad_norm": 0.004990534503485936, "kl": 0.071929931640625, "learning_rate": 1.671418813700395e-07, "loss": 7.192611519712955e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3695, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 70.18750238418579, "completions/min_length": 18.875, "epoch": 7.347480764457682, "grad_norm": 0.005006900854738232, "kl": 0.065093994140625, "learning_rate": 1.669065316852133e-07, "loss": 6.509164086310193e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3696, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 75.88541793823242, "completions/min_length": 31.25, "epoch": 7.349466368826011, "grad_norm": 0.00550760219413375, "kl": 0.084442138671875, "learning_rate": 1.666713146120114e-07, "loss": 8.445083949482068e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3697, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 68.04166841506958, "completions/min_length": 24.5, "epoch": 7.351451973194341, "grad_norm": 0.0034477512932322004, "kl": 0.07135009765625, "learning_rate": 1.6643623024407904e-07, "loss": 7.129686127882451e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3698, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 69.35416841506958, "completions/min_length": 27.125, "epoch": 7.353437577562671, "grad_norm": 0.00520627033337959, "kl": 0.069671630859375, "learning_rate": 1.6620127867500804e-07, "loss": 6.969399692025036e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3699, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 70.96875190734863, "completions/min_length": 17.375, "epoch": 7.355423181931, "grad_norm": 0.004511789607719959, "kl": 0.075042724609375, "learning_rate": 1.659664599983379e-07, "loss": 7.499841740354896e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3700, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 77.08333539962769, "completions/min_length": 17.875, "epoch": 7.35740878629933, "grad_norm": 0.0036551016086177044, "kl": 0.078765869140625, "learning_rate": 1.657317743075547e-07, "loss": 7.884902151999995e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3701, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 67.41666841506958, "completions/min_length": 20.5, "epoch": 7.359394390667659, "grad_norm": 0.0038184194672485614, "kl": 0.067535400390625, "learning_rate": 1.6549722169609194e-07, "loss": 6.749337626388296e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3702, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 77.6041693687439, "completions/min_length": 32.75, "epoch": 7.361379995035989, "grad_norm": 0.003729448272134716, "kl": 0.086639404296875, "learning_rate": 1.6526280225733018e-07, "loss": 8.666288340464234e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3703, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 64.04166841506958, "completions/min_length": 19.5, "epoch": 7.363365599404319, "grad_norm": 0.0035874148845389973, "kl": 0.086578369140625, "learning_rate": 1.6502851608459668e-07, "loss": 8.64756730152294e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3704, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 58.27083492279053, "completions/min_length": 17.875, "epoch": 7.365351203772648, "grad_norm": 0.003226149159225007, "kl": 0.06427001953125, "learning_rate": 1.647943632711656e-07, "loss": 6.430871144402772e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3705, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 62.11458444595337, "completions/min_length": 18.0, "epoch": 7.367336808140978, "grad_norm": 0.004341457548288576, "kl": 0.09503173828125, "learning_rate": 1.6456034391025846e-07, "loss": 9.500091982772574e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3706, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.125, "completions/mean_length": 66.35416841506958, "completions/min_length": 30.0, "epoch": 7.369322412509307, "grad_norm": 1.2009485138725684, "kl": 0.0792236328125, "learning_rate": 1.6432645809504308e-07, "loss": -0.011171567253768444, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3707, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 62.72916841506958, "completions/min_length": 16.75, "epoch": 7.371308016877637, "grad_norm": 0.005311722189602332, "kl": 0.089691162109375, "learning_rate": 1.6409270591863455e-07, "loss": 8.972895739134401e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3708, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 62.625001430511475, "completions/min_length": 22.5, "epoch": 7.373293621245967, "grad_norm": 1.6268324295172136, "kl": 0.074859619140625, "learning_rate": 1.6385908747409483e-07, "loss": -0.010995155200362206, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.06436607986688614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3709, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 60.9791693687439, "completions/min_length": 20.0, "epoch": 7.375279225614296, "grad_norm": 1.4815832501801613, "kl": 0.086883544921875, "learning_rate": 1.6362560285443194e-07, "loss": -0.00015392526984214783, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3710, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 68.41666841506958, "completions/min_length": 20.625, "epoch": 7.377264829982626, "grad_norm": 1.684582227167695, "kl": 0.078704833984375, "learning_rate": 1.6339225215260156e-07, "loss": 0.01579081267118454, "memory(GiB)": 94.21, "reward": 1.9062500149011612, "reward_std": 0.057790378108620644, "rewards/CineAccuracyORM/mean": 0.9062500074505806, "rewards/CineAccuracyORM/std": 0.15001969039440155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3711, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 68.59375333786011, "completions/min_length": 26.625, "epoch": 7.379250434350956, "grad_norm": 0.003311680993112983, "kl": 0.079254150390625, "learning_rate": 1.6315903546150533e-07, "loss": 7.929686398711056e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3712, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.875, "completions/mean_length": 74.57291984558105, "completions/min_length": 25.5, "epoch": 7.381236038719285, "grad_norm": 0.006924024548109724, "kl": 0.0894775390625, "learning_rate": 1.6292595287399175e-07, "loss": 8.934707148000598e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3713, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 63.7604193687439, "completions/min_length": 17.25, "epoch": 7.383221643087615, "grad_norm": 0.003552167184073572, "kl": 0.070220947265625, "learning_rate": 1.6269300448285616e-07, "loss": 7.019557961029932e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3714, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 61.44791889190674, "completions/min_length": 22.375, "epoch": 7.385207247455944, "grad_norm": 0.0032023829340718335, "kl": 0.0833740234375, "learning_rate": 1.6246019038084008e-07, "loss": 8.337446342920884e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3715, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 72.01041889190674, "completions/min_length": 29.0, "epoch": 7.387192851824274, "grad_norm": 0.039242887684721936, "kl": 0.08941650390625, "learning_rate": 1.6222751066063184e-07, "loss": 8.92629032023251e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3716, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 64.25000095367432, "completions/min_length": 19.125, "epoch": 7.389178456192604, "grad_norm": 0.010162560220756218, "kl": 0.081787109375, "learning_rate": 1.6199496541486646e-07, "loss": 8.174665708793327e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3717, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 69.833336353302, "completions/min_length": 29.125, "epoch": 7.391164060560933, "grad_norm": 0.008968494846799457, "kl": 0.081756591796875, "learning_rate": 1.6176255473612477e-07, "loss": 8.176537085091695e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3718, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 67.27083587646484, "completions/min_length": 21.75, "epoch": 7.393149664929263, "grad_norm": 0.009010814340404596, "kl": 0.085052490234375, "learning_rate": 1.6153027871693482e-07, "loss": 8.500387048115954e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3719, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 71.81250238418579, "completions/min_length": 26.25, "epoch": 7.395135269297592, "grad_norm": 0.00867112265807232, "kl": 0.085784912109375, "learning_rate": 1.6129813744977027e-07, "loss": 8.597113628638908e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3720, "train_speed(iter/s)": 0.022662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 74.75000381469727, "completions/min_length": 27.5, "epoch": 7.397120873665922, "grad_norm": 0.0033974172272662295, "kl": 0.0770263671875, "learning_rate": 1.6106613102705192e-07, "loss": 7.699093839619309e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3721, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 79.06250381469727, "completions/min_length": 24.75, "epoch": 7.399106478034252, "grad_norm": 0.006914635592586072, "kl": 0.080047607421875, "learning_rate": 1.6083425954114604e-07, "loss": 8.005928248167038e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3722, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 73.5416693687439, "completions/min_length": 24.5, "epoch": 7.401092082402581, "grad_norm": 0.009539204097367379, "kl": 0.082061767578125, "learning_rate": 1.606025230843659e-07, "loss": 8.195355621865019e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3723, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 62.69791793823242, "completions/min_length": 21.75, "epoch": 7.403077686770911, "grad_norm": 0.017346996036748843, "kl": 0.079925537109375, "learning_rate": 1.603709217489708e-07, "loss": 8.00511843408458e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3724, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 82.77083587646484, "completions/min_length": 32.375, "epoch": 7.405063291139241, "grad_norm": 0.0036209832409043437, "kl": 0.08355712890625, "learning_rate": 1.6013945562716613e-07, "loss": 8.352326403837651e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3725, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 71.41666746139526, "completions/min_length": 25.0, "epoch": 7.40704889550757, "grad_norm": 0.0030248817930064007, "kl": 0.067626953125, "learning_rate": 1.5990812481110322e-07, "loss": 6.757134542567655e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3726, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 72.82291841506958, "completions/min_length": 29.125, "epoch": 7.4090344998759, "grad_norm": 0.013381122097346324, "kl": 0.092742919921875, "learning_rate": 1.5967692939288018e-07, "loss": 9.265523112844676e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3727, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 63.15625190734863, "completions/min_length": 23.75, "epoch": 7.411020104244229, "grad_norm": 1.0832269839662216, "kl": 0.071044921875, "learning_rate": 1.5944586946454054e-07, "loss": 0.008739238604903221, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3728, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 72.47916793823242, "completions/min_length": 25.25, "epoch": 7.413005708612559, "grad_norm": 0.007544861598679511, "kl": 0.092529296875, "learning_rate": 1.5921494511807427e-07, "loss": 9.254638280253857e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3729, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 66.34375286102295, "completions/min_length": 23.375, "epoch": 7.414991312980889, "grad_norm": 0.00966382524771158, "kl": 0.071197509765625, "learning_rate": 1.5898415644541757e-07, "loss": 7.125074625946581e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3730, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.125, "completions/mean_length": 80.802086353302, "completions/min_length": 18.875, "epoch": 7.416976917349218, "grad_norm": 0.0030872172996127987, "kl": 0.073822021484375, "learning_rate": 1.587535035384519e-07, "loss": 7.389920938294381e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3731, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 73.12500143051147, "completions/min_length": 32.0, "epoch": 7.418962521717548, "grad_norm": 0.0038140118721076686, "kl": 0.06573486328125, "learning_rate": 1.585229864890056e-07, "loss": 6.571651465492323e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3732, "train_speed(iter/s)": 0.022661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 81.68750190734863, "completions/min_length": 21.875, "epoch": 7.420948126085877, "grad_norm": 0.004606751552241449, "kl": 0.074798583984375, "learning_rate": 1.5829260538885202e-07, "loss": 7.473808364011347e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3733, "train_speed(iter/s)": 0.02266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 63.80208444595337, "completions/min_length": 24.75, "epoch": 7.422933730454207, "grad_norm": 0.005899212493461777, "kl": 0.060760498046875, "learning_rate": 1.5806236032971087e-07, "loss": 6.0760499764001e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3734, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 68.17708492279053, "completions/min_length": 21.375, "epoch": 7.424919334822537, "grad_norm": 0.0038211351096829015, "kl": 0.0880126953125, "learning_rate": 1.5783225140324784e-07, "loss": 8.808104757918045e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3735, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 57.125001192092896, "completions/min_length": 20.375, "epoch": 7.426904939190866, "grad_norm": 0.0051748181219927, "kl": 0.07061767578125, "learning_rate": 1.576022787010739e-07, "loss": 7.057129550958052e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3736, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 70.14583587646484, "completions/min_length": 27.75, "epoch": 7.428890543559196, "grad_norm": 0.006471687925077589, "kl": 0.076416015625, "learning_rate": 1.5737244231474622e-07, "loss": 7.638930401299149e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3737, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 64.83333539962769, "completions/min_length": 15.625, "epoch": 7.430876147927526, "grad_norm": 0.011829371247196185, "kl": 0.08349609375, "learning_rate": 1.571427423357678e-07, "loss": 8.343130321009085e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3738, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 71.75000143051147, "completions/min_length": 25.125, "epoch": 7.432861752295855, "grad_norm": 0.0050189514726394074, "kl": 0.075408935546875, "learning_rate": 1.5691317885558674e-07, "loss": 7.537516648881137e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3739, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 71.75000143051147, "completions/min_length": 26.75, "epoch": 7.434847356664185, "grad_norm": 0.0049279509134033915, "kl": 0.089813232421875, "learning_rate": 1.5668375196559752e-07, "loss": 8.974697266239673e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3740, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 68.83333539962769, "completions/min_length": 26.75, "epoch": 7.436832961032514, "grad_norm": 0.004362219475897078, "kl": 0.065704345703125, "learning_rate": 1.5645446175713965e-07, "loss": 6.572126585524529e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3741, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 76.12500286102295, "completions/min_length": 21.5, "epoch": 7.438818565400844, "grad_norm": 0.003470998081431405, "kl": 0.06817626953125, "learning_rate": 1.5622530832149844e-07, "loss": 6.820004637120292e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3742, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.375, "completions/mean_length": 72.19791841506958, "completions/min_length": 20.125, "epoch": 7.440804169769174, "grad_norm": 1.1073680879673646, "kl": 0.069976806640625, "learning_rate": 1.559962917499048e-07, "loss": 0.006491821259260178, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3743, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.15625190734863, "completions/min_length": 17.375, "epoch": 7.442789774137503, "grad_norm": 0.004105450294446705, "kl": 0.075225830078125, "learning_rate": 1.5576741213353533e-07, "loss": 7.524635293520987e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3744, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.125, "completions/mean_length": 65.29166841506958, "completions/min_length": 20.25, "epoch": 7.444775378505833, "grad_norm": 0.8790661554746932, "kl": 0.077423095703125, "learning_rate": 1.5553866956351158e-07, "loss": 0.006379756145179272, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3745, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 68.88541889190674, "completions/min_length": 22.125, "epoch": 7.446760982874162, "grad_norm": 0.005639493796178616, "kl": 0.08843994140625, "learning_rate": 1.5531006413090113e-07, "loss": 8.833927859086543e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3746, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 59.531250953674316, "completions/min_length": 22.875, "epoch": 7.448746587242492, "grad_norm": 0.0029492879741779853, "kl": 0.069976806640625, "learning_rate": 1.5508159592671643e-07, "loss": 6.995679723331705e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3747, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 71.17708587646484, "completions/min_length": 19.375, "epoch": 7.450732191610822, "grad_norm": 0.00480114962165369, "kl": 0.0621337890625, "learning_rate": 1.5485326504191582e-07, "loss": 6.207319529494271e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3748, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 68.364586353302, "completions/min_length": 24.0, "epoch": 7.452717795979151, "grad_norm": 0.0033737137917122075, "kl": 0.073333740234375, "learning_rate": 1.546250715674024e-07, "loss": 7.329837535507977e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3749, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.375, "completions/mean_length": 68.61458683013916, "completions/min_length": 23.5, "epoch": 7.454703400347481, "grad_norm": 0.005075451648058721, "kl": 0.075836181640625, "learning_rate": 1.54397015594025e-07, "loss": 7.57932139094919e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3750, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.75, "completions/mean_length": 54.38541793823242, "completions/min_length": 19.375, "epoch": 7.456689004715811, "grad_norm": 0.010110975905962378, "kl": 0.07672119140625, "learning_rate": 1.541690972125778e-07, "loss": 7.67758465372026e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3751, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 67.64583492279053, "completions/min_length": 27.75, "epoch": 7.45867460908414, "grad_norm": 0.002907574246184972, "kl": 0.063629150390625, "learning_rate": 1.5394131651379978e-07, "loss": 6.357365055009723e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3752, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 64.30208492279053, "completions/min_length": 21.0, "epoch": 7.46066021345247, "grad_norm": 0.0043593015739744586, "kl": 0.067626953125, "learning_rate": 1.537136735883751e-07, "loss": 6.76568306516856e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3753, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 69.14583492279053, "completions/min_length": 21.625, "epoch": 7.462645817820799, "grad_norm": 0.005498400713452654, "kl": 0.065155029296875, "learning_rate": 1.534861685269337e-07, "loss": 6.51189693599008e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3754, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 69.33333539962769, "completions/min_length": 24.875, "epoch": 7.464631422189129, "grad_norm": 0.005280976080447809, "kl": 0.065216064453125, "learning_rate": 1.5325880142004976e-07, "loss": 6.517578003695235e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3755, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 70.91666889190674, "completions/min_length": 27.625, "epoch": 7.466617026557459, "grad_norm": 0.0051732542778264575, "kl": 0.074066162109375, "learning_rate": 1.5303157235824321e-07, "loss": 7.413503772113472e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3756, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 66.57291889190674, "completions/min_length": 20.5, "epoch": 7.468602630925788, "grad_norm": 0.005910914817176071, "kl": 0.076995849609375, "learning_rate": 1.5280448143197888e-07, "loss": 7.704936433583498e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3757, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 68.00000095367432, "completions/min_length": 24.25, "epoch": 7.470588235294118, "grad_norm": 0.006723453251228797, "kl": 0.078125, "learning_rate": 1.5257752873166636e-07, "loss": 7.806930807419121e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3758, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 72.05208492279053, "completions/min_length": 20.125, "epoch": 7.472573839662447, "grad_norm": 0.004978863733786664, "kl": 0.072784423828125, "learning_rate": 1.523507143476605e-07, "loss": 7.274825475178659e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3759, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.125, "completions/mean_length": 56.562501430511475, "completions/min_length": 21.125, "epoch": 7.474559444030777, "grad_norm": 0.008352283349744437, "kl": 0.08782958984375, "learning_rate": 1.5212403837026073e-07, "loss": 8.781059295870364e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3760, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 68.32291746139526, "completions/min_length": 27.875, "epoch": 7.476545048399107, "grad_norm": 0.0049554829647225065, "kl": 0.066436767578125, "learning_rate": 1.5189750088971193e-07, "loss": 6.647556438110769e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3761, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 66.31250143051147, "completions/min_length": 25.75, "epoch": 7.478530652767436, "grad_norm": 0.004140960885884446, "kl": 0.065521240234375, "learning_rate": 1.5167110199620332e-07, "loss": 6.554085848620161e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3762, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 69.91666841506958, "completions/min_length": 30.125, "epoch": 7.480516257135766, "grad_norm": 0.004859213633604374, "kl": 0.081146240234375, "learning_rate": 1.5144484177986882e-07, "loss": 8.106774475891143e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3763, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 66.97916889190674, "completions/min_length": 24.375, "epoch": 7.482501861504096, "grad_norm": 1.9024908033060055, "kl": 0.090301513671875, "learning_rate": 1.512187203307881e-07, "loss": -0.008817656897008419, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3764, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 66.46875190734863, "completions/min_length": 21.75, "epoch": 7.484487465872425, "grad_norm": 0.003921188377169107, "kl": 0.07464599609375, "learning_rate": 1.5099273773898458e-07, "loss": 7.463831570930779e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3765, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 62.94791793823242, "completions/min_length": 24.5, "epoch": 7.486473070240755, "grad_norm": 0.0034034509333290977, "kl": 0.07257080078125, "learning_rate": 1.507668940944266e-07, "loss": 7.255197851918638e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3766, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.875, "completions/mean_length": 64.10416841506958, "completions/min_length": 21.875, "epoch": 7.488458674609084, "grad_norm": 0.004638652438963976, "kl": 0.063385009765625, "learning_rate": 1.5054118948702777e-07, "loss": 6.33524323347956e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3767, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 62.177085399627686, "completions/min_length": 24.375, "epoch": 7.490444278977414, "grad_norm": 0.005349412643887444, "kl": 0.087188720703125, "learning_rate": 1.5031562400664544e-07, "loss": 8.722725760890171e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3768, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 66.17708492279053, "completions/min_length": 23.5, "epoch": 7.492429883345744, "grad_norm": 1.1079419482299055, "kl": 0.078125, "learning_rate": 1.5009019774308246e-07, "loss": -0.005216313526034355, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3769, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 60.35416841506958, "completions/min_length": 22.375, "epoch": 7.494415487714073, "grad_norm": 0.004640896821779925, "kl": 0.0684814453125, "learning_rate": 1.4986491078608553e-07, "loss": 6.846869655419141e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3770, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 81.31250333786011, "completions/min_length": 31.875, "epoch": 7.496401092082403, "grad_norm": 0.004028672403164803, "kl": 0.0826416015625, "learning_rate": 1.4963976322534634e-07, "loss": 8.25505267130211e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3771, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.625, "completions/mean_length": 71.57291984558105, "completions/min_length": 24.0, "epoch": 7.498386696450732, "grad_norm": 0.004321633643688434, "kl": 0.09393310546875, "learning_rate": 1.49414755150501e-07, "loss": 9.384322765981779e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3772, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 66.12500190734863, "completions/min_length": 22.375, "epoch": 7.500372300819062, "grad_norm": 0.0042623108101929276, "kl": 0.077178955078125, "learning_rate": 1.4918988665113001e-07, "loss": 7.707101758569479e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3773, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 63.23958683013916, "completions/min_length": 20.75, "epoch": 7.502357905187392, "grad_norm": 0.005855976134385495, "kl": 0.05926513671875, "learning_rate": 1.4896515781675816e-07, "loss": 5.921687261434272e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3774, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 72.32291889190674, "completions/min_length": 30.125, "epoch": 7.504343509555721, "grad_norm": 1.2358669762558965, "kl": 0.089691162109375, "learning_rate": 1.4874056873685502e-07, "loss": -0.01331346295773983, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3775, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.875, "completions/mean_length": 68.34375333786011, "completions/min_length": 29.25, "epoch": 7.506329113924051, "grad_norm": 0.0054817869126398905, "kl": 0.075653076171875, "learning_rate": 1.485161195008341e-07, "loss": 7.557076605735347e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3776, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 66.86458444595337, "completions/min_length": 22.75, "epoch": 7.508314718292381, "grad_norm": 0.005857472286654964, "kl": 0.08251953125, "learning_rate": 1.4829181019805347e-07, "loss": 8.240701572503895e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3777, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 61.375001430511475, "completions/min_length": 22.0, "epoch": 7.51030032266071, "grad_norm": 0.0037511826756653297, "kl": 0.077484130859375, "learning_rate": 1.4806764091781564e-07, "loss": 7.743419701000676e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3778, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.125, "completions/mean_length": 61.76041793823242, "completions/min_length": 27.25, "epoch": 7.51228592702904, "grad_norm": 0.005221846055470195, "kl": 0.06646728515625, "learning_rate": 1.4784361174936698e-07, "loss": 6.639228377025574e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3779, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 69.75000286102295, "completions/min_length": 26.875, "epoch": 7.514271531397369, "grad_norm": 0.006329034328525194, "kl": 0.09356689453125, "learning_rate": 1.476197227818985e-07, "loss": 9.355320798931643e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3780, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 64.66666889190674, "completions/min_length": 23.75, "epoch": 7.516257135765699, "grad_norm": 0.0032341825381367443, "kl": 0.08154296875, "learning_rate": 1.47395974104545e-07, "loss": 8.149001223500818e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3781, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 73.20833492279053, "completions/min_length": 27.375, "epoch": 7.518242740134029, "grad_norm": 0.005130550171671116, "kl": 0.0706787109375, "learning_rate": 1.471723658063856e-07, "loss": 7.061640644678846e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3782, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 66.53125190734863, "completions/min_length": 27.5, "epoch": 7.520228344502358, "grad_norm": 0.0035068428068767256, "kl": 0.0836181640625, "learning_rate": 1.4694889797644367e-07, "loss": 8.361946674995124e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3783, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 58.97916841506958, "completions/min_length": 24.5, "epoch": 7.522213948870688, "grad_norm": 0.006485952362945099, "kl": 0.06097412109375, "learning_rate": 1.467255707036863e-07, "loss": 6.101143662817776e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3784, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 60.32291841506958, "completions/min_length": 21.5, "epoch": 7.524199553239017, "grad_norm": 0.006433364720810747, "kl": 0.07720947265625, "learning_rate": 1.4650238407702503e-07, "loss": 7.71501800045371e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3785, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.375, "completions/mean_length": 67.04166889190674, "completions/min_length": 27.625, "epoch": 7.526185157607347, "grad_norm": 0.003986761223215965, "kl": 0.080596923828125, "learning_rate": 1.4627933818531534e-07, "loss": 8.061522385105491e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3786, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 55.37500190734863, "completions/min_length": 18.375, "epoch": 7.528170761975677, "grad_norm": 0.004357447648336865, "kl": 0.070159912109375, "learning_rate": 1.4605643311735626e-07, "loss": 7.016626477707177e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3787, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 65.6979193687439, "completions/min_length": 19.25, "epoch": 7.530156366344006, "grad_norm": 1.9299555046978079, "kl": 0.07037353515625, "learning_rate": 1.4583366896189138e-07, "loss": -0.003820901270955801, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3788, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 64.97916841506958, "completions/min_length": 21.0, "epoch": 7.532141970712336, "grad_norm": 1.0584800564007786, "kl": 0.085906982421875, "learning_rate": 1.456110458076077e-07, "loss": 0.010107404552400112, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3789, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 68.81250143051147, "completions/min_length": 24.75, "epoch": 7.5341275750806656, "grad_norm": 0.0032157425030490605, "kl": 0.069183349609375, "learning_rate": 1.4538856374313608e-07, "loss": 6.924809713382274e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3790, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.375, "completions/mean_length": 69.95833492279053, "completions/min_length": 20.625, "epoch": 7.536113179448995, "grad_norm": 0.003533533666412546, "kl": 0.065826416015625, "learning_rate": 1.4516622285705155e-07, "loss": 6.572699203388765e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3791, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 68.89583587646484, "completions/min_length": 27.5, "epoch": 7.538098783817325, "grad_norm": 2.004831805286954, "kl": 0.097900390625, "learning_rate": 1.4494402323787296e-07, "loss": 0.005995898507535458, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3792, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 68.7604193687439, "completions/min_length": 27.75, "epoch": 7.540084388185654, "grad_norm": 0.004367124048634768, "kl": 0.089019775390625, "learning_rate": 1.4472196497406236e-07, "loss": 8.89451039256528e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3793, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 67.33333444595337, "completions/min_length": 25.375, "epoch": 7.542069992553984, "grad_norm": 0.0031753886345738573, "kl": 0.0809326171875, "learning_rate": 1.445000481540263e-07, "loss": 8.095223165582865e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3794, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 70.59375190734863, "completions/min_length": 28.5, "epoch": 7.5440555969223135, "grad_norm": 0.006079296620358689, "kl": 0.08233642578125, "learning_rate": 1.442782728661141e-07, "loss": 8.227508806157857e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3795, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 69.37500190734863, "completions/min_length": 24.125, "epoch": 7.546041201290643, "grad_norm": 0.004001955070932801, "kl": 0.066253662109375, "learning_rate": 1.4405663919861977e-07, "loss": 6.627905531786382e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3796, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 61.57291793823242, "completions/min_length": 23.0, "epoch": 7.5480268056589725, "grad_norm": 0.0050473173425620655, "kl": 0.07659912109375, "learning_rate": 1.438351472397799e-07, "loss": 7.653064676560462e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3797, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 72.42708539962769, "completions/min_length": 32.75, "epoch": 7.550012410027302, "grad_norm": 0.004899609328287532, "kl": 0.080535888671875, "learning_rate": 1.436137970777755e-07, "loss": 8.046612492762506e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3798, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 59.22916793823242, "completions/min_length": 28.0, "epoch": 7.551998014395632, "grad_norm": 1.7891476329441975, "kl": 0.076080322265625, "learning_rate": 1.433925888007308e-07, "loss": -0.0001973348407773301, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.05103103630244732, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3245695158839226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3799, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 68.6979193687439, "completions/min_length": 26.25, "epoch": 7.5539836187639615, "grad_norm": 0.004071081516049575, "kl": 0.076019287109375, "learning_rate": 1.4317152249671337e-07, "loss": 7.594324415549636e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3800, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 65.51041841506958, "completions/min_length": 22.625, "epoch": 7.555969223132291, "grad_norm": 0.00372150144917829, "kl": 0.06512451171875, "learning_rate": 1.4295059825373461e-07, "loss": 6.519451562780887e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3801, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 65.67708539962769, "completions/min_length": 23.5, "epoch": 7.5579548275006205, "grad_norm": 0.0031629980312355395, "kl": 0.082366943359375, "learning_rate": 1.427298161597491e-07, "loss": 8.251075632870197e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3802, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 66.20833539962769, "completions/min_length": 27.25, "epoch": 7.5599404318689505, "grad_norm": 0.020388673417674308, "kl": 0.102783203125, "learning_rate": 1.425091763026548e-07, "loss": 0.00010280066635459661, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3803, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 67.30208539962769, "completions/min_length": 23.875, "epoch": 7.5619260362372795, "grad_norm": 0.004594253566820516, "kl": 0.072967529296875, "learning_rate": 1.4228867877029333e-07, "loss": 7.303150778170675e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3804, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 74.65625286102295, "completions/min_length": 23.75, "epoch": 7.5639116406056095, "grad_norm": 0.00311300994489222, "kl": 0.074127197265625, "learning_rate": 1.4206832365044923e-07, "loss": 7.414726132992655e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3805, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 64.03125238418579, "completions/min_length": 20.5, "epoch": 7.565897244973939, "grad_norm": 0.005210254097619597, "kl": 0.083526611328125, "learning_rate": 1.418481110308507e-07, "loss": 8.356572652701288e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3806, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 63.145835399627686, "completions/min_length": 21.0, "epoch": 7.5678828493422685, "grad_norm": 0.005606869038669202, "kl": 0.0777587890625, "learning_rate": 1.4162804099916932e-07, "loss": 7.770962110953405e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3807, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 62.510417461395264, "completions/min_length": 17.5, "epoch": 7.5698684537105985, "grad_norm": 0.005429184242739169, "kl": 0.073699951171875, "learning_rate": 1.414081136430193e-07, "loss": 7.376716530416161e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3808, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.125, "completions/mean_length": 61.38541793823242, "completions/min_length": 23.875, "epoch": 7.5718540580789275, "grad_norm": 0.006276110970431092, "kl": 0.067138671875, "learning_rate": 1.4118832904995875e-07, "loss": 6.710530578857288e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3809, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 67.7604193687439, "completions/min_length": 20.875, "epoch": 7.5738396624472575, "grad_norm": 0.0029336926068383026, "kl": 0.0693359375, "learning_rate": 1.409686873074884e-07, "loss": 6.93323599989526e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3810, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 70.63541865348816, "completions/min_length": 22.75, "epoch": 7.5758252668155865, "grad_norm": 0.004899217007670302, "kl": 0.1007080078125, "learning_rate": 1.407491885030523e-07, "loss": 0.00010074060264742002, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3811, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 68.80208492279053, "completions/min_length": 24.25, "epoch": 7.5778108711839165, "grad_norm": 0.0038766021489333147, "kl": 0.08477783203125, "learning_rate": 1.4052983272403757e-07, "loss": 8.473803609376773e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3812, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 69.54166841506958, "completions/min_length": 25.75, "epoch": 7.5797964755522464, "grad_norm": 0.004622770475997417, "kl": 0.088134765625, "learning_rate": 1.4031062005777473e-07, "loss": 8.805944526102394e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3813, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 66.72916841506958, "completions/min_length": 25.75, "epoch": 7.5817820799205755, "grad_norm": 0.0034354835599022635, "kl": 0.07568359375, "learning_rate": 1.400915505915367e-07, "loss": 7.565581472590566e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3814, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 61.114585399627686, "completions/min_length": 17.375, "epoch": 7.5837676842889055, "grad_norm": 0.005817830902787357, "kl": 0.069793701171875, "learning_rate": 1.3987262441254e-07, "loss": 6.983404455240816e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3815, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 68.16666793823242, "completions/min_length": 28.125, "epoch": 7.585753288657235, "grad_norm": 0.005157108721092594, "kl": 0.0640869140625, "learning_rate": 1.3965384160794347e-07, "loss": 6.403190491255373e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3816, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 74.78125190734863, "completions/min_length": 20.625, "epoch": 7.5877388930255645, "grad_norm": 0.004824453982976149, "kl": 0.08428955078125, "learning_rate": 1.3943520226484962e-07, "loss": 8.422048267675564e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3817, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 62.30208444595337, "completions/min_length": 22.5, "epoch": 7.589724497393894, "grad_norm": 0.0038294148197715572, "kl": 0.077789306640625, "learning_rate": 1.392167064703032e-07, "loss": 7.776329584885389e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3818, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.875, "completions/mean_length": 72.90625095367432, "completions/min_length": 25.25, "epoch": 7.5917101017622235, "grad_norm": 0.006284562017998628, "kl": 0.099090576171875, "learning_rate": 1.3899835431129175e-07, "loss": 9.91778215393424e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3819, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 69.96875190734863, "completions/min_length": 26.5, "epoch": 7.5936957061305534, "grad_norm": 0.0035761528511995206, "kl": 0.07391357421875, "learning_rate": 1.3878014587474662e-07, "loss": 7.381969771813601e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3820, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 69.32291841506958, "completions/min_length": 24.625, "epoch": 7.595681310498883, "grad_norm": 0.0036286533714774724, "kl": 0.10113525390625, "learning_rate": 1.3856208124754088e-07, "loss": 0.00010125023982254788, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3821, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 62.31250190734863, "completions/min_length": 19.25, "epoch": 7.5976669148672125, "grad_norm": 0.0034877308395075577, "kl": 0.072998046875, "learning_rate": 1.3834416051649055e-07, "loss": 7.299243588931859e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3822, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 54.86458492279053, "completions/min_length": 18.625, "epoch": 7.599652519235542, "grad_norm": 0.003132542223039554, "kl": 0.073883056640625, "learning_rate": 1.381263837683549e-07, "loss": 7.385817298199981e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3823, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 71.76041984558105, "completions/min_length": 23.75, "epoch": 7.6016381236038715, "grad_norm": 0.0039784297043140345, "kl": 0.089874267578125, "learning_rate": 1.379087510898352e-07, "loss": 8.992602670332417e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3824, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 58.16666793823242, "completions/min_length": 17.625, "epoch": 7.603623727972201, "grad_norm": 0.00713559702926011, "kl": 0.068206787109375, "learning_rate": 1.376912625675757e-07, "loss": 6.826203025411814e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3825, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 72.03125286102295, "completions/min_length": 20.0, "epoch": 7.605609332340531, "grad_norm": 0.0031252772054555663, "kl": 0.0643310546875, "learning_rate": 1.3747391828816347e-07, "loss": 6.431882502511144e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3826, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 64.28125143051147, "completions/min_length": 22.5, "epoch": 7.6075949367088604, "grad_norm": 0.00351842678753326, "kl": 0.06585693359375, "learning_rate": 1.3725671833812764e-07, "loss": 6.579048931598663e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3827, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.375, "completions/mean_length": 78.53125095367432, "completions/min_length": 25.25, "epoch": 7.60958054107719, "grad_norm": 0.0037101368806568813, "kl": 0.086029052734375, "learning_rate": 1.3703966280394036e-07, "loss": 8.598676504334435e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3828, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 71.10416889190674, "completions/min_length": 25.0, "epoch": 7.61156614544552, "grad_norm": 0.004323057091798282, "kl": 0.072601318359375, "learning_rate": 1.3682275177201603e-07, "loss": 7.258218829520047e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3829, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 64.96875143051147, "completions/min_length": 20.875, "epoch": 7.613551749813849, "grad_norm": 0.0057692096428140225, "kl": 0.074859619140625, "learning_rate": 1.366059853287113e-07, "loss": 7.488192932214588e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3830, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 63.84375190734863, "completions/min_length": 21.75, "epoch": 7.615537354182179, "grad_norm": 0.9623374980197493, "kl": 0.0753631591796875, "learning_rate": 1.3638936356032588e-07, "loss": 0.008153287693858147, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166669771075, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3831, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 68.86458539962769, "completions/min_length": 23.625, "epoch": 7.617522958550508, "grad_norm": 0.005837272487351229, "kl": 0.0802001953125, "learning_rate": 1.361728865531012e-07, "loss": 8.019919914659113e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3832, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 69.25000286102295, "completions/min_length": 20.5, "epoch": 7.619508562918838, "grad_norm": 0.0034887088475826413, "kl": 0.08001708984375, "learning_rate": 1.3595655439322163e-07, "loss": 7.99636691226624e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3833, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.875, "completions/mean_length": 57.364586353302, "completions/min_length": 16.875, "epoch": 7.621494167287168, "grad_norm": 0.004570205789125989, "kl": 0.069915771484375, "learning_rate": 1.3574036716681364e-07, "loss": 6.994244176894426e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3834, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 74.8229193687439, "completions/min_length": 28.25, "epoch": 7.623479771655497, "grad_norm": 1.0640701791162204, "kl": 0.08599853515625, "learning_rate": 1.3552432495994575e-07, "loss": 0.0036701548378914595, "memory(GiB)": 94.21, "reward": 1.71875, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3835, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.625, "completions/mean_length": 73.0104193687439, "completions/min_length": 30.25, "epoch": 7.625465376023827, "grad_norm": 0.007398065378854317, "kl": 0.0765380859375, "learning_rate": 1.3530842785862928e-07, "loss": 7.650951738469303e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3836, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 61.145835399627686, "completions/min_length": 16.375, "epoch": 7.627450980392156, "grad_norm": 0.002851991828770374, "kl": 0.065643310546875, "learning_rate": 1.3509267594881713e-07, "loss": 6.563091301359236e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3837, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 66.70833492279053, "completions/min_length": 25.5, "epoch": 7.629436584760486, "grad_norm": 0.004248191399794432, "kl": 0.065399169921875, "learning_rate": 1.348770693164051e-07, "loss": 6.544891220983118e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3838, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 59.53125238418579, "completions/min_length": 20.75, "epoch": 7.631422189128816, "grad_norm": 0.003373364449470411, "kl": 0.063232421875, "learning_rate": 1.3466160804723042e-07, "loss": 6.322674744296819e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3839, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 61.14583492279053, "completions/min_length": 20.25, "epoch": 7.633407793497145, "grad_norm": 0.005341285629886416, "kl": 0.069915771484375, "learning_rate": 1.3444629222707305e-07, "loss": 6.989236135268584e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3840, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 68.18750238418579, "completions/min_length": 26.75, "epoch": 7.635393397865475, "grad_norm": 0.003544579968788282, "kl": 0.0733642578125, "learning_rate": 1.3423112194165497e-07, "loss": 7.330399967031553e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3841, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 64.59375190734863, "completions/min_length": 18.25, "epoch": 7.637379002233805, "grad_norm": 1.3279956347277142, "kl": 0.08892822265625, "learning_rate": 1.3401609727663988e-07, "loss": -0.01843891851603985, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3842, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 61.427085399627686, "completions/min_length": 22.75, "epoch": 7.639364606602134, "grad_norm": 1.4531341156494462, "kl": 0.226776123046875, "learning_rate": 1.3380121831763354e-07, "loss": -0.015078927390277386, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3843, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 58.60416889190674, "completions/min_length": 17.625, "epoch": 7.641350210970464, "grad_norm": 0.009695697133040825, "kl": 0.0684814453125, "learning_rate": 1.335864851501841e-07, "loss": 6.850939098512754e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3844, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.75, "completions/mean_length": 66.09375190734863, "completions/min_length": 22.875, "epoch": 7.643335815338793, "grad_norm": 0.9979786384564562, "kl": 0.08343505859375, "learning_rate": 1.3337189785978125e-07, "loss": 0.0007822262123227119, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3845, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.125, "completions/mean_length": 59.19791889190674, "completions/min_length": 24.25, "epoch": 7.645321419707123, "grad_norm": 0.0038477150137922337, "kl": 0.087158203125, "learning_rate": 1.331574565318569e-07, "loss": 8.724215877009556e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3846, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 61.07291841506958, "completions/min_length": 21.125, "epoch": 7.647307024075453, "grad_norm": 0.005068134747830288, "kl": 0.06317138671875, "learning_rate": 1.3294316125178473e-07, "loss": 6.313101766863838e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3847, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 63.562500953674316, "completions/min_length": 27.25, "epoch": 7.649292628443782, "grad_norm": 0.0037538206632814564, "kl": 0.07318115234375, "learning_rate": 1.3272901210488014e-07, "loss": 7.314438698813319e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3848, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 67.4166693687439, "completions/min_length": 26.125, "epoch": 7.651278232812112, "grad_norm": 0.005920629853202438, "kl": 0.083740234375, "learning_rate": 1.3251500917640067e-07, "loss": 8.384125248994678e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3849, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 63.739585876464844, "completions/min_length": 20.25, "epoch": 7.653263837180441, "grad_norm": 0.0032517789972344125, "kl": 0.070159912109375, "learning_rate": 1.3230115255154538e-07, "loss": 7.010986155364662e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3850, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 69.42708492279053, "completions/min_length": 24.625, "epoch": 7.655249441548771, "grad_norm": 0.0035803549911116884, "kl": 0.079986572265625, "learning_rate": 1.3208744231545492e-07, "loss": 7.997571083251387e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3851, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.625, "completions/mean_length": 64.45833492279053, "completions/min_length": 24.5, "epoch": 7.657235045917101, "grad_norm": 0.011159006428603476, "kl": 0.094818115234375, "learning_rate": 1.318738785532123e-07, "loss": 9.478756692260504e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3852, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 56.39583444595337, "completions/min_length": 17.375, "epoch": 7.65922065028543, "grad_norm": 0.002846713363513895, "kl": 0.073455810546875, "learning_rate": 1.3166046134984142e-07, "loss": 7.338711293414235e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3853, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 62.604167461395264, "completions/min_length": 22.125, "epoch": 7.66120625465376, "grad_norm": 0.0038446117106336595, "kl": 0.069427490234375, "learning_rate": 1.3144719079030853e-07, "loss": 6.934361590538174e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3854, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.875, "completions/mean_length": 62.333335399627686, "completions/min_length": 28.125, "epoch": 7.66319185902209, "grad_norm": 0.004201565164909353, "kl": 0.076568603515625, "learning_rate": 1.3123406695952117e-07, "loss": 7.650322368135676e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3855, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 62.364585399627686, "completions/min_length": 23.25, "epoch": 7.665177463390419, "grad_norm": 0.0069294008205900495, "kl": 0.078399658203125, "learning_rate": 1.3102108994232825e-07, "loss": 7.845751679269597e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3856, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 64.95833492279053, "completions/min_length": 18.875, "epoch": 7.667163067758749, "grad_norm": 1.187931763149915, "kl": 0.097198486328125, "learning_rate": 1.3080825982352077e-07, "loss": -0.002422519028186798, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3857, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 69.52083492279053, "completions/min_length": 28.875, "epoch": 7.669148672127078, "grad_norm": 0.002938643074916235, "kl": 0.082244873046875, "learning_rate": 1.3059557668783084e-07, "loss": 8.231324318330735e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3858, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 58.56250190734863, "completions/min_length": 22.75, "epoch": 7.671134276495408, "grad_norm": 0.003799052043380333, "kl": 0.06842041015625, "learning_rate": 1.30383040619932e-07, "loss": 6.852422666270286e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3859, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 62.98958492279053, "completions/min_length": 26.625, "epoch": 7.673119880863738, "grad_norm": 0.003458717684124392, "kl": 0.079071044921875, "learning_rate": 1.3017065170443946e-07, "loss": 7.898827607277781e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3860, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 69.07291841506958, "completions/min_length": 24.625, "epoch": 7.675105485232067, "grad_norm": 0.003952200137858557, "kl": 0.0587158203125, "learning_rate": 1.2995841002591006e-07, "loss": 5.869668530067429e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3861, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 67.36458587646484, "completions/min_length": 25.5, "epoch": 7.677091089600397, "grad_norm": 0.003941403506617953, "kl": 0.0537109375, "learning_rate": 1.2974631566884136e-07, "loss": 5.367131598177366e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3862, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 63.46875238418579, "completions/min_length": 28.0, "epoch": 7.679076693968726, "grad_norm": 0.009386621949527022, "kl": 0.081939697265625, "learning_rate": 1.2953436871767298e-07, "loss": 8.189590880647302e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3863, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 60.062501430511475, "completions/min_length": 21.5, "epoch": 7.681062298337056, "grad_norm": 2.6844684037516444, "kl": 0.21484375, "learning_rate": 1.293225692567852e-07, "loss": -0.0160828884691, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3864, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 66.13541793823242, "completions/min_length": 24.875, "epoch": 7.683047902705386, "grad_norm": 1.0338487841698705, "kl": 0.063751220703125, "learning_rate": 1.2911091737050027e-07, "loss": 0.010955302976071835, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3865, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 66.65625190734863, "completions/min_length": 23.0, "epoch": 7.685033507073715, "grad_norm": 0.008101933829857383, "kl": 0.085479736328125, "learning_rate": 1.288994131430811e-07, "loss": 8.540777344023809e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3866, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 56.46875190734863, "completions/min_length": 19.75, "epoch": 7.687019111442045, "grad_norm": 0.00580529201571199, "kl": 0.062530517578125, "learning_rate": 1.2868805665873184e-07, "loss": 6.254997424548492e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3867, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 66.14583492279053, "completions/min_length": 27.875, "epoch": 7.689004715810375, "grad_norm": 0.005377720616617876, "kl": 0.083740234375, "learning_rate": 1.2847684800159853e-07, "loss": 8.378714846912771e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3868, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 65.00000095367432, "completions/min_length": 29.5, "epoch": 7.690990320178704, "grad_norm": 0.006377999742467398, "kl": 0.082977294921875, "learning_rate": 1.282657872557676e-07, "loss": 8.286055526696146e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3869, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 58.437501430511475, "completions/min_length": 22.5, "epoch": 7.692975924547034, "grad_norm": 0.004440370370414892, "kl": 0.097930908203125, "learning_rate": 1.2805487450526665e-07, "loss": 9.791778575163335e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3870, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.625, "completions/mean_length": 63.87500190734863, "completions/min_length": 23.0, "epoch": 7.694961528915364, "grad_norm": 1.3893921491849417, "kl": 0.0911865234375, "learning_rate": 1.2784410983406486e-07, "loss": -0.0011434592306613922, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3871, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 67.91666889190674, "completions/min_length": 29.875, "epoch": 7.696947133283693, "grad_norm": 0.003432952514643224, "kl": 0.058441162109375, "learning_rate": 1.2763349332607187e-07, "loss": 5.8417776017449796e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3872, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 60.52083492279053, "completions/min_length": 19.125, "epoch": 7.698932737652023, "grad_norm": 0.007740923115435708, "kl": 0.095245361328125, "learning_rate": 1.274230250651389e-07, "loss": 9.521457104710862e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3873, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 63.65625238418579, "completions/min_length": 17.0, "epoch": 7.700918342020352, "grad_norm": 0.004679144919841729, "kl": 0.068572998046875, "learning_rate": 1.272127051350576e-07, "loss": 6.858836422907189e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3874, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 60.6666693687439, "completions/min_length": 22.875, "epoch": 7.702903946388682, "grad_norm": 0.0033703393272158026, "kl": 0.06671142578125, "learning_rate": 1.2700253361956091e-07, "loss": 6.672116433037445e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3875, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 68.81250095367432, "completions/min_length": 23.75, "epoch": 7.704889550757011, "grad_norm": 0.00444089625798143, "kl": 0.065704345703125, "learning_rate": 1.2679251060232276e-07, "loss": 6.565947842318565e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3876, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 63.77083492279053, "completions/min_length": 22.625, "epoch": 7.706875155125341, "grad_norm": 0.0037948797975675265, "kl": 0.08343505859375, "learning_rate": 1.2658263616695752e-07, "loss": 8.33876256365329e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3877, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 55.68750190734863, "completions/min_length": 20.0, "epoch": 7.708860759493671, "grad_norm": 0.0032764098465749053, "kl": 0.067962646484375, "learning_rate": 1.2637291039702103e-07, "loss": 6.792052590753883e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3878, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 64.25000238418579, "completions/min_length": 23.5, "epoch": 7.710846363862, "grad_norm": 2.0970881609837786, "kl": 0.078155517578125, "learning_rate": 1.2616333337600937e-07, "loss": -0.011503200978040695, "memory(GiB)": 94.21, "reward": 1.8437500149011612, "reward_std": 0.05779037997126579, "rewards/CineAccuracyORM/mean": 0.8437500037252903, "rewards/CineAccuracyORM/std": 0.16290925815701485, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3879, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 70.38541841506958, "completions/min_length": 28.875, "epoch": 7.71283196823033, "grad_norm": 0.003226638792558709, "kl": 0.07977294921875, "learning_rate": 1.259539051873595e-07, "loss": 7.976028427947313e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3880, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.625, "completions/mean_length": 79.10416889190674, "completions/min_length": 28.0, "epoch": 7.71481757259866, "grad_norm": 0.0035974545990974903, "kl": 0.07464599609375, "learning_rate": 1.257446259144494e-07, "loss": 7.466791430488229e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3881, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.375, "completions/mean_length": 57.23958492279053, "completions/min_length": 20.375, "epoch": 7.716803176966989, "grad_norm": 0.005518855760240403, "kl": 0.07415771484375, "learning_rate": 1.2553549564059785e-07, "loss": 7.423026545438915e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3882, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 66.10416889190674, "completions/min_length": 20.25, "epoch": 7.718788781335319, "grad_norm": 0.005431646810206131, "kl": 0.07177734375, "learning_rate": 1.253265144490636e-07, "loss": 7.168115553213283e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3883, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 69.23958539962769, "completions/min_length": 25.75, "epoch": 7.720774385703649, "grad_norm": 0.0062842108858253976, "kl": 0.082977294921875, "learning_rate": 1.2511768242304704e-07, "loss": 8.305140363518149e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3884, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 69.05208587646484, "completions/min_length": 26.75, "epoch": 7.722759990071978, "grad_norm": 0.005109899437266735, "kl": 0.068939208984375, "learning_rate": 1.2490899964568823e-07, "loss": 6.900308653712273e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3885, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.375, "completions/mean_length": 58.35416841506958, "completions/min_length": 19.125, "epoch": 7.724745594440308, "grad_norm": 0.003215219035282089, "kl": 0.090911865234375, "learning_rate": 1.2470046620006857e-07, "loss": 9.096015128307045e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3886, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.625, "completions/mean_length": 72.31250286102295, "completions/min_length": 26.0, "epoch": 7.726731198808637, "grad_norm": 0.005316643862158534, "kl": 0.08538818359375, "learning_rate": 1.2449208216920948e-07, "loss": 8.545012678951025e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3887, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 67.35416889190674, "completions/min_length": 27.5, "epoch": 7.728716803176967, "grad_norm": 0.0056797898705136405, "kl": 0.0538330078125, "learning_rate": 1.242838476360729e-07, "loss": 5.389576472225599e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3888, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 66.25000238418579, "completions/min_length": 25.875, "epoch": 7.730702407545296, "grad_norm": 0.005387402377753929, "kl": 0.07366943359375, "learning_rate": 1.2407576268356196e-07, "loss": 7.36262445570901e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3889, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 65.50000190734863, "completions/min_length": 28.0, "epoch": 7.732688011913626, "grad_norm": 0.003991402989264238, "kl": 0.069580078125, "learning_rate": 1.2386782739451945e-07, "loss": 6.965005013626069e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3890, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 69.38541841506958, "completions/min_length": 24.5, "epoch": 7.734673616281956, "grad_norm": 0.003969477911141373, "kl": 0.07080078125, "learning_rate": 1.2366004185172874e-07, "loss": 7.072483276715502e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3891, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 69.8541693687439, "completions/min_length": 26.75, "epoch": 7.736659220650285, "grad_norm": 0.003786106620533908, "kl": 0.0819091796875, "learning_rate": 1.234524061379139e-07, "loss": 8.186566265067086e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3892, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 63.85416841506958, "completions/min_length": 27.625, "epoch": 7.738644825018615, "grad_norm": 0.004941575415389164, "kl": 0.07891845703125, "learning_rate": 1.2324492033573892e-07, "loss": 7.896412716945633e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3893, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 62.708335399627686, "completions/min_length": 23.625, "epoch": 7.740630429386945, "grad_norm": 0.011346158168316975, "kl": 0.0738525390625, "learning_rate": 1.230375845278086e-07, "loss": 7.386261131614447e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3894, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.875, "completions/mean_length": 78.71875333786011, "completions/min_length": 30.75, "epoch": 7.742616033755274, "grad_norm": 0.0036536957602840083, "kl": 0.079437255859375, "learning_rate": 1.228303987966675e-07, "loss": 7.951415318530053e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3895, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 85.39583587646484, "completions/min_length": 30.25, "epoch": 7.744601638123604, "grad_norm": 0.00280663164142008, "kl": 0.059967041015625, "learning_rate": 1.2262336322480076e-07, "loss": 6.000800567562692e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3896, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 65.12500190734863, "completions/min_length": 19.75, "epoch": 7.746587242491934, "grad_norm": 0.008002030435721228, "kl": 0.090118408203125, "learning_rate": 1.2241647789463383e-07, "loss": 9.01563762454316e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3897, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 68.52083683013916, "completions/min_length": 22.125, "epoch": 7.748572846860263, "grad_norm": 0.003710607147933289, "kl": 0.07269287109375, "learning_rate": 1.2220974288853208e-07, "loss": 7.264433952514082e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3898, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 70.84375286102295, "completions/min_length": 25.625, "epoch": 7.750558451228593, "grad_norm": 0.0046607345701737074, "kl": 0.073089599609375, "learning_rate": 1.2200315828880091e-07, "loss": 7.309476495720446e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3899, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.125, "completions/mean_length": 67.51041889190674, "completions/min_length": 26.0, "epoch": 7.752544055596922, "grad_norm": 0.004811622878731625, "kl": 0.06817626953125, "learning_rate": 1.2179672417768644e-07, "loss": 6.826211756560951e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3900, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 61.96875238418579, "completions/min_length": 21.5, "epoch": 7.754529659965252, "grad_norm": 0.008958614384968833, "kl": 0.06927490234375, "learning_rate": 1.2159044063737416e-07, "loss": 6.927098729647696e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3901, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 66.90625238418579, "completions/min_length": 23.75, "epoch": 7.756515264333581, "grad_norm": 0.002930563414245782, "kl": 0.070953369140625, "learning_rate": 1.213843077499901e-07, "loss": 7.09326850483194e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3902, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 75.47916984558105, "completions/min_length": 26.75, "epoch": 7.758500868701911, "grad_norm": 0.002937133231444342, "kl": 0.07049560546875, "learning_rate": 1.2117832559760032e-07, "loss": 7.053184526739642e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3903, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 64.53125095367432, "completions/min_length": 22.875, "epoch": 7.760486473070241, "grad_norm": 0.0030746675067557672, "kl": 0.079132080078125, "learning_rate": 1.209724942622104e-07, "loss": 7.920600182842463e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3904, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.375, "completions/mean_length": 73.28125238418579, "completions/min_length": 27.5, "epoch": 7.76247207743857, "grad_norm": 0.0030448895607094566, "kl": 0.079376220703125, "learning_rate": 1.2076681382576649e-07, "loss": 7.947770063765347e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3905, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 71.21875238418579, "completions/min_length": 28.25, "epoch": 7.7644576818069, "grad_norm": 0.003104678129102219, "kl": 0.082977294921875, "learning_rate": 1.2056128437015423e-07, "loss": 8.300077024614438e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3906, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 59.88541841506958, "completions/min_length": 25.25, "epoch": 7.76644328617523, "grad_norm": 0.004007677571024156, "kl": 0.071533203125, "learning_rate": 1.203559059771992e-07, "loss": 7.14878406142816e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3907, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.125, "completions/mean_length": 76.43750333786011, "completions/min_length": 30.5, "epoch": 7.768428890543559, "grad_norm": 0.0031034467729061906, "kl": 0.075775146484375, "learning_rate": 1.2015067872866692e-07, "loss": 7.574223855044693e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3908, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 69.25000143051147, "completions/min_length": 23.375, "epoch": 7.770414494911889, "grad_norm": 0.002958063125281366, "kl": 0.061553955078125, "learning_rate": 1.1994560270626303e-07, "loss": 6.148389365989715e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3909, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 57.80208492279053, "completions/min_length": 21.75, "epoch": 7.772400099280219, "grad_norm": 0.004323160213968585, "kl": 0.068328857421875, "learning_rate": 1.1974067799163236e-07, "loss": 6.843900337116793e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3910, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 62.59375238418579, "completions/min_length": 20.75, "epoch": 7.774385703648548, "grad_norm": 0.002687032254564319, "kl": 0.089019775390625, "learning_rate": 1.1953590466636e-07, "loss": 8.915412763599306e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3911, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 68.77083492279053, "completions/min_length": 28.125, "epoch": 7.776371308016878, "grad_norm": 0.0091714866763257, "kl": 0.06671142578125, "learning_rate": 1.193312828119704e-07, "loss": 6.665413093287498e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3912, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 65.35416889190674, "completions/min_length": 26.5, "epoch": 7.778356912385207, "grad_norm": 0.005737679892904283, "kl": 0.0882568359375, "learning_rate": 1.1912681250992818e-07, "loss": 8.836119377519935e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3913, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 67.28125143051147, "completions/min_length": 24.75, "epoch": 7.780342516753537, "grad_norm": 0.004287156588465882, "kl": 0.062164306640625, "learning_rate": 1.1892249384163716e-07, "loss": 6.219823262654245e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3914, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 79.52083539962769, "completions/min_length": 27.5, "epoch": 7.782328121121866, "grad_norm": 0.003097808161389412, "kl": 0.08026123046875, "learning_rate": 1.1871832688844064e-07, "loss": 8.024100679904222e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3915, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 72.50000047683716, "completions/min_length": 26.875, "epoch": 7.784313725490196, "grad_norm": 0.004664612230150489, "kl": 0.07159423828125, "learning_rate": 1.1851431173162246e-07, "loss": 7.156870560720563e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3916, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 63.30208396911621, "completions/min_length": 26.25, "epoch": 7.786299329858526, "grad_norm": 0.003746678758558942, "kl": 0.07366943359375, "learning_rate": 1.1831044845240517e-07, "loss": 7.370024832198396e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3917, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 65.02083587646484, "completions/min_length": 21.375, "epoch": 7.788284934226855, "grad_norm": 0.006024809792970926, "kl": 0.08074951171875, "learning_rate": 1.1810673713195091e-07, "loss": 8.068706665653735e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3918, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 66.14583587646484, "completions/min_length": 24.125, "epoch": 7.790270538595185, "grad_norm": 0.004079514111671265, "kl": 0.07763671875, "learning_rate": 1.1790317785136178e-07, "loss": 7.769724470563233e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3919, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 65.37500143051147, "completions/min_length": 29.125, "epoch": 7.792256142963515, "grad_norm": 0.0035315499964259163, "kl": 0.069915771484375, "learning_rate": 1.1769977069167881e-07, "loss": 6.988467066548765e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3920, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 69.32291984558105, "completions/min_length": 32.625, "epoch": 7.794241747331844, "grad_norm": 1.8762943647906174, "kl": 0.08843994140625, "learning_rate": 1.1749651573388297e-07, "loss": 0.00012748813605867326, "memory(GiB)": 94.21, "reward": 1.84375, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3921, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 69.40625190734863, "completions/min_length": 22.75, "epoch": 7.796227351700174, "grad_norm": 0.03621445969327345, "kl": 0.1529541015625, "learning_rate": 1.1729341305889418e-07, "loss": 0.0001528703432995826, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3922, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 76.79166793823242, "completions/min_length": 23.75, "epoch": 7.798212956068504, "grad_norm": 0.0039241723109373905, "kl": 0.079925537109375, "learning_rate": 1.1709046274757206e-07, "loss": 7.990228914422914e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3923, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 62.125001430511475, "completions/min_length": 22.625, "epoch": 7.800198560436833, "grad_norm": 0.008012809373704676, "kl": 0.062469482421875, "learning_rate": 1.1688766488071567e-07, "loss": 6.249164289329201e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3924, "train_speed(iter/s)": 0.022653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 70.52083539962769, "completions/min_length": 27.25, "epoch": 7.802184164805163, "grad_norm": 0.08130746158088281, "kl": 0.13604736328125, "learning_rate": 1.1668501953906278e-07, "loss": 0.00013631889305543154, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3925, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 71.03125190734863, "completions/min_length": 22.125, "epoch": 7.804169769173492, "grad_norm": 0.0038527334217906624, "kl": 0.0703125, "learning_rate": 1.1648252680329124e-07, "loss": 7.025589002296329e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3926, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 59.79166889190674, "completions/min_length": 21.0, "epoch": 7.806155373541822, "grad_norm": 0.0041760832134088936, "kl": 0.0712890625, "learning_rate": 1.1628018675401746e-07, "loss": 7.130164885893464e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3927, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 67.79166889190674, "completions/min_length": 21.125, "epoch": 7.808140977910151, "grad_norm": 0.005805128359271577, "kl": 0.07257080078125, "learning_rate": 1.1607799947179731e-07, "loss": 7.261057908181101e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3928, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.875, "completions/mean_length": 55.718751430511475, "completions/min_length": 19.125, "epoch": 7.810126582278481, "grad_norm": 0.004760596682025078, "kl": 0.06549072265625, "learning_rate": 1.1587596503712593e-07, "loss": 6.548453529831022e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3929, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 62.66666889190674, "completions/min_length": 24.375, "epoch": 7.812112186646811, "grad_norm": 0.0029035237032259955, "kl": 0.0738525390625, "learning_rate": 1.1567408353043772e-07, "loss": 7.38902308512479e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3930, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 74.04166793823242, "completions/min_length": 30.625, "epoch": 7.81409779101514, "grad_norm": 0.0030996362781128047, "kl": 0.07464599609375, "learning_rate": 1.1547235503210567e-07, "loss": 7.468024705303833e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3931, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 61.29166889190674, "completions/min_length": 21.875, "epoch": 7.81608339538347, "grad_norm": 0.003118357433967189, "kl": 0.0721435546875, "learning_rate": 1.1527077962244264e-07, "loss": 7.216303492896259e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3932, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 70.12500143051147, "completions/min_length": 31.75, "epoch": 7.8180689997518, "grad_norm": 1.0024017915716292, "kl": 0.06280517578125, "learning_rate": 1.1506935738169966e-07, "loss": 0.012461773119866848, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3933, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 65.62500238418579, "completions/min_length": 24.625, "epoch": 7.820054604120129, "grad_norm": 0.004677012561452153, "kl": 0.07257080078125, "learning_rate": 1.1486808839006756e-07, "loss": 7.2706192440819e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3934, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 70.93750143051147, "completions/min_length": 21.5, "epoch": 7.822040208488459, "grad_norm": 0.00356557350716112, "kl": 0.06951904296875, "learning_rate": 1.1466697272767573e-07, "loss": 6.948283407837152e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3935, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 72.28125143051147, "completions/min_length": 26.0, "epoch": 7.824025812856789, "grad_norm": 0.004005422780099686, "kl": 0.0704345703125, "learning_rate": 1.144660104745923e-07, "loss": 7.047977123875171e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3936, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 71.10416889190674, "completions/min_length": 26.75, "epoch": 7.826011417225118, "grad_norm": 0.004455311189684372, "kl": 0.06365966796875, "learning_rate": 1.142652017108252e-07, "loss": 6.372777716023847e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3937, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 71.78125190734863, "completions/min_length": 28.875, "epoch": 7.827997021593448, "grad_norm": 1.2196324186255922, "kl": 0.06695556640625, "learning_rate": 1.1406454651632041e-07, "loss": -0.001656563370488584, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3938, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 71.83333444595337, "completions/min_length": 23.25, "epoch": 7.829982625961777, "grad_norm": 0.004990552170123915, "kl": 0.086395263671875, "learning_rate": 1.1386404497096286e-07, "loss": 8.630442607682198e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3939, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 78.23958492279053, "completions/min_length": 19.375, "epoch": 7.831968230330107, "grad_norm": 0.003390239272871487, "kl": 0.07720947265625, "learning_rate": 1.1366369715457686e-07, "loss": 7.72233324823901e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3940, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 57.81250190734863, "completions/min_length": 17.75, "epoch": 7.833953834698436, "grad_norm": 0.017397658830204277, "kl": 0.0635986328125, "learning_rate": 1.1346350314692483e-07, "loss": 6.358828250085935e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3941, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 64.89583539962769, "completions/min_length": 16.0, "epoch": 7.835939439066766, "grad_norm": 0.003349649973414028, "kl": 0.07232666015625, "learning_rate": 1.1326346302770856e-07, "loss": 7.224710134323686e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3942, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 63.63541841506958, "completions/min_length": 22.375, "epoch": 7.837925043435096, "grad_norm": 1.131790717561578, "kl": 0.08013916015625, "learning_rate": 1.1306357687656803e-07, "loss": 0.00048503652215003967, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3943, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 65.60416793823242, "completions/min_length": 28.5, "epoch": 7.839910647803425, "grad_norm": 0.003771207554082733, "kl": 0.075836181640625, "learning_rate": 1.128638447730823e-07, "loss": 7.567742432001978e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3944, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 68.90625143051147, "completions/min_length": 27.875, "epoch": 7.841896252171755, "grad_norm": 0.006001467102062294, "kl": 0.068817138671875, "learning_rate": 1.1266426679676916e-07, "loss": 6.874018436064944e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3945, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 68.27083587646484, "completions/min_length": 26.125, "epoch": 7.843881856540085, "grad_norm": 0.007147402732750008, "kl": 0.080718994140625, "learning_rate": 1.1246484302708464e-07, "loss": 8.062220149440691e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3946, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 63.395836353302, "completions/min_length": 24.5, "epoch": 7.845867460908414, "grad_norm": 0.004209083040122269, "kl": 0.06805419921875, "learning_rate": 1.1226557354342359e-07, "loss": 6.810553168179467e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3947, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 69.55208587646484, "completions/min_length": 30.75, "epoch": 7.847853065276744, "grad_norm": 0.00288344454114979, "kl": 0.06158447265625, "learning_rate": 1.120664584251197e-07, "loss": 6.161877536214888e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3948, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 70.75000143051147, "completions/min_length": 26.75, "epoch": 7.849838669645074, "grad_norm": 0.003549570276149742, "kl": 0.0755615234375, "learning_rate": 1.1186749775144461e-07, "loss": 7.552633178420365e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3949, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 82.44791889190674, "completions/min_length": 30.375, "epoch": 7.851824274013403, "grad_norm": 0.004586165674970795, "kl": 0.082794189453125, "learning_rate": 1.1166869160160897e-07, "loss": 8.27049880172126e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3950, "train_speed(iter/s)": 0.022659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.125, "completions/mean_length": 70.36458587646484, "completions/min_length": 26.125, "epoch": 7.853809878381733, "grad_norm": 0.005966937217203743, "kl": 0.075042724609375, "learning_rate": 1.114700400547619e-07, "loss": 7.49956670915708e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3951, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 72.65625143051147, "completions/min_length": 30.5, "epoch": 7.855795482750062, "grad_norm": 0.0044505339446736444, "kl": 0.07171630859375, "learning_rate": 1.1127154318999055e-07, "loss": 7.179038948379457e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3952, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.875, "completions/mean_length": 65.35416889190674, "completions/min_length": 23.75, "epoch": 7.857781087118392, "grad_norm": 0.00562006406060834, "kl": 0.06060791015625, "learning_rate": 1.1107320108632107e-07, "loss": 6.057949940441176e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3953, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 70.40625238418579, "completions/min_length": 33.625, "epoch": 7.859766691486721, "grad_norm": 0.004724624752407142, "kl": 0.06048583984375, "learning_rate": 1.1087501382271752e-07, "loss": 6.0438782384153455e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3954, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 62.52083492279053, "completions/min_length": 16.75, "epoch": 7.861752295855051, "grad_norm": 0.005456370823632248, "kl": 0.065673828125, "learning_rate": 1.106769814780823e-07, "loss": 6.565966759808362e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3955, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 66.84375095367432, "completions/min_length": 19.625, "epoch": 7.863737900223381, "grad_norm": 2.084006326342192, "kl": 0.078125, "learning_rate": 1.1047910413125666e-07, "loss": 0.001870767562650144, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3956, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 66.05208492279053, "completions/min_length": 24.375, "epoch": 7.86572350459171, "grad_norm": 0.003694985994740653, "kl": 0.07672119140625, "learning_rate": 1.1028138186101954e-07, "loss": 7.670488412259147e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3957, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 61.72916841506958, "completions/min_length": 22.875, "epoch": 7.86770910896004, "grad_norm": 0.0036277151361194175, "kl": 0.0550537109375, "learning_rate": 1.1008381474608847e-07, "loss": 5.509276161319576e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3958, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 74.23958587646484, "completions/min_length": 27.75, "epoch": 7.86969471332837, "grad_norm": 0.6972247215945936, "kl": 0.068634033203125, "learning_rate": 1.098864028651193e-07, "loss": -0.007476377300918102, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166669771075, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3959, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.375, "completions/mean_length": 73.34375238418579, "completions/min_length": 23.375, "epoch": 7.871680317696699, "grad_norm": 0.9724131018010981, "kl": 0.07489013671875, "learning_rate": 1.0968914629670567e-07, "loss": -0.006799768656492233, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3960, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 60.500001430511475, "completions/min_length": 22.125, "epoch": 7.873665922065029, "grad_norm": 0.0033869019037312174, "kl": 0.06768798828125, "learning_rate": 1.0949204511937987e-07, "loss": 6.76974595990032e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3961, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 64.87500238418579, "completions/min_length": 19.375, "epoch": 7.875651526433359, "grad_norm": 0.0052025305653062804, "kl": 0.069122314453125, "learning_rate": 1.092950994116118e-07, "loss": 6.916293932590634e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3962, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 66.52083492279053, "completions/min_length": 23.125, "epoch": 7.877637130801688, "grad_norm": 0.003574107424793834, "kl": 0.076171875, "learning_rate": 1.0909830925181007e-07, "loss": 7.619446114404127e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3963, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 66.82291841506958, "completions/min_length": 25.375, "epoch": 7.879622735170018, "grad_norm": 0.0046079734417892, "kl": 0.064056396484375, "learning_rate": 1.0890167471832079e-07, "loss": 6.407794717233628e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3964, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 61.89583492279053, "completions/min_length": 21.625, "epoch": 7.881608339538347, "grad_norm": 0.004339507910135874, "kl": 0.05731201171875, "learning_rate": 1.0870519588942839e-07, "loss": 5.7298144383821636e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3965, "train_speed(iter/s)": 0.022658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 68.08333587646484, "completions/min_length": 30.625, "epoch": 7.883593943906677, "grad_norm": 0.0060038038960135, "kl": 0.0738525390625, "learning_rate": 1.0850887284335557e-07, "loss": 7.388897938653827e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3966, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 69.57291889190674, "completions/min_length": 25.875, "epoch": 7.885579548275006, "grad_norm": 0.003203117876970192, "kl": 0.064453125, "learning_rate": 1.0831270565826256e-07, "loss": 6.445555482059717e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3967, "train_speed(iter/s)": 0.022657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 65.40625286102295, "completions/min_length": 26.0, "epoch": 7.887565152643336, "grad_norm": 0.003903757523988194, "kl": 0.0623779296875, "learning_rate": 1.0811669441224747e-07, "loss": 6.240440416149795e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3968, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 61.187501430511475, "completions/min_length": 21.5, "epoch": 7.889550757011666, "grad_norm": 0.005555410627060006, "kl": 0.064208984375, "learning_rate": 1.0792083918334694e-07, "loss": 6.419791316147894e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3969, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 69.69791746139526, "completions/min_length": 26.125, "epoch": 7.891536361379995, "grad_norm": 0.003928206364544882, "kl": 0.0650634765625, "learning_rate": 1.0772514004953482e-07, "loss": 6.50549991405569e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3970, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 71.00000238418579, "completions/min_length": 24.5, "epoch": 7.893521965748325, "grad_norm": 0.0034110524675974307, "kl": 0.099365234375, "learning_rate": 1.0752959708872323e-07, "loss": 9.939714800566435e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3971, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 68.41666793823242, "completions/min_length": 23.25, "epoch": 7.895507570116655, "grad_norm": 0.0037294759588588767, "kl": 0.082122802734375, "learning_rate": 1.0733421037876212e-07, "loss": 8.214589615818113e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3972, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.375, "completions/mean_length": 71.38541889190674, "completions/min_length": 22.625, "epoch": 7.897493174484984, "grad_norm": 0.0034435531961676076, "kl": 0.08465576171875, "learning_rate": 1.0713897999743887e-07, "loss": 8.470690227113664e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3973, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.375, "completions/mean_length": 64.18750143051147, "completions/min_length": 22.0, "epoch": 7.899478778853314, "grad_norm": 0.005370907145198794, "kl": 0.069732666015625, "learning_rate": 1.0694390602247915e-07, "loss": 6.971009133849293e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3974, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.375, "completions/mean_length": 56.98958444595337, "completions/min_length": 22.625, "epoch": 7.901464383221644, "grad_norm": 0.0035612176504184437, "kl": 0.071868896484375, "learning_rate": 1.0674898853154595e-07, "loss": 7.19110103091225e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3975, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 67.11458587646484, "completions/min_length": 21.875, "epoch": 7.903449987589973, "grad_norm": 1.5403271126599327, "kl": 0.06585693359375, "learning_rate": 1.0655422760223991e-07, "loss": -0.0016677286475896835, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3976, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 71.08333539962769, "completions/min_length": 27.875, "epoch": 7.905435591958303, "grad_norm": 0.005724520886709921, "kl": 0.07281494140625, "learning_rate": 1.063596233120997e-07, "loss": 7.272971561178565e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3977, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 59.85416889190674, "completions/min_length": 16.375, "epoch": 7.907421196326632, "grad_norm": 0.004306675059180274, "kl": 0.058319091796875, "learning_rate": 1.061651757386015e-07, "loss": 5.830315058119595e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3978, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 76.71875333786011, "completions/min_length": 24.5, "epoch": 7.909406800694962, "grad_norm": 2.4623701375594593, "kl": 0.09613037109375, "learning_rate": 1.0597088495915885e-07, "loss": 0.0013852929696440697, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3979, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.625, "completions/mean_length": 80.46875286102295, "completions/min_length": 35.5, "epoch": 7.911392405063291, "grad_norm": 1.0930304552102974, "kl": 0.087646484375, "learning_rate": 1.0577675105112327e-07, "loss": 0.0016900020418688655, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3980, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.375, "completions/mean_length": 69.97916913032532, "completions/min_length": 22.375, "epoch": 7.913378009431621, "grad_norm": 0.0045694434191028335, "kl": 0.079986572265625, "learning_rate": 1.055827740917834e-07, "loss": 8.007009455468506e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3981, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 69.35416841506958, "completions/min_length": 22.625, "epoch": 7.915363613799951, "grad_norm": 0.003578187146288709, "kl": 0.063201904296875, "learning_rate": 1.0538895415836586e-07, "loss": 6.32756418781355e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3982, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 70.60416841506958, "completions/min_length": 28.375, "epoch": 7.91734921816828, "grad_norm": 0.005479163859475252, "kl": 0.075531005859375, "learning_rate": 1.0519529132803435e-07, "loss": 7.554507465101779e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3983, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 60.73958492279053, "completions/min_length": 19.875, "epoch": 7.91933482253661, "grad_norm": 0.05137156518902156, "kl": 0.075531005859375, "learning_rate": 1.0500178567788992e-07, "loss": 7.551905582658947e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3984, "train_speed(iter/s)": 0.022656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 67.26041889190674, "completions/min_length": 29.5, "epoch": 7.9213204269049395, "grad_norm": 0.0035733051544297022, "kl": 0.06304931640625, "learning_rate": 1.0480843728497185e-07, "loss": 6.30665963399224e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3985, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 64.04166841506958, "completions/min_length": 23.5, "epoch": 7.923306031273269, "grad_norm": 1.2324147131910126, "kl": 0.08349609375, "learning_rate": 1.04615246226256e-07, "loss": -0.004237992223352194, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.1870916150510311, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3986, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.875, "completions/mean_length": 81.65625190734863, "completions/min_length": 25.375, "epoch": 7.9252916356415986, "grad_norm": 0.0049523810292853, "kl": 0.069549560546875, "learning_rate": 1.0442221257865569e-07, "loss": 6.958723679417744e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3987, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 66.8229193687439, "completions/min_length": 25.375, "epoch": 7.9272772400099285, "grad_norm": 2.1543636817651994, "kl": 0.07806396484375, "learning_rate": 1.0422933641902209e-07, "loss": -0.00914867501705885, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3988, "train_speed(iter/s)": 0.022655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 65.10416793823242, "completions/min_length": 27.0, "epoch": 7.929262844378258, "grad_norm": 0.003563084520555722, "kl": 0.068206787109375, "learning_rate": 1.0403661782414297e-07, "loss": 6.818232213845477e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3989, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 67.31250190734863, "completions/min_length": 27.0, "epoch": 7.9312484487465875, "grad_norm": 0.006284192524869154, "kl": 0.09234619140625, "learning_rate": 1.0384405687074398e-07, "loss": 9.232441516360268e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3990, "train_speed(iter/s)": 0.022654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 65.96875238418579, "completions/min_length": 27.125, "epoch": 7.933234053114917, "grad_norm": 0.004095798525843277, "kl": 0.064422607421875, "learning_rate": 1.0365165363548756e-07, "loss": 6.44034007564187e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3991, "train_speed(iter/s)": 0.022652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 67.58333396911621, "completions/min_length": 21.25, "epoch": 7.9352196574832465, "grad_norm": 0.003729799932089468, "kl": 0.069854736328125, "learning_rate": 1.0345940819497356e-07, "loss": 6.987794040469453e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3992, "train_speed(iter/s)": 0.022651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.625, "completions/mean_length": 61.69791793823242, "completions/min_length": 26.25, "epoch": 7.937205261851576, "grad_norm": 0.0031253009101299216, "kl": 0.06756591796875, "learning_rate": 1.0326732062573928e-07, "loss": 6.76056879456155e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3993, "train_speed(iter/s)": 0.02265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 69.72916841506958, "completions/min_length": 23.25, "epoch": 7.9391908662199056, "grad_norm": 0.003471343815855204, "kl": 0.069488525390625, "learning_rate": 1.0307539100425861e-07, "loss": 6.961461622267962e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3994, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 65.21875286102295, "completions/min_length": 25.75, "epoch": 7.9411764705882355, "grad_norm": 0.0030315112973599463, "kl": 0.0889892578125, "learning_rate": 1.028836194069428e-07, "loss": 8.894351776689291e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3995, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 63.85416793823242, "completions/min_length": 24.625, "epoch": 7.943162074956565, "grad_norm": 0.0039174260477899975, "kl": 0.0589599609375, "learning_rate": 1.0269200591014043e-07, "loss": 5.8904166508000344e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3996, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 63.18750190734863, "completions/min_length": 19.125, "epoch": 7.9451476793248945, "grad_norm": 0.0034270196269446072, "kl": 0.06951904296875, "learning_rate": 1.0250055059013668e-07, "loss": 6.958996527828276e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3997, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 63.94791841506958, "completions/min_length": 18.125, "epoch": 7.9471332836932245, "grad_norm": 0.005162829840412207, "kl": 0.082916259765625, "learning_rate": 1.0230925352315417e-07, "loss": 8.288287790492177e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3998, "train_speed(iter/s)": 0.022649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 60.65625238418579, "completions/min_length": 22.0, "epoch": 7.9491188880615535, "grad_norm": 0.0032668409913887402, "kl": 0.0660400390625, "learning_rate": 1.0211811478535237e-07, "loss": 6.602262874366716e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3999, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 65.46875190734863, "completions/min_length": 16.375, "epoch": 7.9511044924298835, "grad_norm": 0.0032284745387936202, "kl": 0.0870361328125, "learning_rate": 1.0192713445282758e-07, "loss": 8.700194302946329e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4000, "train_speed(iter/s)": 0.022648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 67.76041889190674, "completions/min_length": 22.75, "epoch": 7.953090096798213, "grad_norm": 0.003204916427357215, "kl": 0.07672119140625, "learning_rate": 1.0173631260161325e-07, "loss": 7.67066449043341e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4001, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 69.27083539962769, "completions/min_length": 24.25, "epoch": 7.9550757011665425, "grad_norm": 1.6446934124304393, "kl": 0.07208251953125, "learning_rate": 1.015456493076795e-07, "loss": -0.015286803245544434, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4002, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 64.739586353302, "completions/min_length": 19.625, "epoch": 7.9570613055348725, "grad_norm": 0.003191960120300276, "kl": 0.089691162109375, "learning_rate": 1.0135514464693367e-07, "loss": 8.963925938587636e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4003, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.375, "completions/mean_length": 51.14583396911621, "completions/min_length": 20.0, "epoch": 7.9590469099032015, "grad_norm": 0.0052657993842108175, "kl": 0.06829833984375, "learning_rate": 1.0116479869521966e-07, "loss": 6.825456512160599e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4004, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.5, "completions/mean_length": 58.50000190734863, "completions/min_length": 20.0, "epoch": 7.9610325142715315, "grad_norm": 0.0049532569589100595, "kl": 0.071990966796875, "learning_rate": 1.0097461152831788e-07, "loss": 7.203160203061998e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4005, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 63.70833444595337, "completions/min_length": 25.875, "epoch": 7.9630181186398605, "grad_norm": 0.0027871873703943484, "kl": 0.073394775390625, "learning_rate": 1.0078458322194656e-07, "loss": 7.327820640057325e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4006, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 71.052086353302, "completions/min_length": 22.875, "epoch": 7.9650037230081905, "grad_norm": 0.003864486813525583, "kl": 0.070159912109375, "learning_rate": 1.0059471385175966e-07, "loss": 7.01984972693026e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4007, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 69.13541841506958, "completions/min_length": 22.25, "epoch": 7.96698932737652, "grad_norm": 0.00489322407329731, "kl": 0.06585693359375, "learning_rate": 1.0040500349334819e-07, "loss": 6.583495996892452e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4008, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 61.73958396911621, "completions/min_length": 21.5, "epoch": 7.9689749317448495, "grad_norm": 0.00307659212544642, "kl": 0.077667236328125, "learning_rate": 1.0021545222224004e-07, "loss": 7.769867079332471e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4009, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 70.75000095367432, "completions/min_length": 26.125, "epoch": 7.9709605361131795, "grad_norm": 0.0031728972314285543, "kl": 0.060546875, "learning_rate": 1.0002606011389947e-07, "loss": 6.048924842616543e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4010, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 74.09375190734863, "completions/min_length": 21.875, "epoch": 7.972946140481509, "grad_norm": 0.0023315589751248846, "kl": 0.063995361328125, "learning_rate": 9.983682724372778e-08, "loss": 6.404868327081203e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4011, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 72.34375143051147, "completions/min_length": 20.875, "epoch": 7.9749317448498385, "grad_norm": 0.003709643454558521, "kl": 0.066192626953125, "learning_rate": 9.964775368706224e-08, "loss": 6.613855657633394e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4012, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 70.5729193687439, "completions/min_length": 25.75, "epoch": 7.976917349218168, "grad_norm": 0.0028197719205080457, "kl": 0.06787109375, "learning_rate": 9.945883951917733e-08, "loss": 6.784204015275463e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4013, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 70.70833539962769, "completions/min_length": 22.25, "epoch": 7.978902953586498, "grad_norm": 0.0028342742453965997, "kl": 0.063629150390625, "learning_rate": 9.927008481528393e-08, "loss": 6.357110396493226e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4014, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 69.28125286102295, "completions/min_length": 23.5, "epoch": 7.980888557954827, "grad_norm": 0.0030782556058395706, "kl": 0.06329345703125, "learning_rate": 9.90814896505291e-08, "loss": 6.321975524770096e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4015, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 72.58333587646484, "completions/min_length": 26.75, "epoch": 7.982874162323157, "grad_norm": 0.0033123396647415223, "kl": 0.07421875, "learning_rate": 9.889305409999655e-08, "loss": 7.412924605887383e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4016, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 63.29166793823242, "completions/min_length": 27.125, "epoch": 7.9848597666914864, "grad_norm": 0.0029834375885894747, "kl": 0.0618896484375, "learning_rate": 9.870477823870676e-08, "loss": 6.177874456625432e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4017, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 61.00000190734863, "completions/min_length": 26.125, "epoch": 7.986845371059816, "grad_norm": 3.940874805059957, "kl": 0.097564697265625, "learning_rate": 9.851666214161609e-08, "loss": -0.007386643439531326, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4018, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 68.56250190734863, "completions/min_length": 25.0, "epoch": 7.9888309754281455, "grad_norm": 0.009082513953037507, "kl": 0.0775146484375, "learning_rate": 9.832870588361769e-08, "loss": 7.744548929622397e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4019, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 65.3854193687439, "completions/min_length": 29.25, "epoch": 7.990816579796475, "grad_norm": 0.006155403665751887, "kl": 0.077850341796875, "learning_rate": 9.814090953954107e-08, "loss": 7.795381679898128e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4020, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 71.09375238418579, "completions/min_length": 26.875, "epoch": 7.992802184164805, "grad_norm": 0.002904945499749162, "kl": 0.06842041015625, "learning_rate": 9.79532731841518e-08, "loss": 6.833539373474196e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4021, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 66.57291841506958, "completions/min_length": 22.75, "epoch": 7.994787788533134, "grad_norm": 0.0028966892993152315, "kl": 0.055084228515625, "learning_rate": 9.776579689215208e-08, "loss": 5.5054631957318634e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4022, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 74.11458587646484, "completions/min_length": 24.375, "epoch": 7.996773392901464, "grad_norm": 0.002860955525261995, "kl": 0.0621337890625, "learning_rate": 9.75784807381802e-08, "loss": 6.212804146343842e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4023, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 63.58333396911621, "completions/min_length": 26.25, "epoch": 7.998758997269794, "grad_norm": 1.7040984097229772, "kl": 0.071533203125, "learning_rate": 9.73913247968105e-08, "loss": -0.0030699982307851315, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4024, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 61.06250190734863, "completions/min_length": 21.875, "epoch": 8.001985604368329, "grad_norm": 0.00317775556564026, "kl": 0.064727783203125, "learning_rate": 9.720432914255405e-08, "loss": 6.46327025606297e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4025, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 73.7604193687439, "completions/min_length": 25.125, "epoch": 8.00397120873666, "grad_norm": 0.003873065301164897, "kl": 0.061187744140625, "learning_rate": 9.701749384985753e-08, "loss": 6.116794247645885e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4026, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 71.71875190734863, "completions/min_length": 20.875, "epoch": 8.005956813104989, "grad_norm": 0.0031089829157041247, "kl": 0.1026611328125, "learning_rate": 9.683081899310425e-08, "loss": 0.00010262842988595366, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4027, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 67.40625190734863, "completions/min_length": 23.375, "epoch": 8.007942417473318, "grad_norm": 0.00313588910603342, "kl": 0.07672119140625, "learning_rate": 9.664430464661355e-08, "loss": 7.669499609619379e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4028, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 70.27083444595337, "completions/min_length": 22.625, "epoch": 8.009928021841649, "grad_norm": 0.0029826969212667326, "kl": 0.079864501953125, "learning_rate": 9.645795088464049e-08, "loss": 7.972571620484814e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4029, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 71.21875286102295, "completions/min_length": 22.625, "epoch": 8.011913626209978, "grad_norm": 0.0037555325614692476, "kl": 0.085418701171875, "learning_rate": 9.627175778137681e-08, "loss": 8.544397132936865e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4030, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 63.64583492279053, "completions/min_length": 20.75, "epoch": 8.013899230578307, "grad_norm": 0.0029392915911424402, "kl": 0.054046630859375, "learning_rate": 9.608572541094979e-08, "loss": 5.404389230534434e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4031, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 68.39583587646484, "completions/min_length": 19.625, "epoch": 8.015884834946636, "grad_norm": 0.0032770770532584326, "kl": 0.0848388671875, "learning_rate": 9.589985384742272e-08, "loss": 8.481745317112654e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4032, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 57.63541793823242, "completions/min_length": 23.25, "epoch": 8.017870439314967, "grad_norm": 0.03650768508430622, "kl": 0.13665771484375, "learning_rate": 9.571414316479526e-08, "loss": 0.00013677947572432458, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4033, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 68.85416841506958, "completions/min_length": 22.0, "epoch": 8.019856043683296, "grad_norm": 0.002998717115668105, "kl": 0.054656982421875, "learning_rate": 9.552859343700287e-08, "loss": 5.469063762575388e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4034, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 62.156251430511475, "completions/min_length": 23.25, "epoch": 8.021841648051625, "grad_norm": 2.107848497134729, "kl": 0.073486328125, "learning_rate": 9.534320473791657e-08, "loss": -0.007579359225928783, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4035, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 74.17708492279053, "completions/min_length": 27.75, "epoch": 8.023827252419956, "grad_norm": 0.00395081026216689, "kl": 0.0684814453125, "learning_rate": 9.515797714134388e-08, "loss": 6.839950219728053e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4036, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 66.01041841506958, "completions/min_length": 21.5, "epoch": 8.025812856788285, "grad_norm": 0.0026239163575432797, "kl": 0.055023193359375, "learning_rate": 9.497291072102764e-08, "loss": 5.510847404366359e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4037, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 75.11458587646484, "completions/min_length": 30.375, "epoch": 8.027798461156614, "grad_norm": 0.003687340491071612, "kl": 0.066009521484375, "learning_rate": 9.478800555064693e-08, "loss": 6.606719398405403e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4038, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 55.46875190734863, "completions/min_length": 24.5, "epoch": 8.029784065524945, "grad_norm": 0.005124023417827816, "kl": 0.096099853515625, "learning_rate": 9.460326170381616e-08, "loss": 9.604774822946638e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4039, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.625, "completions/mean_length": 50.312501668930054, "completions/min_length": 20.5, "epoch": 8.031769669893274, "grad_norm": 0.004241864820696868, "kl": 0.072723388671875, "learning_rate": 9.441867925408603e-08, "loss": 7.272530638147146e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4040, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 72.81250095367432, "completions/min_length": 30.75, "epoch": 8.033755274261603, "grad_norm": 0.003914396390815221, "kl": 0.087921142578125, "learning_rate": 9.42342582749428e-08, "loss": 8.797112241154537e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4041, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.375, "completions/mean_length": 79.66666984558105, "completions/min_length": 32.25, "epoch": 8.035740878629934, "grad_norm": 0.0027636424029366035, "kl": 0.07147216796875, "learning_rate": 9.404999883980818e-08, "loss": 7.146518328227103e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4042, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 65.76041889190674, "completions/min_length": 21.5, "epoch": 8.037726482998263, "grad_norm": 0.0034206465232461828, "kl": 0.0628662109375, "learning_rate": 9.386590102204006e-08, "loss": 6.285082054091617e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4043, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 62.67708492279053, "completions/min_length": 21.75, "epoch": 8.039712087366592, "grad_norm": 0.0030202910660872668, "kl": 0.062347412109375, "learning_rate": 9.368196489493158e-08, "loss": 6.229724385775626e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4044, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 65.21875190734863, "completions/min_length": 29.625, "epoch": 8.041697691734921, "grad_norm": 0.004281372973099747, "kl": 0.061248779296875, "learning_rate": 9.349819053171143e-08, "loss": 6.11817158642225e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4045, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 65.57291889190674, "completions/min_length": 22.375, "epoch": 8.043683296103252, "grad_norm": 0.004369602290796568, "kl": 0.073211669921875, "learning_rate": 9.331457800554438e-08, "loss": 7.32242115191184e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4046, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 69.47916793823242, "completions/min_length": 24.25, "epoch": 8.045668900471581, "grad_norm": 0.003263658173740175, "kl": 0.071929931640625, "learning_rate": 9.31311273895305e-08, "loss": 7.192780321929604e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4047, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 60.05208444595337, "completions/min_length": 21.75, "epoch": 8.04765450483991, "grad_norm": 0.0026106636178048507, "kl": 0.056884765625, "learning_rate": 9.294783875670525e-08, "loss": 5.689482713933103e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4048, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 71.78125143051147, "completions/min_length": 26.875, "epoch": 8.04964010920824, "grad_norm": 0.003861870313570463, "kl": 0.069244384765625, "learning_rate": 9.27647121800399e-08, "loss": 6.936269346624613e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4049, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 60.33333492279053, "completions/min_length": 25.375, "epoch": 8.05162571357657, "grad_norm": 0.006238776573855147, "kl": 0.071929931640625, "learning_rate": 9.258174773244087e-08, "loss": 7.195457874331623e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4050, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 72.94791841506958, "completions/min_length": 24.875, "epoch": 8.053611317944899, "grad_norm": 0.6090824620184125, "kl": 0.067230224609375, "learning_rate": 9.239894548675048e-08, "loss": 0.0017213996034115553, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4051, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 60.39583492279053, "completions/min_length": 26.625, "epoch": 8.05559692231323, "grad_norm": 0.005101422124545626, "kl": 0.10040283203125, "learning_rate": 9.221630551574599e-08, "loss": 0.00010034696606453508, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4052, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 73.37500143051147, "completions/min_length": 24.375, "epoch": 8.057582526681559, "grad_norm": 0.002804186407576077, "kl": 0.069610595703125, "learning_rate": 9.20338278921401e-08, "loss": 6.949788075871766e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4053, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 59.437501430511475, "completions/min_length": 23.875, "epoch": 8.059568131049888, "grad_norm": 0.00411066377930944, "kl": 0.0697021484375, "learning_rate": 9.185151268858155e-08, "loss": 6.966612272663042e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4054, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 72.73958492279053, "completions/min_length": 26.75, "epoch": 8.061553735418219, "grad_norm": 0.004542473033607802, "kl": 0.074981689453125, "learning_rate": 9.166935997765362e-08, "loss": 7.499718776671216e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4055, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 75.08333492279053, "completions/min_length": 27.0, "epoch": 8.063539339786548, "grad_norm": 1.1801106984366552, "kl": 0.093902587890625, "learning_rate": 9.148736983187517e-08, "loss": 0.014648554846644402, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4056, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 68.11458492279053, "completions/min_length": 35.0, "epoch": 8.065524944154877, "grad_norm": 0.00396779478542917, "kl": 0.060150146484375, "learning_rate": 9.130554232370047e-08, "loss": 6.0100439441157505e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4057, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 65.14583444595337, "completions/min_length": 25.5, "epoch": 8.067510548523206, "grad_norm": 1.2509881699243472, "kl": 0.0645751953125, "learning_rate": 9.112387752551876e-08, "loss": 0.02153899148106575, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4058, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 62.97916793823242, "completions/min_length": 24.875, "epoch": 8.069496152891537, "grad_norm": 0.003362197664843273, "kl": 0.072052001953125, "learning_rate": 9.094237550965494e-08, "loss": 7.204393477877602e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4059, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 58.13541841506958, "completions/min_length": 18.0, "epoch": 8.071481757259866, "grad_norm": 0.003201434858193056, "kl": 0.0660400390625, "learning_rate": 9.076103634836857e-08, "loss": 6.600699271075428e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4060, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.375, "completions/mean_length": 75.54166841506958, "completions/min_length": 29.0, "epoch": 8.073467361628195, "grad_norm": 0.002389214077048698, "kl": 0.066497802734375, "learning_rate": 9.057986011385477e-08, "loss": 6.653812306467444e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4061, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 64.88541793823242, "completions/min_length": 27.0, "epoch": 8.075452965996526, "grad_norm": 0.004664337703180663, "kl": 0.053863525390625, "learning_rate": 9.039884687824383e-08, "loss": 5.383374082157388e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4062, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 68.28125286102295, "completions/min_length": 21.875, "epoch": 8.077438570364855, "grad_norm": 0.0037005413346408054, "kl": 0.082000732421875, "learning_rate": 9.02179967136008e-08, "loss": 8.186304330592975e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4063, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 71.80208492279053, "completions/min_length": 25.5, "epoch": 8.079424174733184, "grad_norm": 0.003275447530899798, "kl": 0.0560302734375, "learning_rate": 9.003730969192586e-08, "loss": 5.599199721473269e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4064, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 60.09375190734863, "completions/min_length": 19.375, "epoch": 8.081409779101515, "grad_norm": 0.005609424848859443, "kl": 0.068939208984375, "learning_rate": 8.985678588515472e-08, "loss": 6.901115557411686e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4065, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.125, "completions/mean_length": 69.03125095367432, "completions/min_length": 24.75, "epoch": 8.083395383469844, "grad_norm": 0.0027454080180372925, "kl": 0.06927490234375, "learning_rate": 8.967642536515741e-08, "loss": 6.925908382982016e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4066, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 64.20833444595337, "completions/min_length": 18.375, "epoch": 8.085380987838173, "grad_norm": 0.004700629423002741, "kl": 0.0675048828125, "learning_rate": 8.949622820373947e-08, "loss": 6.75107876304537e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4067, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 74.00000333786011, "completions/min_length": 27.0, "epoch": 8.087366592206504, "grad_norm": 0.003273976470075609, "kl": 0.071014404296875, "learning_rate": 8.931619447264139e-08, "loss": 7.101658411556855e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4068, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 60.59375190734863, "completions/min_length": 22.375, "epoch": 8.089352196574833, "grad_norm": 0.004178098590463913, "kl": 0.072509765625, "learning_rate": 8.913632424353811e-08, "loss": 7.248263864312321e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4069, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 66.14583539962769, "completions/min_length": 30.25, "epoch": 8.091337800943162, "grad_norm": 0.002955209261232975, "kl": 0.05487060546875, "learning_rate": 8.895661758804019e-08, "loss": 5.485026849783026e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4070, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 70.47916889190674, "completions/min_length": 27.375, "epoch": 8.09332340531149, "grad_norm": 0.0036654771903251167, "kl": 0.061492919921875, "learning_rate": 8.877707457769246e-08, "loss": 6.141657650005072e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4071, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 58.61458444595337, "completions/min_length": 18.625, "epoch": 8.095309009679822, "grad_norm": 0.003092474652028288, "kl": 0.069610595703125, "learning_rate": 8.85976952839747e-08, "loss": 6.964871863601729e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4072, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.875, "completions/mean_length": 62.7291693687439, "completions/min_length": 24.5, "epoch": 8.09729461404815, "grad_norm": 0.0031028527747228654, "kl": 0.073638916015625, "learning_rate": 8.841847977830196e-08, "loss": 7.363753684330732e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4073, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 64.21875095367432, "completions/min_length": 28.25, "epoch": 8.09928021841648, "grad_norm": 0.0027384040201808833, "kl": 0.05816650390625, "learning_rate": 8.823942813202351e-08, "loss": 5.811927258037031e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4074, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 68.12500190734863, "completions/min_length": 21.875, "epoch": 8.10126582278481, "grad_norm": 0.0029689076591138033, "kl": 0.08026123046875, "learning_rate": 8.806054041642364e-08, "loss": 8.026678551686928e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4075, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 76.92708587646484, "completions/min_length": 22.625, "epoch": 8.10325142715314, "grad_norm": 0.004230721993249369, "kl": 0.07611083984375, "learning_rate": 8.788181670272165e-08, "loss": 7.606188592035323e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4076, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 58.177085876464844, "completions/min_length": 17.375, "epoch": 8.105237031521469, "grad_norm": 0.820822794801226, "kl": 0.0841064453125, "learning_rate": 8.770325706207087e-08, "loss": 0.0140147116035223, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4077, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 63.27083396911621, "completions/min_length": 26.25, "epoch": 8.1072226358898, "grad_norm": 0.006041376137169389, "kl": 0.081451416015625, "learning_rate": 8.752486156556011e-08, "loss": 8.147205517161638e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4078, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 64.76041889190674, "completions/min_length": 19.875, "epoch": 8.109208240258129, "grad_norm": 0.0035371449929289733, "kl": 0.05908203125, "learning_rate": 8.734663028421207e-08, "loss": 5.902050179429352e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4079, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 71.91666889190674, "completions/min_length": 28.5, "epoch": 8.111193844626458, "grad_norm": 0.002545608884861, "kl": 0.07098388671875, "learning_rate": 8.71685632889847e-08, "loss": 7.094950706232339e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4080, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 65.61458492279053, "completions/min_length": 24.0, "epoch": 8.113179448994789, "grad_norm": 0.004816686634404716, "kl": 0.069732666015625, "learning_rate": 8.699066065077004e-08, "loss": 6.979263707762584e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4081, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 76.25000286102295, "completions/min_length": 29.25, "epoch": 8.115165053363118, "grad_norm": 0.0034794009648493143, "kl": 0.0733642578125, "learning_rate": 8.681292244039507e-08, "loss": 7.324795296881348e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4082, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 68.59375286102295, "completions/min_length": 25.0, "epoch": 8.117150657731447, "grad_norm": 0.0025915006659035476, "kl": 0.05963134765625, "learning_rate": 8.663534872862127e-08, "loss": 5.96054524066858e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4083, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 65.46875238418579, "completions/min_length": 25.25, "epoch": 8.119136262099776, "grad_norm": 0.0036968277020846033, "kl": 0.073486328125, "learning_rate": 8.645793958614439e-08, "loss": 7.34370551072061e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4084, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 73.56250190734863, "completions/min_length": 24.625, "epoch": 8.121121866468107, "grad_norm": 1.0190478048282317, "kl": 0.06744384765625, "learning_rate": 8.62806950835947e-08, "loss": -0.006059292703866959, "memory(GiB)": 94.21, "reward": 1.7083333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.32266222313046455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4085, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 55.70833492279053, "completions/min_length": 16.375, "epoch": 8.123107470836436, "grad_norm": 0.005860968019304921, "kl": 0.064727783203125, "learning_rate": 8.610361529153721e-08, "loss": 6.472819222835824e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4086, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 66.60416793823242, "completions/min_length": 20.625, "epoch": 8.125093075204765, "grad_norm": 0.0038769452834917424, "kl": 0.076568603515625, "learning_rate": 8.592670028047105e-08, "loss": 7.65736767789349e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4087, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.375, "completions/mean_length": 66.17708539962769, "completions/min_length": 20.5, "epoch": 8.127078679573096, "grad_norm": 0.0037981058310298777, "kl": 0.072662353515625, "learning_rate": 8.574995012082986e-08, "loss": 7.259578705998138e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4088, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 65.36458587646484, "completions/min_length": 19.25, "epoch": 8.129064283941425, "grad_norm": 0.0032027048390117097, "kl": 0.0709228515625, "learning_rate": 8.557336488298184e-08, "loss": 7.099266804289073e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4089, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.625, "completions/mean_length": 58.63541793823242, "completions/min_length": 19.375, "epoch": 8.131049888309754, "grad_norm": 0.0035778668017920966, "kl": 0.08074951171875, "learning_rate": 8.539694463722907e-08, "loss": 8.079313556663692e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4090, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 74.66666984558105, "completions/min_length": 29.375, "epoch": 8.133035492678085, "grad_norm": 0.0037775964220259457, "kl": 0.06561279296875, "learning_rate": 8.52206894538085e-08, "loss": 6.557812594110146e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4091, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 70.48958444595337, "completions/min_length": 21.625, "epoch": 8.135021097046414, "grad_norm": 0.006361624664001505, "kl": 0.0615234375, "learning_rate": 8.504459940289094e-08, "loss": 6.153984577395022e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4092, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 64.67708539962769, "completions/min_length": 20.875, "epoch": 8.137006701414743, "grad_norm": 1.3408310523189166, "kl": 0.133636474609375, "learning_rate": 8.486867455458147e-08, "loss": -0.003510394599288702, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4093, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.625, "completions/mean_length": 76.08333587646484, "completions/min_length": 21.75, "epoch": 8.138992305783074, "grad_norm": 0.0038748184402661455, "kl": 0.07562255859375, "learning_rate": 8.469291497891978e-08, "loss": 7.560574886156246e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4094, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 62.083335399627686, "completions/min_length": 21.625, "epoch": 8.140977910151403, "grad_norm": 0.012435054507674568, "kl": 0.076202392578125, "learning_rate": 8.451732074587925e-08, "loss": 7.620621181558818e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4095, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 60.375000953674316, "completions/min_length": 19.625, "epoch": 8.142963514519732, "grad_norm": 0.01043506512653113, "kl": 0.09088134765625, "learning_rate": 8.434189192536784e-08, "loss": 9.082769247470424e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4096, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 63.06250286102295, "completions/min_length": 22.625, "epoch": 8.14494911888806, "grad_norm": 0.0039158193857139045, "kl": 0.0780029296875, "learning_rate": 8.416662858722767e-08, "loss": 7.798105070833117e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4097, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 71.55208587646484, "completions/min_length": 31.125, "epoch": 8.146934723256392, "grad_norm": 0.008277865993363477, "kl": 0.081756591796875, "learning_rate": 8.39915308012345e-08, "loss": 8.176160918083042e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4098, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 73.51041793823242, "completions/min_length": 32.375, "epoch": 8.14892032762472, "grad_norm": 1.6377928806517925, "kl": 0.091583251953125, "learning_rate": 8.381659863709878e-08, "loss": 0.002245645970106125, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166669771075, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4099, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 66.05208444595337, "completions/min_length": 21.75, "epoch": 8.15090593199305, "grad_norm": 0.0029685574784494456, "kl": 0.073638916015625, "learning_rate": 8.364183216446463e-08, "loss": 7.353271939791739e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4100, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 65.01041889190674, "completions/min_length": 22.625, "epoch": 8.15289153636138, "grad_norm": 0.003499743460911753, "kl": 0.094970703125, "learning_rate": 8.346723145291012e-08, "loss": 9.492230310570449e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4101, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 63.51041793823242, "completions/min_length": 23.875, "epoch": 8.15487714072971, "grad_norm": 0.006329133950969244, "kl": 0.084197998046875, "learning_rate": 8.329279657194777e-08, "loss": 8.425174746662378e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4102, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 67.13541889190674, "completions/min_length": 21.0, "epoch": 8.156862745098039, "grad_norm": 0.007857997723493278, "kl": 0.071685791015625, "learning_rate": 8.311852759102384e-08, "loss": 7.163742702687159e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4103, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 66.25000286102295, "completions/min_length": 22.75, "epoch": 8.15884834946637, "grad_norm": 0.006423835602411644, "kl": 0.08184814453125, "learning_rate": 8.294442457951839e-08, "loss": 8.184979378711432e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4104, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 67.34375143051147, "completions/min_length": 22.25, "epoch": 8.160833953834699, "grad_norm": 0.004043087551216295, "kl": 0.0616455078125, "learning_rate": 8.277048760674571e-08, "loss": 6.158360338304192e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4105, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 70.33333587646484, "completions/min_length": 26.25, "epoch": 8.162819558203028, "grad_norm": 0.0027480908783146405, "kl": 0.07806396484375, "learning_rate": 8.259671674195357e-08, "loss": 7.805389032000676e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4106, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.625, "completions/mean_length": 75.4166693687439, "completions/min_length": 23.25, "epoch": 8.164805162571358, "grad_norm": 0.003731988327493677, "kl": 0.078857421875, "learning_rate": 8.242311205432417e-08, "loss": 7.878868200350553e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4107, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 62.89583492279053, "completions/min_length": 20.375, "epoch": 8.166790766939688, "grad_norm": 0.02846207143304965, "kl": 0.099334716796875, "learning_rate": 8.224967361297313e-08, "loss": 9.936468995874748e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4108, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 69.96875190734863, "completions/min_length": 24.125, "epoch": 8.168776371308017, "grad_norm": 0.011059952324884666, "kl": 0.0650634765625, "learning_rate": 8.207640148694966e-08, "loss": 6.504629709525034e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4109, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 69.1979193687439, "completions/min_length": 28.5, "epoch": 8.170761975676346, "grad_norm": 0.003633069098050467, "kl": 0.059112548828125, "learning_rate": 8.190329574523769e-08, "loss": 5.9063844673801214e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4110, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 71.98958444595337, "completions/min_length": 22.25, "epoch": 8.172747580044676, "grad_norm": 0.005319457264759278, "kl": 0.08221435546875, "learning_rate": 8.173035645675402e-08, "loss": 8.227470971178263e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4111, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 73.16666793823242, "completions/min_length": 23.5, "epoch": 8.174733184413006, "grad_norm": 0.0031280795768653755, "kl": 0.07427978515625, "learning_rate": 8.155758369034931e-08, "loss": 7.426535012200475e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4112, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 66.41666793823242, "completions/min_length": 16.375, "epoch": 8.176718788781335, "grad_norm": 1.5545793203686245, "kl": 0.075897216796875, "learning_rate": 8.138497751480843e-08, "loss": -0.00784082617610693, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4113, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 69.73958492279053, "completions/min_length": 23.25, "epoch": 8.178704393149665, "grad_norm": 0.0032630133977261198, "kl": 0.0810546875, "learning_rate": 8.121253799884925e-08, "loss": 8.110299677355215e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4114, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 65.89583587646484, "completions/min_length": 27.25, "epoch": 8.180689997517995, "grad_norm": 0.008743879651911703, "kl": 0.075897216796875, "learning_rate": 8.104026521112383e-08, "loss": 7.593727787025273e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4115, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 74.89583492279053, "completions/min_length": 29.625, "epoch": 8.182675601886324, "grad_norm": 0.0029907263768583067, "kl": 0.065582275390625, "learning_rate": 8.086815922021773e-08, "loss": 6.562257476616651e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4116, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 75.56250238418579, "completions/min_length": 33.25, "epoch": 8.184661206254654, "grad_norm": 0.0028489223577943357, "kl": 0.060882568359375, "learning_rate": 8.069622009464971e-08, "loss": 6.0922317061340436e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4117, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.875, "completions/mean_length": 63.937501430511475, "completions/min_length": 20.875, "epoch": 8.186646810622983, "grad_norm": 0.0030751870741928377, "kl": 0.074798583984375, "learning_rate": 8.052444790287277e-08, "loss": 7.484326488338411e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4118, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 64.85416746139526, "completions/min_length": 25.5, "epoch": 8.188632414991313, "grad_norm": 1.1219008008134526, "kl": 0.068817138671875, "learning_rate": 8.035284271327275e-08, "loss": -0.0020768556278198957, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4119, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 67.40625286102295, "completions/min_length": 26.375, "epoch": 8.190618019359643, "grad_norm": 0.0034370151587727283, "kl": 0.0684814453125, "learning_rate": 8.018140459416961e-08, "loss": 6.846078031230718e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4120, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.875, "completions/mean_length": 62.57291793823242, "completions/min_length": 19.625, "epoch": 8.192603623727972, "grad_norm": 0.004910802271437788, "kl": 0.06805419921875, "learning_rate": 8.001013361381647e-08, "loss": 6.797845708206296e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4121, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 64.06250190734863, "completions/min_length": 23.875, "epoch": 8.194589228096302, "grad_norm": 0.0030304360408127838, "kl": 0.069976806640625, "learning_rate": 7.983902984039964e-08, "loss": 6.999982724664733e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4122, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 74.27083587646484, "completions/min_length": 23.0, "epoch": 8.19657483246463, "grad_norm": 0.0029410458394492347, "kl": 0.064727783203125, "learning_rate": 7.966809334203973e-08, "loss": 6.478787690866739e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4123, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 71.06250190734863, "completions/min_length": 28.625, "epoch": 8.198560436832961, "grad_norm": 1.0115103998029777, "kl": 0.0679931640625, "learning_rate": 7.949732418678989e-08, "loss": -0.004359547048807144, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4124, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 71.25000095367432, "completions/min_length": 26.75, "epoch": 8.20054604120129, "grad_norm": 0.006876665635144105, "kl": 0.06024169921875, "learning_rate": 7.932672244263694e-08, "loss": 6.0246908105909824e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4125, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.625, "completions/mean_length": 57.38541841506958, "completions/min_length": 20.625, "epoch": 8.20253164556962, "grad_norm": 0.0025990072756236693, "kl": 0.07000732421875, "learning_rate": 7.915628817750126e-08, "loss": 7.009755790932104e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4126, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 64.22916841506958, "completions/min_length": 18.375, "epoch": 8.20451724993795, "grad_norm": 0.003151522712492612, "kl": 0.06591796875, "learning_rate": 7.898602145923616e-08, "loss": 6.577809836016968e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4127, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 66.19791793823242, "completions/min_length": 27.375, "epoch": 8.20650285430628, "grad_norm": 0.00721878819101889, "kl": 0.08380126953125, "learning_rate": 7.881592235562867e-08, "loss": 8.370440627913922e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4128, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.375, "completions/mean_length": 69.39583492279053, "completions/min_length": 27.625, "epoch": 8.208488458674609, "grad_norm": 0.0027359970759121316, "kl": 0.087371826171875, "learning_rate": 7.864599093439867e-08, "loss": 8.735234587220475e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4129, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.875, "completions/mean_length": 70.46875190734863, "completions/min_length": 23.0, "epoch": 8.21047406304294, "grad_norm": 0.003015899280429733, "kl": 0.068267822265625, "learning_rate": 7.847622726319963e-08, "loss": 6.83972320985049e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4130, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 53.61458444595337, "completions/min_length": 19.5, "epoch": 8.212459667411268, "grad_norm": 0.002594072173009858, "kl": 0.05609130859375, "learning_rate": 7.830663140961813e-08, "loss": 5.607612183666788e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4131, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 61.60416793823242, "completions/min_length": 22.875, "epoch": 8.214445271779597, "grad_norm": 0.0031819551447294482, "kl": 0.050537109375, "learning_rate": 7.81372034411738e-08, "loss": 5.0561644457047805e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4132, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 66.36458587646484, "completions/min_length": 22.625, "epoch": 8.216430876147928, "grad_norm": 0.0031428361418349773, "kl": 0.075103759765625, "learning_rate": 7.796794342531948e-08, "loss": 7.505275425501168e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4133, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 64.83333587646484, "completions/min_length": 20.875, "epoch": 8.218416480516257, "grad_norm": 0.003842512834826663, "kl": 0.069549560546875, "learning_rate": 7.779885142944143e-08, "loss": 6.952918192837387e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4134, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 64.05208492279053, "completions/min_length": 22.125, "epoch": 8.220402084884586, "grad_norm": 0.003592286326326378, "kl": 0.05181884765625, "learning_rate": 7.762992752085845e-08, "loss": 5.182556196814403e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4135, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.125, "completions/mean_length": 73.30208539962769, "completions/min_length": 28.125, "epoch": 8.222387689252916, "grad_norm": 0.004092355370945299, "kl": 0.096466064453125, "learning_rate": 7.746117176682298e-08, "loss": 9.637869516154751e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4136, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 73.91666889190674, "completions/min_length": 21.0, "epoch": 8.224373293621246, "grad_norm": 0.004276666411209176, "kl": 0.06622314453125, "learning_rate": 7.729258423452034e-08, "loss": 6.61782396491617e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4137, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 70.46875238418579, "completions/min_length": 23.875, "epoch": 8.226358897989575, "grad_norm": 0.0034572716650727886, "kl": 0.059356689453125, "learning_rate": 7.712416499106866e-08, "loss": 5.928528480581008e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4138, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 58.593751430511475, "completions/min_length": 17.75, "epoch": 8.228344502357904, "grad_norm": 0.005875647132462992, "kl": 0.073455810546875, "learning_rate": 7.695591410351937e-08, "loss": 7.339852163568139e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4139, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 74.95833587646484, "completions/min_length": 26.25, "epoch": 8.230330106726235, "grad_norm": 0.005229200179778609, "kl": 0.07354736328125, "learning_rate": 7.678783163885677e-08, "loss": 7.35757130314596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4140, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 69.36458587646484, "completions/min_length": 22.5, "epoch": 8.232315711094564, "grad_norm": 0.011158680805305942, "kl": 0.0693359375, "learning_rate": 7.661991766399783e-08, "loss": 6.93343099555932e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4141, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 79.71875286102295, "completions/min_length": 27.375, "epoch": 8.234301315462893, "grad_norm": 0.0032093801197590932, "kl": 0.072296142578125, "learning_rate": 7.645217224579298e-08, "loss": 7.238502439577132e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4142, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 79.03125190734863, "completions/min_length": 33.625, "epoch": 8.236286919831224, "grad_norm": 0.0027834144916026836, "kl": 0.0782470703125, "learning_rate": 7.6284595451025e-08, "loss": 7.819591701263562e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4143, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 68.54166984558105, "completions/min_length": 31.75, "epoch": 8.238272524199553, "grad_norm": 0.0054449676195110755, "kl": 0.0589599609375, "learning_rate": 7.611718734640992e-08, "loss": 5.8841393183683977e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4144, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 66.94791841506958, "completions/min_length": 18.625, "epoch": 8.240258128567882, "grad_norm": 0.0028408136476142138, "kl": 0.051544189453125, "learning_rate": 7.594994799859661e-08, "loss": 5.152905941940844e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4145, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.875, "completions/mean_length": 77.87500190734863, "completions/min_length": 22.25, "epoch": 8.242243732936213, "grad_norm": 0.003269737092685624, "kl": 0.063323974609375, "learning_rate": 7.57828774741664e-08, "loss": 6.335931539069861e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4146, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 63.625001430511475, "completions/min_length": 20.75, "epoch": 8.244229337304542, "grad_norm": 0.0034545693166952575, "kl": 0.0953369140625, "learning_rate": 7.561597583963386e-08, "loss": 9.530247189104557e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4147, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 71.53125143051147, "completions/min_length": 27.875, "epoch": 8.246214941672871, "grad_norm": 1.8560897717679077, "kl": 0.069244384765625, "learning_rate": 7.544924316144596e-08, "loss": 0.001715024933218956, "memory(GiB)": 94.21, "reward": 1.6979166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4148, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 65.36458444595337, "completions/min_length": 23.5, "epoch": 8.2482005460412, "grad_norm": 0.0058954658707857325, "kl": 0.074554443359375, "learning_rate": 7.528267950598244e-08, "loss": 7.45235156500712e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4149, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 71.08333539962769, "completions/min_length": 26.25, "epoch": 8.250186150409531, "grad_norm": 0.004067599576072198, "kl": 0.069915771484375, "learning_rate": 7.511628493955591e-08, "loss": 6.992083945078775e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4150, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 66.27083539962769, "completions/min_length": 22.75, "epoch": 8.25217175477786, "grad_norm": 0.002812674172602009, "kl": 0.059722900390625, "learning_rate": 7.495005952841182e-08, "loss": 5.9765385231003165e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4151, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 64.71875190734863, "completions/min_length": 17.75, "epoch": 8.25415735914619, "grad_norm": 2.132789467048576, "kl": 0.078033447265625, "learning_rate": 7.478400333872775e-08, "loss": 0.00574676226824522, "memory(GiB)": 94.21, "reward": 1.5520833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4152, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 63.208335876464844, "completions/min_length": 19.625, "epoch": 8.25614296351452, "grad_norm": 0.005439234240909005, "kl": 0.0738525390625, "learning_rate": 7.461811643661447e-08, "loss": 7.384568016277626e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4153, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 68.11458492279053, "completions/min_length": 29.0, "epoch": 8.25812856788285, "grad_norm": 0.005615012995516823, "kl": 0.069091796875, "learning_rate": 7.445239888811489e-08, "loss": 6.908044451847672e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4154, "train_speed(iter/s)": 0.022646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 69.80208587646484, "completions/min_length": 26.375, "epoch": 8.260114172251178, "grad_norm": 0.00862034448060035, "kl": 0.0672607421875, "learning_rate": 7.428685075920488e-08, "loss": 6.722434773109853e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4155, "train_speed(iter/s)": 0.022645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 72.09375286102295, "completions/min_length": 21.125, "epoch": 8.26209977661951, "grad_norm": 0.003062503697419942, "kl": 0.061309814453125, "learning_rate": 7.412147211579267e-08, "loss": 6.11907453276217e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4156, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 71.70833539962769, "completions/min_length": 26.5, "epoch": 8.264085380987838, "grad_norm": 0.002940944783097714, "kl": 0.075286865234375, "learning_rate": 7.395626302371866e-08, "loss": 7.530323637183756e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4157, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.375, "completions/mean_length": 69.98958587646484, "completions/min_length": 22.5, "epoch": 8.266070985356167, "grad_norm": 0.004050780055793523, "kl": 0.05810546875, "learning_rate": 7.379122354875672e-08, "loss": 5.808388232253492e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4158, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 71.44791889190674, "completions/min_length": 21.625, "epoch": 8.268056589724498, "grad_norm": 0.0041788549885734315, "kl": 0.061431884765625, "learning_rate": 7.362635375661224e-08, "loss": 6.149184628156945e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4159, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.125, "completions/mean_length": 72.68750095367432, "completions/min_length": 24.375, "epoch": 8.270042194092827, "grad_norm": 0.0035975429601007762, "kl": 0.071258544921875, "learning_rate": 7.346165371292334e-08, "loss": 7.12537657818757e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4160, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 60.69791841506958, "completions/min_length": 19.625, "epoch": 8.272027798461156, "grad_norm": 0.003689711535103805, "kl": 0.067352294921875, "learning_rate": 7.329712348326089e-08, "loss": 6.73617105348967e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4161, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.875, "completions/mean_length": 69.48958444595337, "completions/min_length": 19.5, "epoch": 8.274013402829485, "grad_norm": 0.013905783244096769, "kl": 0.080718994140625, "learning_rate": 7.313276313312761e-08, "loss": 8.067675662459806e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4162, "train_speed(iter/s)": 0.022644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.375, "completions/mean_length": 76.47916889190674, "completions/min_length": 26.375, "epoch": 8.275999007197816, "grad_norm": 0.0037742084416650286, "kl": 0.057647705078125, "learning_rate": 7.296857272795914e-08, "loss": 5.7666573411552235e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4163, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 61.10416841506958, "completions/min_length": 26.125, "epoch": 8.277984611566145, "grad_norm": 0.0031587650698535743, "kl": 0.05743408203125, "learning_rate": 7.280455233312294e-08, "loss": 5.738392792409286e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4164, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.875, "completions/mean_length": 63.73958492279053, "completions/min_length": 16.75, "epoch": 8.279970215934474, "grad_norm": 0.008640423519536786, "kl": 0.073883056640625, "learning_rate": 7.264070201391908e-08, "loss": 7.377046858891845e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4165, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 65.20833587646484, "completions/min_length": 24.0, "epoch": 8.281955820302805, "grad_norm": 0.005045420398543357, "kl": 0.0760498046875, "learning_rate": 7.247702183558007e-08, "loss": 7.604577695019543e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4166, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 69.895836353302, "completions/min_length": 23.0, "epoch": 8.283941424671134, "grad_norm": 0.0047622319186527845, "kl": 0.066497802734375, "learning_rate": 7.231351186327029e-08, "loss": 6.651488365605474e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4167, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 71.20833587646484, "completions/min_length": 26.75, "epoch": 8.285927029039463, "grad_norm": 0.0036402493787030593, "kl": 0.073089599609375, "learning_rate": 7.215017216208663e-08, "loss": 7.308584463316947e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4168, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 67.30208539962769, "completions/min_length": 19.875, "epoch": 8.287912633407794, "grad_norm": 0.002346681151183932, "kl": 0.0552978515625, "learning_rate": 7.198700279705816e-08, "loss": 5.5294203775702044e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4169, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 72.56250286102295, "completions/min_length": 27.375, "epoch": 8.289898237776123, "grad_norm": 0.0024313301656414834, "kl": 0.064056396484375, "learning_rate": 7.18240038331459e-08, "loss": 6.406335887731984e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4170, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 77.44791984558105, "completions/min_length": 26.5, "epoch": 8.291883842144452, "grad_norm": 0.0033700621789313095, "kl": 0.072906494140625, "learning_rate": 7.166117533524335e-08, "loss": 7.281712896656245e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4171, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 73.81250095367432, "completions/min_length": 23.125, "epoch": 8.293869446512783, "grad_norm": 0.004418134707223982, "kl": 0.06329345703125, "learning_rate": 7.149851736817608e-08, "loss": 6.32929295534268e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4172, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 61.395835876464844, "completions/min_length": 21.625, "epoch": 8.295855050881112, "grad_norm": 0.007909765588515852, "kl": 0.0648193359375, "learning_rate": 7.133602999670152e-08, "loss": 6.484874757006764e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4173, "train_speed(iter/s)": 0.022643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 71.16666889190674, "completions/min_length": 24.125, "epoch": 8.297840655249441, "grad_norm": 0.0032113258236920804, "kl": 0.062652587890625, "learning_rate": 7.117371328550958e-08, "loss": 6.254656909732148e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4174, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 69.09375238418579, "completions/min_length": 22.5, "epoch": 8.29982625961777, "grad_norm": 0.04362664365925082, "kl": 0.146697998046875, "learning_rate": 7.10115672992218e-08, "loss": 0.00014679976447951049, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4175, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.125, "completions/mean_length": 74.73958683013916, "completions/min_length": 26.5, "epoch": 8.301811863986101, "grad_norm": 0.003522197880223704, "kl": 0.0709228515625, "learning_rate": 7.084959210239217e-08, "loss": 7.094758620951325e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4176, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 60.187500953674316, "completions/min_length": 22.875, "epoch": 8.30379746835443, "grad_norm": 0.0075772484225363835, "kl": 0.063812255859375, "learning_rate": 7.068778775950635e-08, "loss": 6.386919994838536e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4177, "train_speed(iter/s)": 0.022642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.625, "completions/mean_length": 72.52083587646484, "completions/min_length": 27.625, "epoch": 8.30578307272276, "grad_norm": 0.0038363602636287708, "kl": 0.063751220703125, "learning_rate": 7.052615433498194e-08, "loss": 6.378068792400882e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4178, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 83.78125190734863, "completions/min_length": 23.375, "epoch": 8.30776867709109, "grad_norm": 0.0028843949095304793, "kl": 0.0670166015625, "learning_rate": 7.036469189316902e-08, "loss": 6.692897295579314e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4179, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.625, "completions/mean_length": 56.927085876464844, "completions/min_length": 21.75, "epoch": 8.30975428145942, "grad_norm": 0.0035091268879498387, "kl": 0.0538330078125, "learning_rate": 7.020340049834905e-08, "loss": 5.382742892834358e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4180, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 66.25000143051147, "completions/min_length": 20.25, "epoch": 8.311739885827748, "grad_norm": 0.00444426978875113, "kl": 0.0625, "learning_rate": 7.004228021473551e-08, "loss": 6.246841803658754e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4181, "train_speed(iter/s)": 0.022641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 71.40625190734863, "completions/min_length": 27.5, "epoch": 8.313725490196079, "grad_norm": 0.0027014348671752595, "kl": 0.06146240234375, "learning_rate": 6.988133110647399e-08, "loss": 6.152319838292897e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4182, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 76.14583492279053, "completions/min_length": 27.0, "epoch": 8.315711094564408, "grad_norm": 1.0067082371838725, "kl": 0.062164306640625, "learning_rate": 6.972055323764154e-08, "loss": 0.004236373584717512, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3624799847602844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4183, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.625, "completions/mean_length": 82.458336353302, "completions/min_length": 30.375, "epoch": 8.317696698932737, "grad_norm": 0.0067966967303409774, "kl": 0.0849609375, "learning_rate": 6.955994667224758e-08, "loss": 8.506099402438849e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4184, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 68.85416841506958, "completions/min_length": 21.875, "epoch": 8.319682303301068, "grad_norm": 0.0024507451794612708, "kl": 0.05487060546875, "learning_rate": 6.939951147423268e-08, "loss": 5.4899119277251884e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4185, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 72.22916793823242, "completions/min_length": 26.75, "epoch": 8.321667907669397, "grad_norm": 0.0030961854293762737, "kl": 0.067657470703125, "learning_rate": 6.923924770746964e-08, "loss": 6.76825875416398e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4186, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.375, "completions/mean_length": 80.26041984558105, "completions/min_length": 21.5, "epoch": 8.323653512037726, "grad_norm": 0.0023012681409521223, "kl": 0.056182861328125, "learning_rate": 6.907915543576309e-08, "loss": 5.6165015848819166e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4187, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 66.06250143051147, "completions/min_length": 20.125, "epoch": 8.325639116406055, "grad_norm": 0.0031776690081823794, "kl": 0.0855712890625, "learning_rate": 6.8919234722849e-08, "loss": 8.568624616600573e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4188, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 72.53125190734863, "completions/min_length": 26.875, "epoch": 8.327624720774386, "grad_norm": 0.002911179027491114, "kl": 0.05902099609375, "learning_rate": 6.875948563239514e-08, "loss": 5.8942554460372776e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4189, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 63.38541793823242, "completions/min_length": 20.75, "epoch": 8.329610325142715, "grad_norm": 0.003549150843362108, "kl": 0.06512451171875, "learning_rate": 6.859990822800121e-08, "loss": 6.507930083898827e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4190, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 70.25000238418579, "completions/min_length": 21.125, "epoch": 8.331595929511044, "grad_norm": 0.003814766423729236, "kl": 0.07135009765625, "learning_rate": 6.844050257319822e-08, "loss": 7.128540892153978e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4191, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.375, "completions/mean_length": 70.88541889190674, "completions/min_length": 24.625, "epoch": 8.333581533879375, "grad_norm": 0.0029312588171825104, "kl": 0.071319580078125, "learning_rate": 6.828126873144908e-08, "loss": 7.132625614758581e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4192, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 61.3541693687439, "completions/min_length": 23.125, "epoch": 8.335567138247704, "grad_norm": 0.006832784007863792, "kl": 0.0645751953125, "learning_rate": 6.812220676614822e-08, "loss": 6.461890006903559e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4193, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 67.75000286102295, "completions/min_length": 16.375, "epoch": 8.337552742616033, "grad_norm": 0.003571284680253693, "kl": 0.066802978515625, "learning_rate": 6.796331674062145e-08, "loss": 6.683095853077248e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4194, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 86.34375286102295, "completions/min_length": 35.5, "epoch": 8.339538346984364, "grad_norm": 0.003652215954812687, "kl": 0.0672607421875, "learning_rate": 6.78045987181265e-08, "loss": 6.717565702274442e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4195, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 71.47916889190674, "completions/min_length": 28.625, "epoch": 8.341523951352693, "grad_norm": 0.0028766820526552237, "kl": 0.068206787109375, "learning_rate": 6.764605276185226e-08, "loss": 6.815900997025892e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4196, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.625, "completions/mean_length": 64.84375190734863, "completions/min_length": 23.375, "epoch": 8.343509555721022, "grad_norm": 0.006820664564001713, "kl": 0.075592041015625, "learning_rate": 6.748767893491919e-08, "loss": 7.563058898085728e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4197, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 72.18750095367432, "completions/min_length": 23.25, "epoch": 8.345495160089353, "grad_norm": 1.6536671859206011, "kl": 0.082611083984375, "learning_rate": 6.732947730037935e-08, "loss": -0.007601022720336914, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4198, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 62.86458492279053, "completions/min_length": 19.75, "epoch": 8.347480764457682, "grad_norm": 0.0038511280632069367, "kl": 0.075103759765625, "learning_rate": 6.717144792121638e-08, "loss": 7.513690798077732e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4199, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 77.86458587646484, "completions/min_length": 32.125, "epoch": 8.349466368826011, "grad_norm": 1.5334265370103473, "kl": 0.0750732421875, "learning_rate": 6.701359086034487e-08, "loss": 0.01664251834154129, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2573831044137478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4200, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 78.520836353302, "completions/min_length": 29.25, "epoch": 8.35145197319434, "grad_norm": 0.5257373404217704, "kl": 0.244232177734375, "learning_rate": 6.685590618061132e-08, "loss": -0.020722713321447372, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4201, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 64.5416693687439, "completions/min_length": 22.0, "epoch": 8.353437577562671, "grad_norm": 0.004414779735111849, "kl": 0.0689697265625, "learning_rate": 6.669839394479315e-08, "loss": 6.89075532136485e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4202, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 71.01041841506958, "completions/min_length": 21.875, "epoch": 8.355423181931, "grad_norm": 0.002537753573189819, "kl": 0.07489013671875, "learning_rate": 6.654105421559958e-08, "loss": 7.493824523407966e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4203, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 72.63541889190674, "completions/min_length": 24.875, "epoch": 8.35740878629933, "grad_norm": 0.006485065913781162, "kl": 0.072998046875, "learning_rate": 6.638388705567067e-08, "loss": 7.296283729374409e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4204, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 61.01041793823242, "completions/min_length": 22.625, "epoch": 8.35939439066766, "grad_norm": 0.0027873423026066172, "kl": 0.0648193359375, "learning_rate": 6.622689252757813e-08, "loss": 6.491924432339147e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4205, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 62.687501430511475, "completions/min_length": 26.875, "epoch": 8.361379995035989, "grad_norm": 0.0035625609199343247, "kl": 0.06781005859375, "learning_rate": 6.607007069382497e-08, "loss": 6.777978705940768e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4206, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 68.802086353302, "completions/min_length": 25.0, "epoch": 8.363365599404318, "grad_norm": 0.002636273941322249, "kl": 0.052398681640625, "learning_rate": 6.59134216168451e-08, "loss": 5.239554957370274e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4207, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 66.77083492279053, "completions/min_length": 21.125, "epoch": 8.365351203772649, "grad_norm": 0.003237833987991112, "kl": 0.069549560546875, "learning_rate": 6.575694535900411e-08, "loss": 6.955623393878341e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4208, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 68.83333539962769, "completions/min_length": 29.875, "epoch": 8.367336808140978, "grad_norm": 0.0030580252218723194, "kl": 0.06549072265625, "learning_rate": 6.560064198259835e-08, "loss": 6.559217581525445e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4209, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 85.88541984558105, "completions/min_length": 34.875, "epoch": 8.369322412509307, "grad_norm": 0.00428882858298063, "kl": 0.06048583984375, "learning_rate": 6.544451154985548e-08, "loss": 6.042775567038916e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4210, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.625, "completions/mean_length": 68.27083539962769, "completions/min_length": 26.375, "epoch": 8.371308016877638, "grad_norm": 0.007642428985250278, "kl": 0.08489990234375, "learning_rate": 6.528855412293449e-08, "loss": 8.489205356454477e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4211, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 67.23958539962769, "completions/min_length": 27.875, "epoch": 8.373293621245967, "grad_norm": 0.004110090739025641, "kl": 0.053070068359375, "learning_rate": 6.513276976392529e-08, "loss": 5.307815081323497e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4212, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 71.66666889190674, "completions/min_length": 23.125, "epoch": 8.375279225614296, "grad_norm": 0.003948887329439252, "kl": 0.069793701171875, "learning_rate": 6.497715853484898e-08, "loss": 6.986410880926996e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4213, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 84.72916889190674, "completions/min_length": 33.125, "epoch": 8.377264829982625, "grad_norm": 0.0036518461866186305, "kl": 0.07989501953125, "learning_rate": 6.482172049765782e-08, "loss": 7.978919893503189e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4214, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 70.48958587646484, "completions/min_length": 22.0, "epoch": 8.379250434350956, "grad_norm": 0.004215292981769877, "kl": 0.065887451171875, "learning_rate": 6.466645571423484e-08, "loss": 6.589628901565447e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4215, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 70.66666889190674, "completions/min_length": 20.375, "epoch": 8.381236038719285, "grad_norm": 0.005037587241198357, "kl": 0.079742431640625, "learning_rate": 6.451136424639447e-08, "loss": 7.974650361575186e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4216, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 62.60416889190674, "completions/min_length": 21.625, "epoch": 8.383221643087614, "grad_norm": 0.002494850930944186, "kl": 0.0552978515625, "learning_rate": 6.435644615588176e-08, "loss": 5.533179501071572e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4217, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 67.95833492279053, "completions/min_length": 19.625, "epoch": 8.385207247455945, "grad_norm": 0.0029556599577618987, "kl": 0.057647705078125, "learning_rate": 6.420170150437292e-08, "loss": 5.755452366429381e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4218, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 70.85416841506958, "completions/min_length": 20.125, "epoch": 8.387192851824274, "grad_norm": 0.003131981637420512, "kl": 0.06744384765625, "learning_rate": 6.40471303534751e-08, "loss": 6.743191624991596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4219, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 76.28125333786011, "completions/min_length": 22.125, "epoch": 8.389178456192603, "grad_norm": 0.0030258545100966144, "kl": 0.081634521484375, "learning_rate": 6.389273276472657e-08, "loss": 8.156164403771982e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4220, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 67.17708492279053, "completions/min_length": 22.5, "epoch": 8.391164060560934, "grad_norm": 1.1906277765303357, "kl": 0.07061767578125, "learning_rate": 6.373850879959602e-08, "loss": 0.008760648779571056, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4221, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 74.92708683013916, "completions/min_length": 30.0, "epoch": 8.393149664929263, "grad_norm": 0.004700337332481203, "kl": 0.079437255859375, "learning_rate": 6.358445851948358e-08, "loss": 7.939561328385025e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4222, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 73.81250143051147, "completions/min_length": 21.0, "epoch": 8.395135269297592, "grad_norm": 0.003181765384117037, "kl": 0.073760986328125, "learning_rate": 6.343058198571966e-08, "loss": 7.376407302217558e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4223, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 65.13541841506958, "completions/min_length": 26.5, "epoch": 8.397120873665923, "grad_norm": 0.002593709281589465, "kl": 0.053741455078125, "learning_rate": 6.327687925956616e-08, "loss": 5.3710275096818805e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4224, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 71.37500095367432, "completions/min_length": 27.75, "epoch": 8.399106478034252, "grad_norm": 0.0029549552511358066, "kl": 0.061309814453125, "learning_rate": 6.312335040221512e-08, "loss": 6.12829317105934e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4225, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 69.93750238418579, "completions/min_length": 34.625, "epoch": 8.401092082402581, "grad_norm": 0.0040503309644984555, "kl": 0.089111328125, "learning_rate": 6.29699954747896e-08, "loss": 8.924187568482012e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4226, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 72.50000286102295, "completions/min_length": 22.75, "epoch": 8.40307768677091, "grad_norm": 0.002764478413730987, "kl": 0.058563232421875, "learning_rate": 6.28168145383438e-08, "loss": 5.852927279192954e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4227, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 66.65625095367432, "completions/min_length": 22.0, "epoch": 8.405063291139241, "grad_norm": 0.7286812119471732, "kl": 0.061614990234375, "learning_rate": 6.26638076538622e-08, "loss": 0.007325816433876753, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4228, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 63.12500190734863, "completions/min_length": 24.25, "epoch": 8.40704889550757, "grad_norm": 0.0031088792543383084, "kl": 0.073577880859375, "learning_rate": 6.251097488225993e-08, "loss": 7.355278648901731e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4229, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 67.10416889190674, "completions/min_length": 32.125, "epoch": 8.409034499875899, "grad_norm": 1.5544014210439892, "kl": 0.07586669921875, "learning_rate": 6.23583162843832e-08, "loss": 0.010579612106084824, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4230, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 58.687501430511475, "completions/min_length": 21.875, "epoch": 8.41102010424423, "grad_norm": 0.7775305181757273, "kl": 0.09600830078125, "learning_rate": 6.220583192100848e-08, "loss": -0.007324880920350552, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4231, "train_speed(iter/s)": 0.02264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 66.28125190734863, "completions/min_length": 26.875, "epoch": 8.413005708612559, "grad_norm": 0.0030960546925664955, "kl": 0.056488037109375, "learning_rate": 6.205352185284319e-08, "loss": 5.657611109199934e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4232, "train_speed(iter/s)": 0.022639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.875, "completions/mean_length": 72.77083492279053, "completions/min_length": 22.625, "epoch": 8.414991312980888, "grad_norm": 0.0027934893166460246, "kl": 0.063079833984375, "learning_rate": 6.190138614052515e-08, "loss": 6.310495518846437e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4233, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 72.63542032241821, "completions/min_length": 28.5, "epoch": 8.416976917349219, "grad_norm": 0.004146431643709181, "kl": 0.072967529296875, "learning_rate": 6.174942484462282e-08, "loss": 7.292776717804372e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4234, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 69.58333587646484, "completions/min_length": 24.75, "epoch": 8.418962521717548, "grad_norm": 1.6997985722253017, "kl": 0.073577880859375, "learning_rate": 6.159763802563534e-08, "loss": -0.0006705643609166145, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4235, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 64.72916841506958, "completions/min_length": 27.875, "epoch": 8.420948126085877, "grad_norm": 0.0039164728072086956, "kl": 0.06524658203125, "learning_rate": 6.144602574399227e-08, "loss": 6.52932794764638e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4236, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 82.56250286102295, "completions/min_length": 35.125, "epoch": 8.422933730454208, "grad_norm": 0.003622016252429963, "kl": 0.085418701171875, "learning_rate": 6.129458806005349e-08, "loss": 8.547968172933906e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4237, "train_speed(iter/s)": 0.022638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.625, "completions/mean_length": 81.63541793823242, "completions/min_length": 31.875, "epoch": 8.424919334822537, "grad_norm": 0.003517787247546516, "kl": 0.088775634765625, "learning_rate": 6.11433250341099e-08, "loss": 8.87192363734357e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4238, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.625, "completions/mean_length": 76.4166693687439, "completions/min_length": 25.75, "epoch": 8.426904939190866, "grad_norm": 0.005314102891701261, "kl": 0.065643310546875, "learning_rate": 6.099223672638227e-08, "loss": 6.562695489265025e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4239, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 76.91666841506958, "completions/min_length": 27.0, "epoch": 8.428890543559195, "grad_norm": 0.0025162435901807533, "kl": 0.06201171875, "learning_rate": 6.084132319702212e-08, "loss": 6.199840572662652e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4240, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 66.59375143051147, "completions/min_length": 22.625, "epoch": 8.430876147927526, "grad_norm": 0.0037691266586001877, "kl": 0.054931640625, "learning_rate": 6.069058450611158e-08, "loss": 5.492793206940405e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4241, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 67.23958492279053, "completions/min_length": 20.75, "epoch": 8.432861752295855, "grad_norm": 0.003074204847039791, "kl": 0.075714111328125, "learning_rate": 6.054002071366265e-08, "loss": 7.568797445856035e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4242, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.625, "completions/mean_length": 65.73958539962769, "completions/min_length": 27.25, "epoch": 8.434847356664184, "grad_norm": 0.0027713608369422664, "kl": 0.056732177734375, "learning_rate": 6.038963187961826e-08, "loss": 5.6736327678663656e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4243, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 74.48958587646484, "completions/min_length": 27.0, "epoch": 8.436832961032515, "grad_norm": 0.0027990209739054196, "kl": 0.07598876953125, "learning_rate": 6.023941806385114e-08, "loss": 7.607359293615445e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4244, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 80.41666841506958, "completions/min_length": 25.375, "epoch": 8.438818565400844, "grad_norm": 0.00470007314401911, "kl": 0.0718994140625, "learning_rate": 6.008937932616487e-08, "loss": 7.186474249465391e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4245, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 67.02083492279053, "completions/min_length": 22.375, "epoch": 8.440804169769173, "grad_norm": 0.0034211241649920285, "kl": 0.052581787109375, "learning_rate": 5.993951572629296e-08, "loss": 5.2560772019205615e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4246, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 65.58333539962769, "completions/min_length": 26.125, "epoch": 8.442789774137504, "grad_norm": 0.007679420822045764, "kl": 0.072967529296875, "learning_rate": 5.978982732389914e-08, "loss": 7.304772589122877e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4247, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 63.864586353302, "completions/min_length": 24.0, "epoch": 8.444775378505833, "grad_norm": 0.002941298141620421, "kl": 0.072021484375, "learning_rate": 5.96403141785779e-08, "loss": 7.194951467681676e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4248, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 62.260417461395264, "completions/min_length": 20.125, "epoch": 8.446760982874162, "grad_norm": 0.002982447261665682, "kl": 0.062408447265625, "learning_rate": 5.949097634985345e-08, "loss": 6.244637916097417e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4249, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 65.72916841506958, "completions/min_length": 24.5, "epoch": 8.448746587242493, "grad_norm": 0.004061167575328291, "kl": 0.0643310546875, "learning_rate": 5.9341813897180295e-08, "loss": 6.43485618638806e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4250, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 75.78125190734863, "completions/min_length": 30.75, "epoch": 8.450732191610822, "grad_norm": 0.0036886980081035923, "kl": 0.074249267578125, "learning_rate": 5.919282687994337e-08, "loss": 7.428896060446277e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4251, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 71.39583492279053, "completions/min_length": 26.75, "epoch": 8.45271779597915, "grad_norm": 0.003754215023226811, "kl": 0.06500244140625, "learning_rate": 5.9044015357457344e-08, "loss": 6.497999129351228e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4252, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 70.60416841506958, "completions/min_length": 28.375, "epoch": 8.45470340034748, "grad_norm": 0.004753403156328406, "kl": 0.061370849609375, "learning_rate": 5.8895379388967546e-08, "loss": 6.135796866146848e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4253, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 60.812500953674316, "completions/min_length": 25.75, "epoch": 8.45668900471581, "grad_norm": 0.0029484180583949303, "kl": 0.05322265625, "learning_rate": 5.874691903364887e-08, "loss": 5.318777039065026e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4254, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 63.802085161209106, "completions/min_length": 22.625, "epoch": 8.45867460908414, "grad_norm": 0.006974291887355687, "kl": 0.072845458984375, "learning_rate": 5.859863435060669e-08, "loss": 7.275898678926751e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4255, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.625, "completions/mean_length": 62.61458444595337, "completions/min_length": 22.0, "epoch": 8.460660213452469, "grad_norm": 0.004393716802451101, "kl": 0.06036376953125, "learning_rate": 5.845052539887635e-08, "loss": 6.040423613740131e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4256, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 64.4791693687439, "completions/min_length": 20.0, "epoch": 8.4626458178208, "grad_norm": 0.0028247826577669256, "kl": 0.064788818359375, "learning_rate": 5.8302592237423175e-08, "loss": 6.487766950158402e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4257, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 71.47917032241821, "completions/min_length": 28.5, "epoch": 8.464631422189129, "grad_norm": 0.003860351102665582, "kl": 0.065704345703125, "learning_rate": 5.8154834925142336e-08, "loss": 6.57498458167538e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4258, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 69.27083492279053, "completions/min_length": 30.875, "epoch": 8.466617026557458, "grad_norm": 0.004167788357369793, "kl": 0.075347900390625, "learning_rate": 5.800725352085945e-08, "loss": 7.528180140070617e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4259, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 74.39583539962769, "completions/min_length": 23.5, "epoch": 8.468602630925789, "grad_norm": 0.002724167928408849, "kl": 0.07220458984375, "learning_rate": 5.7859848083329554e-08, "loss": 7.223599823191762e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4260, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 65.72916841506958, "completions/min_length": 23.75, "epoch": 8.470588235294118, "grad_norm": 0.00408320712736982, "kl": 0.068389892578125, "learning_rate": 5.7712618671238e-08, "loss": 6.84212427586317e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4261, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 72.84375190734863, "completions/min_length": 26.375, "epoch": 8.472573839662447, "grad_norm": 0.004481179002260378, "kl": 0.058319091796875, "learning_rate": 5.756556534320012e-08, "loss": 5.8295088820159435e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4262, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 71.37500286102295, "completions/min_length": 29.375, "epoch": 8.474559444030778, "grad_norm": 0.0031078136993953763, "kl": 0.06671142578125, "learning_rate": 5.74186881577608e-08, "loss": 6.670726725133136e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4263, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 71.04166841506958, "completions/min_length": 24.5, "epoch": 8.476545048399107, "grad_norm": 0.002399091317865348, "kl": 0.0645751953125, "learning_rate": 5.72719871733951e-08, "loss": 6.458575080614537e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4264, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 69.63541889190674, "completions/min_length": 26.0, "epoch": 8.478530652767436, "grad_norm": 0.0030160972524807513, "kl": 0.054412841796875, "learning_rate": 5.712546244850774e-08, "loss": 5.437999789137393e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.3916747123003006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4265, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.625, "completions/mean_length": 62.6979193687439, "completions/min_length": 19.875, "epoch": 8.480516257135765, "grad_norm": 0.0039402541023382314, "kl": 0.0728759765625, "learning_rate": 5.69791140414333e-08, "loss": 7.2953145718202e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4266, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 79.16666889190674, "completions/min_length": 29.375, "epoch": 8.482501861504096, "grad_norm": 0.0027388598511840965, "kl": 0.0777587890625, "learning_rate": 5.683294201043626e-08, "loss": 7.77281093178317e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4267, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 70.27083587646484, "completions/min_length": 22.625, "epoch": 8.484487465872425, "grad_norm": 0.0027472378620899854, "kl": 0.061431884765625, "learning_rate": 5.6686946413710915e-08, "loss": 6.136958836577833e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4268, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 78.81250190734863, "completions/min_length": 25.75, "epoch": 8.486473070240754, "grad_norm": 0.005557592653558524, "kl": 0.064544677734375, "learning_rate": 5.654112730938104e-08, "loss": 6.451752415159717e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4269, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.625, "completions/mean_length": 57.84375190734863, "completions/min_length": 18.75, "epoch": 8.488458674609085, "grad_norm": 0.004690608841158578, "kl": 0.059417724609375, "learning_rate": 5.639548475550055e-08, "loss": 5.9457765019033104e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4270, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.875, "completions/mean_length": 68.70833539962769, "completions/min_length": 27.625, "epoch": 8.490444278977414, "grad_norm": 0.005158530661617823, "kl": 0.080047607421875, "learning_rate": 5.62500188100527e-08, "loss": 7.993084727786481e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4271, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 64.50000238418579, "completions/min_length": 23.0, "epoch": 8.492429883345743, "grad_norm": 0.004765370413656595, "kl": 0.07568359375, "learning_rate": 5.6104729530950686e-08, "loss": 7.56825611460954e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4272, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.875, "completions/mean_length": 61.187500953674316, "completions/min_length": 28.0, "epoch": 8.494415487714074, "grad_norm": 0.0033980122903548007, "kl": 0.071319580078125, "learning_rate": 5.595961697603724e-08, "loss": 7.129830919438973e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4273, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 61.98958444595337, "completions/min_length": 23.125, "epoch": 8.496401092082403, "grad_norm": 0.005157100041246674, "kl": 0.066162109375, "learning_rate": 5.581468120308458e-08, "loss": 6.612966535612941e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4274, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 59.947916984558105, "completions/min_length": 20.5, "epoch": 8.498386696450732, "grad_norm": 1.202237066003347, "kl": 0.063873291015625, "learning_rate": 5.566992226979511e-08, "loss": 6.392722571035847e-05, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4275, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 71.96875286102295, "completions/min_length": 26.75, "epoch": 8.500372300819063, "grad_norm": 0.003245217642608059, "kl": 0.05828857421875, "learning_rate": 5.5525340233800236e-08, "loss": 5.830578083987348e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4276, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 63.72916889190674, "completions/min_length": 19.75, "epoch": 8.502357905187392, "grad_norm": 0.0025823708561869088, "kl": 0.068878173828125, "learning_rate": 5.5380935152661066e-08, "loss": 6.880475848447531e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4277, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.125, "completions/mean_length": 71.40625286102295, "completions/min_length": 23.375, "epoch": 8.50434350955572, "grad_norm": 0.0025413177228581544, "kl": 0.0621337890625, "learning_rate": 5.523670708386857e-08, "loss": 6.217691407073289e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4278, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 78.0416693687439, "completions/min_length": 28.25, "epoch": 8.50632911392405, "grad_norm": 0.010576234588147514, "kl": 0.102294921875, "learning_rate": 5.5092656084842816e-08, "loss": 0.0001023239383357577, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4279, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 72.44791793823242, "completions/min_length": 31.25, "epoch": 8.50831471829238, "grad_norm": 0.0030030364125630004, "kl": 0.06475830078125, "learning_rate": 5.4948782212933754e-08, "loss": 6.477470742538571e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4280, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 58.52083444595337, "completions/min_length": 25.0, "epoch": 8.51030032266071, "grad_norm": 0.002976566570308528, "kl": 0.0640411376953125, "learning_rate": 5.480508552542052e-08, "loss": 6.40340440440923e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4281, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 64.64583587646484, "completions/min_length": 24.375, "epoch": 8.512285927029039, "grad_norm": 2.855996228761785, "kl": 0.065338134765625, "learning_rate": 5.4661566079511834e-08, "loss": 0.018806789070367813, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4282, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 69.56250286102295, "completions/min_length": 28.5, "epoch": 8.51427153139737, "grad_norm": 0.002981304820025445, "kl": 0.060394287109375, "learning_rate": 5.451822393234601e-08, "loss": 6.039762229193002e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4283, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 67.2916693687439, "completions/min_length": 26.0, "epoch": 8.516257135765699, "grad_norm": 0.003040293098356286, "kl": 0.078399658203125, "learning_rate": 5.4375059140990386e-08, "loss": 7.839395402697846e-05, "memory(GiB)": 94.21, "reward": 1.625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.625, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4284, "train_speed(iter/s)": 0.022637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.875, "completions/mean_length": 63.4791693687439, "completions/min_length": 19.25, "epoch": 8.518242740134028, "grad_norm": 0.00347720028417795, "kl": 0.0587158203125, "learning_rate": 5.4232071762442154e-08, "loss": 5.863062688149512e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4285, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 66.22916841506958, "completions/min_length": 25.125, "epoch": 8.520228344502359, "grad_norm": 0.0034003435344987436, "kl": 0.070953369140625, "learning_rate": 5.408926185362756e-08, "loss": 7.095082401065156e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4286, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.625, "completions/mean_length": 79.79166793823242, "completions/min_length": 32.375, "epoch": 8.522213948870688, "grad_norm": 0.003123158970219177, "kl": 0.060943603515625, "learning_rate": 5.394662947140216e-08, "loss": 6.092020703363232e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4287, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 71.41666793823242, "completions/min_length": 32.0, "epoch": 8.524199553239017, "grad_norm": 0.003812726552957413, "kl": 0.07879638671875, "learning_rate": 5.3804174672550995e-08, "loss": 7.875763549236581e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4288, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 70.39583587646484, "completions/min_length": 22.875, "epoch": 8.526185157607348, "grad_norm": 0.003215315309765408, "kl": 0.05633544921875, "learning_rate": 5.366189751378858e-08, "loss": 5.6320404837606475e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4289, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 69.57291889190674, "completions/min_length": 28.125, "epoch": 8.528170761975677, "grad_norm": 0.00284477513539326, "kl": 0.071624755859375, "learning_rate": 5.351979805175816e-08, "loss": 7.154759805416688e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4290, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 69.30208587646484, "completions/min_length": 20.375, "epoch": 8.530156366344006, "grad_norm": 0.003522690712312541, "kl": 0.063812255859375, "learning_rate": 5.337787634303287e-08, "loss": 6.390751514118165e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4291, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 71.84375190734863, "completions/min_length": 28.5, "epoch": 8.532141970712335, "grad_norm": 0.0038711904303571753, "kl": 0.072418212890625, "learning_rate": 5.3236132444114565e-08, "loss": 7.230650226119906e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4292, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 71.90625190734863, "completions/min_length": 21.5, "epoch": 8.534127575080666, "grad_norm": 0.006730149456461537, "kl": 0.06573486328125, "learning_rate": 5.3094566411434674e-08, "loss": 6.571359699591994e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4293, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.625, "completions/mean_length": 75.21875238418579, "completions/min_length": 26.75, "epoch": 8.536113179448995, "grad_norm": 0.004244741918720068, "kl": 0.06988525390625, "learning_rate": 5.295317830135354e-08, "loss": 6.992699491092935e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4294, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 75.52083683013916, "completions/min_length": 34.125, "epoch": 8.538098783817324, "grad_norm": 0.0032200952559136263, "kl": 0.0736083984375, "learning_rate": 5.281196817016065e-08, "loss": 7.35716603230685e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4295, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 62.03125238418579, "completions/min_length": 22.5, "epoch": 8.540084388185655, "grad_norm": 0.005503542730934274, "kl": 0.061126708984375, "learning_rate": 5.267093607407514e-08, "loss": 6.12151634413749e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4296, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 69.94791793823242, "completions/min_length": 28.25, "epoch": 8.542069992553984, "grad_norm": 0.004664872690549267, "kl": 0.0621337890625, "learning_rate": 5.25300820692447e-08, "loss": 6.211151776369661e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4297, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 64.56250238418579, "completions/min_length": 26.625, "epoch": 8.544055596922313, "grad_norm": 0.0029709481370372307, "kl": 0.06231689453125, "learning_rate": 5.2389406211746204e-08, "loss": 6.231469888007268e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4298, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 81.16666984558105, "completions/min_length": 19.875, "epoch": 8.546041201290643, "grad_norm": 0.0028812112026739703, "kl": 0.08538818359375, "learning_rate": 5.224890855758596e-08, "loss": 8.541432180209085e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4299, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 64.86458492279053, "completions/min_length": 24.0, "epoch": 8.548026805658973, "grad_norm": 0.003408443506572342, "kl": 0.063995361328125, "learning_rate": 5.2108589162698835e-08, "loss": 6.398290861397982e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4300, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 73.26041841506958, "completions/min_length": 23.875, "epoch": 8.550012410027302, "grad_norm": 0.0031046325957754576, "kl": 0.058563232421875, "learning_rate": 5.196844808294926e-08, "loss": 5.855833296664059e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4301, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 68.50000238418579, "completions/min_length": 16.375, "epoch": 8.551998014395632, "grad_norm": 1.7719131384050684, "kl": 0.07171630859375, "learning_rate": 5.182848537413009e-08, "loss": -0.006205078214406967, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.2444935366511345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4302, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 54.97916841506958, "completions/min_length": 20.375, "epoch": 8.553983618763962, "grad_norm": 0.0054062772106716, "kl": 0.07305908203125, "learning_rate": 5.1688701091963606e-08, "loss": 7.297085539903492e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4303, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 66.94791889190674, "completions/min_length": 23.875, "epoch": 8.55596922313229, "grad_norm": 0.0028579897064800945, "kl": 0.062713623046875, "learning_rate": 5.1549095292101053e-08, "loss": 6.26999099040404e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4304, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 67.14583539962769, "completions/min_length": 21.5, "epoch": 8.55795482750062, "grad_norm": 1.0395606041340089, "kl": 0.0679931640625, "learning_rate": 5.1409668030122366e-08, "loss": 0.003949841484427452, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4305, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 63.812501430511475, "completions/min_length": 19.875, "epoch": 8.55994043186895, "grad_norm": 0.004336387656050706, "kl": 0.10955810546875, "learning_rate": 5.1270419361536366e-08, "loss": 0.00010955502511933446, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4306, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.125, "completions/mean_length": 63.94791889190674, "completions/min_length": 26.0, "epoch": 8.56192603623728, "grad_norm": 0.004381164084689803, "kl": 0.069122314453125, "learning_rate": 5.113134934178121e-08, "loss": 6.911862146807835e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4307, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 65.09375095367432, "completions/min_length": 22.125, "epoch": 8.563911640605609, "grad_norm": 0.002698544257996323, "kl": 0.061187744140625, "learning_rate": 5.099245802622332e-08, "loss": 6.119812314864248e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4308, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.375, "completions/mean_length": 61.58333492279053, "completions/min_length": 27.5, "epoch": 8.56589724497394, "grad_norm": 0.003842949749127718, "kl": 0.052703857421875, "learning_rate": 5.085374547015853e-08, "loss": 5.272622365737334e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4309, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 62.02083492279053, "completions/min_length": 27.5, "epoch": 8.567882849342269, "grad_norm": 0.0037307035861505818, "kl": 0.056610107421875, "learning_rate": 5.071521172881127e-08, "loss": 5.6617318477947265e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4310, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.875, "completions/mean_length": 69.40625238418579, "completions/min_length": 24.75, "epoch": 8.569868453710598, "grad_norm": 0.003585948129568317, "kl": 0.06494140625, "learning_rate": 5.057685685733465e-08, "loss": 6.493809632956982e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4311, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.375, "completions/mean_length": 65.12500143051147, "completions/min_length": 25.375, "epoch": 8.571854058078928, "grad_norm": 0.7021310653246982, "kl": 0.0634765625, "learning_rate": 5.0438680910810885e-08, "loss": -0.007124175317585468, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4312, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 78.69791889190674, "completions/min_length": 30.25, "epoch": 8.573839662447257, "grad_norm": 0.003846357900154206, "kl": 0.076080322265625, "learning_rate": 5.0300683944250634e-08, "loss": 7.605641440022737e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4313, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 74.645836353302, "completions/min_length": 29.5, "epoch": 8.575825266815587, "grad_norm": 0.00285625080560995, "kl": 0.06475830078125, "learning_rate": 5.016286601259334e-08, "loss": 6.47250417387113e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4314, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 67.40625286102295, "completions/min_length": 21.75, "epoch": 8.577810871183917, "grad_norm": 0.0031269006023052137, "kl": 0.062835693359375, "learning_rate": 5.002522717070751e-08, "loss": 6.28582711215131e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4315, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 68.90625238418579, "completions/min_length": 21.5, "epoch": 8.579796475552246, "grad_norm": 0.004004684717315213, "kl": 0.075958251953125, "learning_rate": 4.988776747338985e-08, "loss": 7.59678368922323e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4316, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 67.1354193687439, "completions/min_length": 22.25, "epoch": 8.581782079920576, "grad_norm": 0.0031676358037586554, "kl": 0.083221435546875, "learning_rate": 4.9750486975366156e-08, "loss": 8.315112791024148e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4317, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 60.51041889190674, "completions/min_length": 23.0, "epoch": 8.583767684288905, "grad_norm": 0.0032964343830204725, "kl": 0.067047119140625, "learning_rate": 4.9613385731290814e-08, "loss": 6.703739200020209e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4318, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.375, "completions/mean_length": 66.78125238418579, "completions/min_length": 20.375, "epoch": 8.585753288657235, "grad_norm": 0.005429413123891288, "kl": 0.06689453125, "learning_rate": 4.947646379574655e-08, "loss": 6.688515713904053e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4319, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.125, "completions/mean_length": 67.12500143051147, "completions/min_length": 17.375, "epoch": 8.587738893025564, "grad_norm": 0.003958134800928919, "kl": 0.07861328125, "learning_rate": 4.933972122324509e-08, "loss": 7.857757009333e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4320, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 82.52083539962769, "completions/min_length": 34.0, "epoch": 8.589724497393894, "grad_norm": 0.0025624410794586657, "kl": 0.067108154296875, "learning_rate": 4.920315806822639e-08, "loss": 6.712192407576367e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4321, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 73.54166889190674, "completions/min_length": 26.5, "epoch": 8.591710101762224, "grad_norm": 0.0023238310276203107, "kl": 0.067291259765625, "learning_rate": 4.9066774385059404e-08, "loss": 6.729447341058403e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4322, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 68.78125143051147, "completions/min_length": 22.0, "epoch": 8.593695706130553, "grad_norm": 0.003239949744782852, "kl": 0.068389892578125, "learning_rate": 4.893057022804109e-08, "loss": 6.837428372818977e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4323, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.875, "completions/mean_length": 62.79166793823242, "completions/min_length": 21.0, "epoch": 8.595681310498883, "grad_norm": 0.005528437410291154, "kl": 0.079925537109375, "learning_rate": 4.87945456513974e-08, "loss": 7.989796722540632e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4324, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.875, "completions/mean_length": 57.97916841506958, "completions/min_length": 17.875, "epoch": 8.597666914867213, "grad_norm": 0.00394475641244904, "kl": 0.0464019775390625, "learning_rate": 4.865870070928274e-08, "loss": 4.6373490476980805e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4325, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 64.36458539962769, "completions/min_length": 20.125, "epoch": 8.599652519235542, "grad_norm": 0.0029878306576865505, "kl": 0.056640625, "learning_rate": 4.852303545577974e-08, "loss": 5.660884198732674e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4326, "train_speed(iter/s)": 0.022636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 71.520836353302, "completions/min_length": 26.5, "epoch": 8.601638123603871, "grad_norm": 1.022228262628643, "kl": 0.093536376953125, "learning_rate": 4.838754994489952e-08, "loss": 0.01451178640127182, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4327, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 74.50000190734863, "completions/min_length": 27.0, "epoch": 8.603623727972202, "grad_norm": 0.003898653765595462, "kl": 0.066802978515625, "learning_rate": 4.8252244230581995e-08, "loss": 6.686054985038936e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4328, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.625, "completions/mean_length": 78.68750286102295, "completions/min_length": 24.875, "epoch": 8.605609332340531, "grad_norm": 0.0042089250653228666, "kl": 0.069244384765625, "learning_rate": 4.811711836669507e-08, "loss": 6.914118421263993e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4329, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 70.02083444595337, "completions/min_length": 22.375, "epoch": 8.60759493670886, "grad_norm": 0.0026959948443466596, "kl": 0.069061279296875, "learning_rate": 4.798217240703534e-08, "loss": 6.907604984007776e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4330, "train_speed(iter/s)": 0.022635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 69.10416793823242, "completions/min_length": 27.25, "epoch": 8.60958054107719, "grad_norm": 0.003001994985852151, "kl": 0.059478759765625, "learning_rate": 4.7847406405327694e-08, "loss": 5.946995224803686e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4331, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 59.44791841506958, "completions/min_length": 21.25, "epoch": 8.61156614544552, "grad_norm": 0.002710076595829114, "kl": 0.0653533935546875, "learning_rate": 4.7712820415225286e-08, "loss": 6.538275192724541e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4332, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 76.47916984558105, "completions/min_length": 23.5, "epoch": 8.61355174981385, "grad_norm": 0.003148918017560327, "kl": 0.06231689453125, "learning_rate": 4.757841449030975e-08, "loss": 6.237607885850593e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4333, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 71.03125143051147, "completions/min_length": 21.875, "epoch": 8.615537354182178, "grad_norm": 0.02866596850944244, "kl": 0.099945068359375, "learning_rate": 4.744418868409089e-08, "loss": 9.997825691243634e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4334, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 78.16666841506958, "completions/min_length": 29.375, "epoch": 8.61752295855051, "grad_norm": 0.004532200988931007, "kl": 0.090301513671875, "learning_rate": 4.7310143050006854e-08, "loss": 9.02952961041592e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4335, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.125, "completions/mean_length": 68.63541841506958, "completions/min_length": 25.125, "epoch": 8.619508562918838, "grad_norm": 0.002568692727568496, "kl": 0.0552978515625, "learning_rate": 4.7176277641424015e-08, "loss": 5.525143205886707e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4336, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.375, "completions/mean_length": 61.781251430511475, "completions/min_length": 30.125, "epoch": 8.621494167287167, "grad_norm": 0.006796090742259831, "kl": 0.06402587890625, "learning_rate": 4.704259251163728e-08, "loss": 6.401139398803934e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4337, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 71.70833587646484, "completions/min_length": 24.75, "epoch": 8.623479771655498, "grad_norm": 1.0790322340250689, "kl": 0.0604248046875, "learning_rate": 4.6909087713869314e-08, "loss": -0.0013025652151554823, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4338, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 65.87500190734863, "completions/min_length": 17.5, "epoch": 8.625465376023827, "grad_norm": 0.003158150752199311, "kl": 0.062652587890625, "learning_rate": 4.6775763301271423e-08, "loss": 6.260805093916133e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4339, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 70.4166693687439, "completions/min_length": 26.375, "epoch": 8.627450980392156, "grad_norm": 0.0038178650820808453, "kl": 0.07220458984375, "learning_rate": 4.6642619326922706e-08, "loss": 7.214218931039795e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4340, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 68.91666889190674, "completions/min_length": 21.25, "epoch": 8.629436584760487, "grad_norm": 0.0030149792009534957, "kl": 0.06640625, "learning_rate": 4.650965584383082e-08, "loss": 6.636662146775052e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4341, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 78.81250286102295, "completions/min_length": 28.875, "epoch": 8.631422189128816, "grad_norm": 0.0028463456571123126, "kl": 0.063232421875, "learning_rate": 4.6376872904931307e-08, "loss": 6.317744555417448e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4342, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 70.45833683013916, "completions/min_length": 27.125, "epoch": 8.633407793497145, "grad_norm": 0.007217393926369768, "kl": 0.072662353515625, "learning_rate": 4.6244270563087605e-08, "loss": 7.258918776642531e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4343, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 71.9791693687439, "completions/min_length": 28.75, "epoch": 8.635393397865474, "grad_norm": 0.0030741068151509065, "kl": 0.069244384765625, "learning_rate": 4.611184887109204e-08, "loss": 6.927357026143e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4344, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 62.07291793823242, "completions/min_length": 18.125, "epoch": 8.637379002233805, "grad_norm": 0.006693699779112772, "kl": 0.0615234375, "learning_rate": 4.5979607881664216e-08, "loss": 6.15669705439359e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4345, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 66.29166841506958, "completions/min_length": 23.125, "epoch": 8.639364606602134, "grad_norm": 0.0034996730766743527, "kl": 0.068206787109375, "learning_rate": 4.584754764745208e-08, "loss": 6.822988507337868e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4346, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 68.25000143051147, "completions/min_length": 25.75, "epoch": 8.641350210970463, "grad_norm": 0.0050779400643106995, "kl": 0.073822021484375, "learning_rate": 4.571566822103179e-08, "loss": 7.377025031019002e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4347, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 66.47916889190674, "completions/min_length": 23.625, "epoch": 8.643335815338794, "grad_norm": 0.0028356437793893314, "kl": 0.05548095703125, "learning_rate": 4.558396965490713e-08, "loss": 5.548873741645366e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4348, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 73.82291841506958, "completions/min_length": 23.875, "epoch": 8.645321419707123, "grad_norm": 0.002857309706917698, "kl": 0.06439208984375, "learning_rate": 4.545245200151038e-08, "loss": 6.439985008910298e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4349, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.375, "completions/mean_length": 76.80208539962769, "completions/min_length": 30.375, "epoch": 8.647307024075452, "grad_norm": 0.0034409392505946284, "kl": 0.0726318359375, "learning_rate": 4.532111531320132e-08, "loss": 7.249845657497644e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4350, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.375, "completions/mean_length": 63.01041793823242, "completions/min_length": 21.0, "epoch": 8.649292628443783, "grad_norm": 0.9981713138931096, "kl": 0.06121826171875, "learning_rate": 4.518995964226796e-08, "loss": 0.0010815877467393875, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4351, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 64.89583492279053, "completions/min_length": 21.375, "epoch": 8.651278232812112, "grad_norm": 0.0025715259489696835, "kl": 0.063018798828125, "learning_rate": 4.5058985040926255e-08, "loss": 6.299058441072702e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4352, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 63.052086353302, "completions/min_length": 25.5, "epoch": 8.653263837180441, "grad_norm": 0.004046280471654746, "kl": 0.078399658203125, "learning_rate": 4.492819156131994e-08, "loss": 7.83963332651183e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4353, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 65.76041889190674, "completions/min_length": 25.0, "epoch": 8.655249441548772, "grad_norm": 0.002532960040820793, "kl": 0.066070556640625, "learning_rate": 4.479757925552058e-08, "loss": 6.610581476707011e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4354, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 70.32291841506958, "completions/min_length": 22.375, "epoch": 8.657235045917101, "grad_norm": 0.0026719322266955986, "kl": 0.077911376953125, "learning_rate": 4.466714817552791e-08, "loss": 7.784062472637743e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4355, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 67.37500143051147, "completions/min_length": 25.125, "epoch": 8.65922065028543, "grad_norm": 0.005673084626474441, "kl": 0.064605712890625, "learning_rate": 4.453689837326918e-08, "loss": 6.464245961979032e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4356, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 60.48958468437195, "completions/min_length": 22.25, "epoch": 8.66120625465376, "grad_norm": 0.007509430135032619, "kl": 0.067657470703125, "learning_rate": 4.440682990059963e-08, "loss": 6.757605297025293e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4357, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 72.31250143051147, "completions/min_length": 21.5, "epoch": 8.66319185902209, "grad_norm": 0.002362393488109078, "kl": 0.05548095703125, "learning_rate": 4.427694280930244e-08, "loss": 5.552250877371989e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4358, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 70.27083683013916, "completions/min_length": 25.375, "epoch": 8.66517746339042, "grad_norm": 0.006079095180469086, "kl": 0.06689453125, "learning_rate": 4.41472371510882e-08, "loss": 6.685058178845793e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4359, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 70.2604193687439, "completions/min_length": 20.75, "epoch": 8.667163067758748, "grad_norm": 0.003153394170152237, "kl": 0.060211181640625, "learning_rate": 4.401771297759582e-08, "loss": 6.01303436269518e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4360, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 77.76041889190674, "completions/min_length": 29.625, "epoch": 8.66914867212708, "grad_norm": 0.0033560459080369497, "kl": 0.07391357421875, "learning_rate": 4.388837034039139e-08, "loss": 7.383030606433749e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4361, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 76.96875238418579, "completions/min_length": 20.75, "epoch": 8.671134276495408, "grad_norm": 0.0030432030582164377, "kl": 0.058807373046875, "learning_rate": 4.375920929096899e-08, "loss": 5.881188189960085e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4362, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.875, "completions/mean_length": 64.76041984558105, "completions/min_length": 26.875, "epoch": 8.673119880863737, "grad_norm": 0.002653118266666515, "kl": 0.066436767578125, "learning_rate": 4.363022988075049e-08, "loss": 6.651584408245981e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4363, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 74.16666746139526, "completions/min_length": 30.375, "epoch": 8.675105485232068, "grad_norm": 0.0028939883958346816, "kl": 0.073974609375, "learning_rate": 4.3501432161085204e-08, "loss": 7.400155300274491e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4364, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 66.34375095367432, "completions/min_length": 27.625, "epoch": 8.677091089600397, "grad_norm": 0.005420757829492257, "kl": 0.065948486328125, "learning_rate": 4.3372816183250504e-08, "loss": 6.587877578567713e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4365, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.125, "completions/mean_length": 61.22916889190674, "completions/min_length": 25.5, "epoch": 8.679076693968726, "grad_norm": 0.004118572699474102, "kl": 0.0518798828125, "learning_rate": 4.3244381998450985e-08, "loss": 5.1863105909433216e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4366, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 62.187500953674316, "completions/min_length": 23.375, "epoch": 8.681062298337057, "grad_norm": 0.01360768609035256, "kl": 0.076934814453125, "learning_rate": 4.311612965781902e-08, "loss": 7.697496039327234e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4367, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.125, "completions/mean_length": 80.40625190734863, "completions/min_length": 23.375, "epoch": 8.683047902705386, "grad_norm": 0.0030538540885225564, "kl": 0.0726318359375, "learning_rate": 4.298805921241472e-08, "loss": 7.262681174324825e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4368, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 70.23958396911621, "completions/min_length": 26.25, "epoch": 8.685033507073715, "grad_norm": 0.004360091104944317, "kl": 0.067169189453125, "learning_rate": 4.286017071322551e-08, "loss": 6.708032742608339e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4369, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 65.17708587646484, "completions/min_length": 18.625, "epoch": 8.687019111442044, "grad_norm": 0.0062072339655240664, "kl": 0.063751220703125, "learning_rate": 4.273246421116666e-08, "loss": 6.380338891176507e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4370, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 67.27083492279053, "completions/min_length": 21.75, "epoch": 8.689004715810375, "grad_norm": 2.065877159633669, "kl": 0.065643310546875, "learning_rate": 4.2604939757080795e-08, "loss": 0.00811000820249319, "memory(GiB)": 94.21, "reward": 1.78125, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4371, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.375, "completions/mean_length": 75.91666889190674, "completions/min_length": 27.5, "epoch": 8.690990320178704, "grad_norm": 0.003151320419609507, "kl": 0.06903076171875, "learning_rate": 4.247759740173812e-08, "loss": 6.90828965161927e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4372, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 70.06250190734863, "completions/min_length": 21.875, "epoch": 8.692975924547033, "grad_norm": 0.002664019921460677, "kl": 0.08465576171875, "learning_rate": 4.2350437195836475e-08, "loss": 8.462080586468801e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4373, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.375, "completions/mean_length": 51.04166841506958, "completions/min_length": 19.5, "epoch": 8.694961528915364, "grad_norm": 0.006446605491018157, "kl": 0.057098388671875, "learning_rate": 4.222345919000092e-08, "loss": 5.714341386919841e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4374, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 75.73958539962769, "completions/min_length": 34.25, "epoch": 8.696947133283693, "grad_norm": 0.005169266313044711, "kl": 0.067169189453125, "learning_rate": 4.2096663434784075e-08, "loss": 6.709879380650818e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4375, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.375, "completions/mean_length": 77.30208492279053, "completions/min_length": 24.625, "epoch": 8.698932737652022, "grad_norm": 0.002661207865047449, "kl": 0.06329345703125, "learning_rate": 4.197004998066617e-08, "loss": 6.326305447146297e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4376, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.375, "completions/mean_length": 69.18750190734863, "completions/min_length": 27.875, "epoch": 8.700918342020353, "grad_norm": 0.004350187649417303, "kl": 0.0780029296875, "learning_rate": 4.18436188780546e-08, "loss": 7.804400229360908e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4377, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 71.90625190734863, "completions/min_length": 25.0, "epoch": 8.702903946388682, "grad_norm": 0.0026534479251968178, "kl": 0.063751220703125, "learning_rate": 4.171737017728433e-08, "loss": 6.37941702734679e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4378, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 73.72916841506958, "completions/min_length": 27.25, "epoch": 8.704889550757011, "grad_norm": 0.003167417594718814, "kl": 0.073577880859375, "learning_rate": 4.1591303928617816e-08, "loss": 7.360319432336837e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4379, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.375, "completions/mean_length": 59.93750190734863, "completions/min_length": 22.0, "epoch": 8.706875155125342, "grad_norm": 0.0029642928326458155, "kl": 0.069183349609375, "learning_rate": 4.146542018224447e-08, "loss": 6.91590248607099e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4380, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 70.87500143051147, "completions/min_length": 26.875, "epoch": 8.708860759493671, "grad_norm": 0.003076930544420089, "kl": 0.0693359375, "learning_rate": 4.133971898828148e-08, "loss": 6.934001430636272e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4381, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 73.26041841506958, "completions/min_length": 28.25, "epoch": 8.710846363862, "grad_norm": 0.0026343066816678657, "kl": 0.076690673828125, "learning_rate": 4.121420039677315e-08, "loss": 7.668466423638165e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4382, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 69.39583587646484, "completions/min_length": 24.625, "epoch": 8.71283196823033, "grad_norm": 0.0025134000011850003, "kl": 0.0633544921875, "learning_rate": 4.1088864457691e-08, "loss": 6.340233085211366e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4383, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 71.11458492279053, "completions/min_length": 24.25, "epoch": 8.71481757259866, "grad_norm": 0.5866298080370995, "kl": 0.060516357421875, "learning_rate": 4.096371122093406e-08, "loss": -0.0036966167390346527, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4384, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 71.36458539962769, "completions/min_length": 29.125, "epoch": 8.71680317696699, "grad_norm": 0.005658364354300024, "kl": 0.075836181640625, "learning_rate": 4.0838740736328424e-08, "loss": 7.569265289930627e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4385, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 71.23958492279053, "completions/min_length": 20.625, "epoch": 8.718788781335318, "grad_norm": 0.004745203322434925, "kl": 0.060455322265625, "learning_rate": 4.071395305362757e-08, "loss": 6.045115151209757e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4386, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 71.3229193687439, "completions/min_length": 28.125, "epoch": 8.720774385703649, "grad_norm": 0.0030081968566983337, "kl": 0.0743408203125, "learning_rate": 4.05893482225122e-08, "loss": 7.430857658619061e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4387, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 68.8854193687439, "completions/min_length": 22.125, "epoch": 8.722759990071978, "grad_norm": 0.0032003860535846514, "kl": 0.073699951171875, "learning_rate": 4.046492629259002e-08, "loss": 7.387294317595661e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4388, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 66.57291841506958, "completions/min_length": 27.375, "epoch": 8.724745594440307, "grad_norm": 0.010367991392090112, "kl": 0.0819091796875, "learning_rate": 4.034068731339618e-08, "loss": 8.194800466299057e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4389, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 74.59375095367432, "completions/min_length": 21.75, "epoch": 8.726731198808638, "grad_norm": 0.5760345758111557, "kl": 0.524017333984375, "learning_rate": 4.021663133439279e-08, "loss": 0.014094110578298569, "memory(GiB)": 94.21, "reward": 1.9583333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9583333358168602, "rewards/CineAccuracyORM/std": 0.06154574826359749, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4390, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 78.82291841506958, "completions/min_length": 27.375, "epoch": 8.728716803176967, "grad_norm": 0.0044237752692234365, "kl": 0.08050537109375, "learning_rate": 4.0092758404969175e-08, "loss": 8.051890472415835e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4391, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 67.22916841506958, "completions/min_length": 33.0, "epoch": 8.730702407545296, "grad_norm": 0.0038700111106996136, "kl": 0.056549072265625, "learning_rate": 3.9969068574441824e-08, "loss": 5.657468500430696e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4392, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 74.37500286102295, "completions/min_length": 23.5, "epoch": 8.732688011913627, "grad_norm": 0.002807277627931084, "kl": 0.0853271484375, "learning_rate": 3.9845561892054403e-08, "loss": 8.529365004505962e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4393, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 73.4166693687439, "completions/min_length": 23.75, "epoch": 8.734673616281956, "grad_norm": 0.0029709371056979585, "kl": 0.07586669921875, "learning_rate": 3.972223840697736e-08, "loss": 7.577847281936556e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4394, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 62.10416793823242, "completions/min_length": 22.375, "epoch": 8.736659220650285, "grad_norm": 0.005011853020338243, "kl": 0.068206787109375, "learning_rate": 3.9599098168308584e-08, "loss": 6.820028647780418e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4395, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 68.53125190734863, "completions/min_length": 25.5, "epoch": 8.738644825018614, "grad_norm": 1.1299773243689113, "kl": 0.057586669921875, "learning_rate": 3.94761412250727e-08, "loss": 5.751972639700398e-05, "memory(GiB)": 94.21, "reward": 1.9791666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4396, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.625, "completions/mean_length": 69.00000286102295, "completions/min_length": 22.625, "epoch": 8.740630429386945, "grad_norm": 0.003192592629962907, "kl": 0.0638427734375, "learning_rate": 3.93533676262216e-08, "loss": 6.379103433573619e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4397, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 65.76041793823242, "completions/min_length": 21.375, "epoch": 8.742616033755274, "grad_norm": 0.006871814102693593, "kl": 0.0655517578125, "learning_rate": 3.9230777420634074e-08, "loss": 6.54618997941725e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4398, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 64.92708539962769, "completions/min_length": 25.75, "epoch": 8.744601638123603, "grad_norm": 1.151784311920379, "kl": 0.0804443359375, "learning_rate": 3.9108370657115694e-08, "loss": 0.007262674625962973, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4399, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 69.07291889190674, "completions/min_length": 25.625, "epoch": 8.746587242491934, "grad_norm": 0.0027087210847725274, "kl": 0.0643310546875, "learning_rate": 3.898614738439954e-08, "loss": 6.434237002395093e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4400, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 71.98958492279053, "completions/min_length": 24.0, "epoch": 8.748572846860263, "grad_norm": 2.4739733532689145, "kl": 0.07244873046875, "learning_rate": 3.886410765114512e-08, "loss": -0.0008494257926940918, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4401, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 81.46875286102295, "completions/min_length": 26.375, "epoch": 8.750558451228592, "grad_norm": 0.0027276815159795664, "kl": 0.059112548828125, "learning_rate": 3.874225150593896e-08, "loss": 5.905494253966026e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4402, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 63.96875238418579, "completions/min_length": 24.25, "epoch": 8.752544055596923, "grad_norm": 0.00242544359179178, "kl": 0.053558349609375, "learning_rate": 3.8620578997294875e-08, "loss": 5.351314030122012e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4403, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 69.58333587646484, "completions/min_length": 22.875, "epoch": 8.754529659965252, "grad_norm": 0.0026694014466304614, "kl": 0.081207275390625, "learning_rate": 3.849909017365299e-08, "loss": 8.125405292958021e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4404, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 71.13541841506958, "completions/min_length": 28.375, "epoch": 8.756515264333581, "grad_norm": 0.004636396487296402, "kl": 0.063873291015625, "learning_rate": 3.8377785083380854e-08, "loss": 6.384911830537021e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4405, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 65.00000190734863, "completions/min_length": 18.875, "epoch": 8.758500868701912, "grad_norm": 0.0036103020447160886, "kl": 0.060943603515625, "learning_rate": 3.825666377477238e-08, "loss": 6.0983551520621404e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4406, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 80.3541693687439, "completions/min_length": 20.25, "epoch": 8.760486473070241, "grad_norm": 0.004808304235861437, "kl": 0.06878662109375, "learning_rate": 3.8135726296048666e-08, "loss": 6.880710134282708e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4407, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 74.50000238418579, "completions/min_length": 26.375, "epoch": 8.76247207743857, "grad_norm": 0.005372567667871199, "kl": 0.067779541015625, "learning_rate": 3.801497269535764e-08, "loss": 6.781538104405627e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4408, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 80.19791841506958, "completions/min_length": 26.375, "epoch": 8.764457681806899, "grad_norm": 0.003009130406111027, "kl": 0.075469970703125, "learning_rate": 3.789440302077362e-08, "loss": 7.546728011220694e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4409, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 57.843751430511475, "completions/min_length": 19.375, "epoch": 8.76644328617523, "grad_norm": 0.004252710038085795, "kl": 0.056884765625, "learning_rate": 3.777401732029822e-08, "loss": 5.687782686436549e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4410, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 74.333336353302, "completions/min_length": 30.125, "epoch": 8.768428890543559, "grad_norm": 0.0025588154312017426, "kl": 0.070098876953125, "learning_rate": 3.765381564185943e-08, "loss": 7.016959716565907e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4411, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 55.89583492279053, "completions/min_length": 15.375, "epoch": 8.770414494911888, "grad_norm": 0.0069810790830688845, "kl": 0.056427001953125, "learning_rate": 3.753379803331197e-08, "loss": 5.644853808917105e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4412, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.375, "completions/mean_length": 62.833335876464844, "completions/min_length": 23.125, "epoch": 8.772400099280219, "grad_norm": 0.0048134086733442275, "kl": 0.058502197265625, "learning_rate": 3.741396454243767e-08, "loss": 5.844461702508852e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4413, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 63.11458492279053, "completions/min_length": 22.5, "epoch": 8.774385703648548, "grad_norm": 0.0037792404684749395, "kl": 0.097259521484375, "learning_rate": 3.729431521694476e-08, "loss": 9.723665425553918e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4414, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.375, "completions/mean_length": 60.14583492279053, "completions/min_length": 19.25, "epoch": 8.776371308016877, "grad_norm": 0.02407754325142591, "kl": 0.059295654296875, "learning_rate": 3.7174850104468027e-08, "loss": 5.933472857577726e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4415, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 65.01041889190674, "completions/min_length": 27.125, "epoch": 8.778356912385208, "grad_norm": 0.004422849079507957, "kl": 0.057342529296875, "learning_rate": 3.705556925256925e-08, "loss": 5.734210935770534e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4416, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 70.52083444595337, "completions/min_length": 26.375, "epoch": 8.780342516753537, "grad_norm": 1.0285375341232266, "kl": 0.061981201171875, "learning_rate": 3.6936472708736567e-08, "loss": 0.018292339518666267, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4417, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 66.67708492279053, "completions/min_length": 22.375, "epoch": 8.782328121121866, "grad_norm": 0.00464340737462757, "kl": 0.08123779296875, "learning_rate": 3.6817560520384926e-08, "loss": 8.124877786030993e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4418, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 67.72916841506958, "completions/min_length": 26.125, "epoch": 8.784313725490197, "grad_norm": 0.0026755899493867154, "kl": 0.05316162109375, "learning_rate": 3.6698832734855744e-08, "loss": 5.3143041441217065e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4419, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 72.12500190734863, "completions/min_length": 30.125, "epoch": 8.786299329858526, "grad_norm": 0.0037001209784178115, "kl": 0.0810546875, "learning_rate": 3.658028939941715e-08, "loss": 8.116249227896333e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4420, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 64.01041984558105, "completions/min_length": 19.125, "epoch": 8.788284934226855, "grad_norm": 0.005853399075497287, "kl": 0.0728302001953125, "learning_rate": 3.646193056126384e-08, "loss": 7.287230982910842e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4421, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 72.98958587646484, "completions/min_length": 22.0, "epoch": 8.790270538595184, "grad_norm": 0.0028696479993686763, "kl": 0.074371337890625, "learning_rate": 3.63437562675169e-08, "loss": 7.443155482178554e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4422, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 62.802085399627686, "completions/min_length": 20.125, "epoch": 8.792256142963515, "grad_norm": 0.003184243215509938, "kl": 0.064910888671875, "learning_rate": 3.622576656522397e-08, "loss": 6.490507803391665e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4423, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 70.16666841506958, "completions/min_length": 27.0, "epoch": 8.794241747331844, "grad_norm": 0.004282120609053288, "kl": 0.077484130859375, "learning_rate": 3.6107961501359475e-08, "loss": 7.750788063276559e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4424, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 65.53125143051147, "completions/min_length": 26.25, "epoch": 8.796227351700173, "grad_norm": 0.004137419031221627, "kl": 0.063446044921875, "learning_rate": 3.59903411228239e-08, "loss": 6.346201553242281e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4425, "train_speed(iter/s)": 0.022634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 64.32291841506958, "completions/min_length": 19.5, "epoch": 8.798212956068504, "grad_norm": 0.0024874070700725667, "kl": 0.062042236328125, "learning_rate": 3.587290547644456e-08, "loss": 6.203790690051392e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4426, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 74.083336353302, "completions/min_length": 24.5, "epoch": 8.800198560436833, "grad_norm": 0.0026815744212083303, "kl": 0.07843017578125, "learning_rate": 3.575565460897511e-08, "loss": 7.8457836934831e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4427, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 68.78125286102295, "completions/min_length": 27.375, "epoch": 8.802184164805162, "grad_norm": 0.003388962211476853, "kl": 0.059234619140625, "learning_rate": 3.563858856709556e-08, "loss": 5.917256203247234e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4428, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 66.80208492279053, "completions/min_length": 21.125, "epoch": 8.804169769173493, "grad_norm": 1.3006128245096253, "kl": 0.0682373046875, "learning_rate": 3.55217073974125e-08, "loss": -0.004407038446515799, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4429, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 73.71875381469727, "completions/min_length": 22.75, "epoch": 8.806155373541822, "grad_norm": 0.0028610550770152713, "kl": 0.06719970703125, "learning_rate": 3.540501114645872e-08, "loss": 6.713801121804863e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4430, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 67.81250190734863, "completions/min_length": 21.75, "epoch": 8.808140977910151, "grad_norm": 0.0035833374803448483, "kl": 0.071502685546875, "learning_rate": 3.5288499860693486e-08, "loss": 7.149644079618156e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4431, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.375, "completions/mean_length": 69.75000143051147, "completions/min_length": 27.875, "epoch": 8.810126582278482, "grad_norm": 0.003997033114191241, "kl": 0.06939697265625, "learning_rate": 3.517217358650254e-08, "loss": 6.936653517186642e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4432, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.875, "completions/mean_length": 69.45833492279053, "completions/min_length": 23.875, "epoch": 8.81211218664681, "grad_norm": 0.004025274146046803, "kl": 0.0753173828125, "learning_rate": 3.5056032370197665e-08, "loss": 7.519741484429687e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4433, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 65.29166889190674, "completions/min_length": 21.375, "epoch": 8.81409779101514, "grad_norm": 0.005281899129190168, "kl": 0.092071533203125, "learning_rate": 3.494007625801731e-08, "loss": 9.209560084855184e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4434, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 68.13541889190674, "completions/min_length": 21.875, "epoch": 8.816083395383469, "grad_norm": 0.002804457619560133, "kl": 0.05792236328125, "learning_rate": 3.482430529612612e-08, "loss": 5.7928751630242914e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4435, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 68.02083444595337, "completions/min_length": 27.0, "epoch": 8.8180689997518, "grad_norm": 0.0032410795444556086, "kl": 0.064422607421875, "learning_rate": 3.4708719530614826e-08, "loss": 6.43706662231125e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4436, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 62.958335876464844, "completions/min_length": 22.0, "epoch": 8.820054604120129, "grad_norm": 2.485340066728084, "kl": 0.082275390625, "learning_rate": 3.459331900750073e-08, "loss": -0.006537230685353279, "memory(GiB)": 94.21, "reward": 1.9166666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4437, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 67.81250095367432, "completions/min_length": 22.0, "epoch": 8.822040208488458, "grad_norm": 0.0031650147717010935, "kl": 0.061614990234375, "learning_rate": 3.447810377272725e-08, "loss": 6.164857768453658e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4438, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 66.01041841506958, "completions/min_length": 24.25, "epoch": 8.824025812856789, "grad_norm": 0.004814892592620895, "kl": 0.06744384765625, "learning_rate": 3.436307387216386e-08, "loss": 6.746799772372469e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4439, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 71.22916793823242, "completions/min_length": 38.375, "epoch": 8.826011417225118, "grad_norm": 0.0022152839167825073, "kl": 0.05316162109375, "learning_rate": 3.424822935160654e-08, "loss": 5.3128544095670804e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4440, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 71.45833492279053, "completions/min_length": 24.0, "epoch": 8.827997021593447, "grad_norm": 0.003946494427743627, "kl": 0.08221435546875, "learning_rate": 3.413357025677743e-08, "loss": 8.225941564887762e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4441, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 64.50000238418579, "completions/min_length": 22.875, "epoch": 8.829982625961778, "grad_norm": 0.0025197729664963964, "kl": 0.064361572265625, "learning_rate": 3.401909663332464e-08, "loss": 6.435364775825292e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4442, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 68.42708587646484, "completions/min_length": 27.25, "epoch": 8.831968230330107, "grad_norm": 0.0038238566717355284, "kl": 0.057952880859375, "learning_rate": 3.3904808526822594e-08, "loss": 5.7944835134549066e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4443, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.125, "completions/mean_length": 66.22916889190674, "completions/min_length": 22.5, "epoch": 8.833953834698436, "grad_norm": 0.0058701856290664265, "kl": 0.0704345703125, "learning_rate": 3.379070598277184e-08, "loss": 7.049596024444327e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4444, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 70.01041889190674, "completions/min_length": 30.375, "epoch": 8.835939439066767, "grad_norm": 0.005089433792711441, "kl": 0.069305419921875, "learning_rate": 3.367678904659904e-08, "loss": 6.925516936462373e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4445, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 72.94791793823242, "completions/min_length": 28.0, "epoch": 8.837925043435096, "grad_norm": 0.004667625571964998, "kl": 0.075897216796875, "learning_rate": 3.356305776365692e-08, "loss": 7.590887253172696e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4446, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 68.47916841506958, "completions/min_length": 26.625, "epoch": 8.839910647803425, "grad_norm": 0.006422566536172555, "kl": 0.078399658203125, "learning_rate": 3.344951217922437e-08, "loss": 7.840842590667307e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4447, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 63.30208420753479, "completions/min_length": 20.375, "epoch": 8.841896252171754, "grad_norm": 0.00516401138414613, "kl": 0.061767578125, "learning_rate": 3.333615233850634e-08, "loss": 6.173625297378749e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4448, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 72.44791889190674, "completions/min_length": 21.625, "epoch": 8.843881856540085, "grad_norm": 0.006124480711216913, "kl": 0.063812255859375, "learning_rate": 3.322297828663373e-08, "loss": 6.38872297713533e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4449, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 66.70833539962769, "completions/min_length": 27.125, "epoch": 8.845867460908414, "grad_norm": 0.004467890066077925, "kl": 0.068695068359375, "learning_rate": 3.310999006866366e-08, "loss": 6.871936784591526e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4450, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 63.92708444595337, "completions/min_length": 25.0, "epoch": 8.847853065276743, "grad_norm": 0.002714490151785596, "kl": 0.078887939453125, "learning_rate": 3.29971877295791e-08, "loss": 7.883248326834291e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4451, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 58.90625238418579, "completions/min_length": 20.5, "epoch": 8.849838669645074, "grad_norm": 2.2109730144131143, "kl": 2.998443603515625, "learning_rate": 3.288457131428895e-08, "loss": 0.0029898881912231445, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4452, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 75.65625238418579, "completions/min_length": 24.0, "epoch": 8.851824274013403, "grad_norm": 0.0038133840964780495, "kl": 0.064971923828125, "learning_rate": 3.2772140867628417e-08, "loss": 6.495663546957076e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4453, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 58.968751430511475, "completions/min_length": 24.125, "epoch": 8.853809878381732, "grad_norm": 0.0029025345100421912, "kl": 0.053070068359375, "learning_rate": 3.265989643435829e-08, "loss": 5.30916076968424e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4454, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 72.22916889190674, "completions/min_length": 25.625, "epoch": 8.855795482750063, "grad_norm": 0.0026259061090380424, "kl": 0.054290771484375, "learning_rate": 3.25478380591655e-08, "loss": 5.428859367384575e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4455, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 70.14583444595337, "completions/min_length": 18.375, "epoch": 8.857781087118392, "grad_norm": 0.7393255715903666, "kl": 0.0880126953125, "learning_rate": 3.243596578666302e-08, "loss": -0.0036449681501835585, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4456, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 66.65625190734863, "completions/min_length": 24.25, "epoch": 8.85976669148672, "grad_norm": 0.006839111909673298, "kl": 0.080902099609375, "learning_rate": 3.2324279661389456e-08, "loss": 8.082977728918195e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4457, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 63.91666841506958, "completions/min_length": 23.625, "epoch": 8.861752295855052, "grad_norm": 0.00429611916964378, "kl": 0.059844970703125, "learning_rate": 3.2212779727809504e-08, "loss": 5.985811003483832e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4458, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 68.33333587646484, "completions/min_length": 24.5, "epoch": 8.86373790022338, "grad_norm": 0.003965196732980484, "kl": 0.057403564453125, "learning_rate": 3.2101466030313715e-08, "loss": 5.743458677898161e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4459, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 67.33333539962769, "completions/min_length": 24.875, "epoch": 8.86572350459171, "grad_norm": 0.004674087290420187, "kl": 0.076202392578125, "learning_rate": 3.199033861321826e-08, "loss": 7.612827175762504e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4460, "train_speed(iter/s)": 0.022633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 64.38541841506958, "completions/min_length": 23.5, "epoch": 8.867709108960039, "grad_norm": 0.003531753849818053, "kl": 0.06182861328125, "learning_rate": 3.187939752076546e-08, "loss": 6.178292096592486e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4461, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 76.15625143051147, "completions/min_length": 25.75, "epoch": 8.86969471332837, "grad_norm": 0.0034799343435747095, "kl": 0.066497802734375, "learning_rate": 3.176864279712338e-08, "loss": 6.646820838795975e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4462, "train_speed(iter/s)": 0.022632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 74.10416793823242, "completions/min_length": 25.75, "epoch": 8.871680317696699, "grad_norm": 0.003168066818873465, "kl": 0.063873291015625, "learning_rate": 3.165807448638574e-08, "loss": 6.370982009684667e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4463, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 71.46875238418579, "completions/min_length": 30.875, "epoch": 8.873665922065028, "grad_norm": 1.2047363273355765, "kl": 0.07232666015625, "learning_rate": 3.1547692632572285e-08, "loss": -0.013890378177165985, "memory(GiB)": 94.21, "reward": 1.7291666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4464, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 69.16666889190674, "completions/min_length": 25.125, "epoch": 8.875651526433359, "grad_norm": 0.0030995843958764964, "kl": 0.084930419921875, "learning_rate": 3.143749727962824e-08, "loss": 8.48943818709813e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4465, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 76.41666889190674, "completions/min_length": 26.375, "epoch": 8.877637130801688, "grad_norm": 0.0028964845835545857, "kl": 0.060882568359375, "learning_rate": 3.13274884714248e-08, "loss": 6.088173540774733e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4466, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.125, "completions/mean_length": 78.45833492279053, "completions/min_length": 27.75, "epoch": 8.879622735170017, "grad_norm": 0.002830668593501242, "kl": 0.05718994140625, "learning_rate": 3.1217666251758866e-08, "loss": 5.7176857808372006e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4467, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 65.16666889190674, "completions/min_length": 24.75, "epoch": 8.881608339538348, "grad_norm": 0.003762163116174367, "kl": 0.06549072265625, "learning_rate": 3.11080306643528e-08, "loss": 6.54999166727066e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4468, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 62.218751430511475, "completions/min_length": 19.875, "epoch": 8.883593943906677, "grad_norm": 0.0038819703398893365, "kl": 0.0613250732421875, "learning_rate": 3.099858175285519e-08, "loss": 6.128475069999695e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4469, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.375, "completions/mean_length": 79.77083444595337, "completions/min_length": 27.25, "epoch": 8.885579548275006, "grad_norm": 0.00418831111783306, "kl": 0.062774658203125, "learning_rate": 3.088931956083979e-08, "loss": 6.281497917370871e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4470, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 65.27083492279053, "completions/min_length": 24.25, "epoch": 8.887565152643337, "grad_norm": 0.0036727808494226638, "kl": 0.07403564453125, "learning_rate": 3.078024413180619e-08, "loss": 7.40958348615095e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4471, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 79.72916889190674, "completions/min_length": 30.375, "epoch": 8.889550757011666, "grad_norm": 0.0037651245301109047, "kl": 0.066009521484375, "learning_rate": 3.067135550917976e-08, "loss": 6.60418882034719e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4472, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 76.17708587646484, "completions/min_length": 26.5, "epoch": 8.891536361379995, "grad_norm": 0.003927564094897929, "kl": 0.0704345703125, "learning_rate": 3.056265373631128e-08, "loss": 7.03707555658184e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4473, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 73.54166889190674, "completions/min_length": 23.25, "epoch": 8.893521965748324, "grad_norm": 0.0050882071580019795, "kl": 0.07208251953125, "learning_rate": 3.045413885647735e-08, "loss": 7.215599907794967e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4474, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 69.84375238418579, "completions/min_length": 22.375, "epoch": 8.895507570116655, "grad_norm": 0.0038093511005711463, "kl": 0.070556640625, "learning_rate": 3.034581091287996e-08, "loss": 7.051750435493886e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4475, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.125, "completions/mean_length": 66.43750143051147, "completions/min_length": 20.125, "epoch": 8.897493174484984, "grad_norm": 0.0022130293660831413, "kl": 0.06146240234375, "learning_rate": 3.023766994864679e-08, "loss": 6.145128281787038e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4476, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 65.31250238418579, "completions/min_length": 24.375, "epoch": 8.899478778853313, "grad_norm": 0.0022639850738775543, "kl": 0.0540771484375, "learning_rate": 3.012971600683123e-08, "loss": 5.4102794820209965e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4477, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 75.77083396911621, "completions/min_length": 24.25, "epoch": 8.901464383221644, "grad_norm": 0.003339713302168338, "kl": 0.06396484375, "learning_rate": 3.0021949130411895e-08, "loss": 6.39510981272906e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4478, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 67.50000238418579, "completions/min_length": 29.625, "epoch": 8.903449987589973, "grad_norm": 0.0033473584408936904, "kl": 0.064300537109375, "learning_rate": 2.9914369362293026e-08, "loss": 6.428691995097324e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4479, "train_speed(iter/s)": 0.022631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.875, "completions/mean_length": 80.13541793823242, "completions/min_length": 26.375, "epoch": 8.905435591958302, "grad_norm": 0.002741732426725497, "kl": 0.06915283203125, "learning_rate": 2.980697674530458e-08, "loss": 6.910170486662537e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4480, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 68.87500238418579, "completions/min_length": 25.75, "epoch": 8.907421196326633, "grad_norm": 0.002350342760807624, "kl": 0.054473876953125, "learning_rate": 2.969977132220175e-08, "loss": 5.447406874736771e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4481, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 81.06250238418579, "completions/min_length": 37.125, "epoch": 8.909406800694962, "grad_norm": 0.006171679161739611, "kl": 0.077606201171875, "learning_rate": 2.9592753135665283e-08, "loss": 7.752048259135336e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4482, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 73.19791984558105, "completions/min_length": 29.875, "epoch": 8.91139240506329, "grad_norm": 0.0024595631069271534, "kl": 0.060394287109375, "learning_rate": 2.9485922228301596e-08, "loss": 6.0439033404691145e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4483, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 64.30208492279053, "completions/min_length": 26.125, "epoch": 8.913378009431622, "grad_norm": 0.003235444483632454, "kl": 0.06634521484375, "learning_rate": 2.9379278642642058e-08, "loss": 6.62503152852878e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4484, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.125, "completions/mean_length": 62.83333492279053, "completions/min_length": 23.625, "epoch": 8.91536361379995, "grad_norm": 0.004710183969948051, "kl": 0.064178466796875, "learning_rate": 2.927282242114404e-08, "loss": 6.42067680018954e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4485, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 61.87500190734863, "completions/min_length": 24.0, "epoch": 8.91734921816828, "grad_norm": 0.0032660739398445615, "kl": 0.06805419921875, "learning_rate": 2.9166553606189858e-08, "loss": 6.806520104873925e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4486, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 62.66666841506958, "completions/min_length": 19.75, "epoch": 8.919334822536609, "grad_norm": 0.0033170650922295063, "kl": 0.05517578125, "learning_rate": 2.9060472240087507e-08, "loss": 5.518686884897761e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4487, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 73.43750238418579, "completions/min_length": 28.625, "epoch": 8.92132042690494, "grad_norm": 0.004296993126054609, "kl": 0.0579833984375, "learning_rate": 2.895457836507015e-08, "loss": 5.796835102955811e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4488, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 70.08333539962769, "completions/min_length": 27.125, "epoch": 8.923306031273269, "grad_norm": 0.003963143165516153, "kl": 0.06988525390625, "learning_rate": 2.884887202329639e-08, "loss": 6.986509106354788e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4489, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.125, "completions/mean_length": 54.69791841506958, "completions/min_length": 21.125, "epoch": 8.925291635641598, "grad_norm": 0.0043471785608314994, "kl": 0.056396484375, "learning_rate": 2.8743353256850345e-08, "loss": 5.6419750762870535e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4490, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.75, "completions/mean_length": 66.95833539962769, "completions/min_length": 17.375, "epoch": 8.927277240009929, "grad_norm": 0.0036562730798006997, "kl": 0.057525634765625, "learning_rate": 2.8638022107741134e-08, "loss": 5.74771074752789e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4491, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 71.39583539962769, "completions/min_length": 18.625, "epoch": 8.929262844378258, "grad_norm": 0.005919954304850223, "kl": 0.0604248046875, "learning_rate": 2.8532878617903377e-08, "loss": 6.0426111303968355e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4492, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 68.73958444595337, "completions/min_length": 25.375, "epoch": 8.931248448746587, "grad_norm": 0.002599222969608864, "kl": 0.063446044921875, "learning_rate": 2.842792282919698e-08, "loss": 6.343086715787649e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4493, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.125, "completions/mean_length": 69.89583539962769, "completions/min_length": 27.625, "epoch": 8.933234053114917, "grad_norm": 0.0037290223287503323, "kl": 0.065521240234375, "learning_rate": 2.8323154783406967e-08, "loss": 6.557120650541037e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4494, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 59.760417461395264, "completions/min_length": 26.0, "epoch": 8.935219657483247, "grad_norm": 0.002729830477485634, "kl": 0.05963134765625, "learning_rate": 2.8218574522243798e-08, "loss": 5.9629186580423266e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4495, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.875, "completions/mean_length": 69.35416841506958, "completions/min_length": 19.0, "epoch": 8.937205261851576, "grad_norm": 2.2930029865566146, "kl": 0.08111572265625, "learning_rate": 2.811418208734323e-08, "loss": 0.0035009586717933416, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4496, "train_speed(iter/s)": 0.02263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 71.10416793823242, "completions/min_length": 25.5, "epoch": 8.939190866219906, "grad_norm": 0.002977519574416739, "kl": 0.06390380859375, "learning_rate": 2.800997752026596e-08, "loss": 6.391222996171564e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4497, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 69.19791841506958, "completions/min_length": 21.125, "epoch": 8.941176470588236, "grad_norm": 0.0028814438434514775, "kl": 0.069244384765625, "learning_rate": 2.790596086249819e-08, "loss": 6.917622522450984e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4498, "train_speed(iter/s)": 0.022629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 56.520835399627686, "completions/min_length": 17.125, "epoch": 8.943162074956565, "grad_norm": 0.007042173266484335, "kl": 0.08233642578125, "learning_rate": 2.7802132155451075e-08, "loss": 8.241958857979625e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4499, "train_speed(iter/s)": 0.022628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.125, "completions/mean_length": 62.250001430511475, "completions/min_length": 24.125, "epoch": 8.945147679324894, "grad_norm": 0.0038935001105113195, "kl": 0.05487060546875, "learning_rate": 2.7698491440460992e-08, "loss": 5.484827488544397e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4500, "train_speed(iter/s)": 0.022627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 70.73958587646484, "completions/min_length": 19.625, "epoch": 8.947133283693224, "grad_norm": 0.9812121683987876, "kl": 0.055023193359375, "learning_rate": 2.7595038758789656e-08, "loss": -0.008326666429638863, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4501, "train_speed(iter/s)": 0.022624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 65.03125190734863, "completions/min_length": 16.0, "epoch": 8.949118888061554, "grad_norm": 0.005085516034028818, "kl": 0.069305419921875, "learning_rate": 2.7491774151623682e-08, "loss": 6.928383663762361e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4502, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.625, "completions/mean_length": 67.38541793823242, "completions/min_length": 16.75, "epoch": 8.951104492429883, "grad_norm": 0.0028909097368808533, "kl": 0.055267333984375, "learning_rate": 2.7388697660074956e-08, "loss": 5.5298994993790984e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4503, "train_speed(iter/s)": 0.022622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 77.00000286102295, "completions/min_length": 28.25, "epoch": 8.953090096798213, "grad_norm": 0.003004109526001115, "kl": 0.065277099609375, "learning_rate": 2.7285809325180487e-08, "loss": 6.529617530759424e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4504, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 56.458335876464844, "completions/min_length": 19.375, "epoch": 8.955075701166543, "grad_norm": 0.00517998306879406, "kl": 0.055572509765625, "learning_rate": 2.718310918790223e-08, "loss": 5.5568874813616276e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4505, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 66.03125238418579, "completions/min_length": 25.625, "epoch": 8.957061305534872, "grad_norm": 0.002693555649533178, "kl": 0.0514068603515625, "learning_rate": 2.708059728912737e-08, "loss": 5.1350198191357777e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4506, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 80.77083587646484, "completions/min_length": 32.25, "epoch": 8.959046909903202, "grad_norm": 0.005137775433693113, "kl": 0.080780029296875, "learning_rate": 2.697827366966804e-08, "loss": 8.076422091107816e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4507, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 75.22916793823242, "completions/min_length": 24.375, "epoch": 8.961032514271531, "grad_norm": 0.0027825772492929735, "kl": 0.066741943359375, "learning_rate": 2.6876138370261424e-08, "loss": 6.667750130873173e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4508, "train_speed(iter/s)": 0.022623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 76.41666984558105, "completions/min_length": 26.625, "epoch": 8.96301811863986, "grad_norm": 0.006234419382710262, "kl": 0.087188720703125, "learning_rate": 2.6774191431569838e-08, "loss": 8.721857739146799e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4509, "train_speed(iter/s)": 0.022622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 74.01041889190674, "completions/min_length": 26.375, "epoch": 8.965003723008191, "grad_norm": 0.005492073554030496, "kl": 0.062591552734375, "learning_rate": 2.6672432894180585e-08, "loss": 6.2644096033182e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4510, "train_speed(iter/s)": 0.022621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.625, "completions/mean_length": 77.55208396911621, "completions/min_length": 26.0, "epoch": 8.96698932737652, "grad_norm": 0.0028167848714367267, "kl": 0.069549560546875, "learning_rate": 2.657086279860571e-08, "loss": 6.952951662242413e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4511, "train_speed(iter/s)": 0.022621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 60.60416889190674, "completions/min_length": 27.875, "epoch": 8.96897493174485, "grad_norm": 0.006133395738297823, "kl": 0.055084228515625, "learning_rate": 2.6469481185282695e-08, "loss": 5.5125594371929765e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4512, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.375, "completions/mean_length": 73.89583587646484, "completions/min_length": 27.125, "epoch": 8.970960536113179, "grad_norm": 0.0024352518387654575, "kl": 0.06829833984375, "learning_rate": 2.6368288094573532e-08, "loss": 6.826894241385162e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4513, "train_speed(iter/s)": 0.02262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 70.19791889190674, "completions/min_length": 19.875, "epoch": 8.97294614048151, "grad_norm": 1.050667784594347, "kl": 0.076568603515625, "learning_rate": 2.6267283566765442e-08, "loss": 0.004599431063979864, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4514, "train_speed(iter/s)": 0.022619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 75.37500143051147, "completions/min_length": 28.625, "epoch": 8.974931744849838, "grad_norm": 0.0026029328144589535, "kl": 0.06512451171875, "learning_rate": 2.6166467642070476e-08, "loss": 6.50845468044281e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4515, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 66.89583539962769, "completions/min_length": 20.75, "epoch": 8.976917349218168, "grad_norm": 0.8841244484512857, "kl": 0.0635986328125, "learning_rate": 2.606584036062548e-08, "loss": 0.004274980630725622, "memory(GiB)": 94.21, "reward": 1.8958333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8958333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4516, "train_speed(iter/s)": 0.022619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 68.75000190734863, "completions/min_length": 27.5, "epoch": 8.978902953586498, "grad_norm": 0.004487045753204406, "kl": 0.076202392578125, "learning_rate": 2.5965401762492567e-08, "loss": 7.627331069670618e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4517, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.875, "completions/mean_length": 66.02083492279053, "completions/min_length": 23.875, "epoch": 8.980888557954827, "grad_norm": 0.008405509807377455, "kl": 0.069122314453125, "learning_rate": 2.586515188765831e-08, "loss": 6.913597462698817e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4518, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.125, "completions/mean_length": 70.86458539962769, "completions/min_length": 25.0, "epoch": 8.982874162323156, "grad_norm": 0.0024611780120682943, "kl": 0.061370849609375, "learning_rate": 2.5765090776034336e-08, "loss": 6.138101889519021e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4519, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.625, "completions/mean_length": 64.90625238418579, "completions/min_length": 26.875, "epoch": 8.984859766691487, "grad_norm": 0.003284181310148787, "kl": 0.063568115234375, "learning_rate": 2.566521846745723e-08, "loss": 6.351979391183704e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4520, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 66.97916841506958, "completions/min_length": 19.375, "epoch": 8.986845371059816, "grad_norm": 0.9651339550355862, "kl": 0.105560302734375, "learning_rate": 2.556553500168812e-08, "loss": -0.0008393513853661716, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4521, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 67.56250286102295, "completions/min_length": 28.0, "epoch": 8.988830975428145, "grad_norm": 0.051794966933195435, "kl": 0.0892333984375, "learning_rate": 2.546604041841327e-08, "loss": 8.941303531173617e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4522, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.375, "completions/mean_length": 58.96875190734863, "completions/min_length": 25.625, "epoch": 8.990816579796476, "grad_norm": 0.00306461668849533, "kl": 0.0633697509765625, "learning_rate": 2.536673475724349e-08, "loss": 6.335699436021969e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4523, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 67.08333539962769, "completions/min_length": 25.5, "epoch": 8.992802184164805, "grad_norm": 1.0298309989948347, "kl": 0.097442626953125, "learning_rate": 2.5267618057714544e-08, "loss": 0.0046271877363324165, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.26020343601703644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4524, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 68.53125190734863, "completions/min_length": 22.375, "epoch": 8.994787788533134, "grad_norm": 0.021011481703648357, "kl": 0.08575439453125, "learning_rate": 2.5168690359286927e-08, "loss": 8.559702837374061e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4525, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 63.42708683013916, "completions/min_length": 22.0, "epoch": 8.996773392901463, "grad_norm": 0.005276637889362204, "kl": 0.074127197265625, "learning_rate": 2.506995170134579e-08, "loss": 7.416962762363255e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4526, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 73.27083587646484, "completions/min_length": 21.5, "epoch": 8.998758997269794, "grad_norm": 0.005081345464096999, "kl": 0.07293701171875, "learning_rate": 2.4971402123201258e-08, "loss": 7.289180939551443e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4527, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.375, "completions/mean_length": 68.45833587646484, "completions/min_length": 27.125, "epoch": 9.001985604368329, "grad_norm": 0.0032218905720488335, "kl": 0.08721923828125, "learning_rate": 2.487304166408788e-08, "loss": 8.7178879766725e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4528, "train_speed(iter/s)": 0.022618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 69.28125190734863, "completions/min_length": 19.5, "epoch": 9.00397120873666, "grad_norm": 0.004029272536043304, "kl": 0.05450439453125, "learning_rate": 2.4774870363165002e-08, "loss": 5.447335934150033e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4529, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.875, "completions/mean_length": 78.53125286102295, "completions/min_length": 22.75, "epoch": 9.005956813104989, "grad_norm": 0.002614961219304078, "kl": 0.05950927734375, "learning_rate": 2.467688825951686e-08, "loss": 5.948838224867359e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4530, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 74.67708587646484, "completions/min_length": 24.625, "epoch": 9.007942417473318, "grad_norm": 0.003909280033284893, "kl": 0.077056884765625, "learning_rate": 2.4579095392152195e-08, "loss": 7.701186405029148e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4531, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 65.03125238418579, "completions/min_length": 19.0, "epoch": 9.009928021841649, "grad_norm": 0.0028361100477399576, "kl": 0.076446533203125, "learning_rate": 2.448149180000436e-08, "loss": 7.63984426157549e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4532, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 77.37500143051147, "completions/min_length": 26.5, "epoch": 9.011913626209978, "grad_norm": 0.002492242390435187, "kl": 0.063995361328125, "learning_rate": 2.4384077521931557e-08, "loss": 6.40070647932589e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4533, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 70.1354193687439, "completions/min_length": 19.625, "epoch": 9.013899230578307, "grad_norm": 0.003963207931070446, "kl": 0.059417724609375, "learning_rate": 2.4286852596716312e-08, "loss": 5.939879338257015e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4534, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 66.51041889190674, "completions/min_length": 30.375, "epoch": 9.015884834946636, "grad_norm": 0.034503494588418344, "kl": 0.11724853515625, "learning_rate": 2.4189817063066164e-08, "loss": 0.000117438830784522, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4535, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 60.68750190734863, "completions/min_length": 19.875, "epoch": 9.017870439314967, "grad_norm": 0.022775426908068233, "kl": 0.113494873046875, "learning_rate": 2.409297095961288e-08, "loss": 0.00011347609688527882, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4536, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.875, "completions/mean_length": 61.031250953674316, "completions/min_length": 23.5, "epoch": 9.019856043683296, "grad_norm": 0.004040718725406422, "kl": 0.0609130859375, "learning_rate": 2.3996314324912835e-08, "loss": 6.0948863392695785e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4537, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.875, "completions/mean_length": 66.94791746139526, "completions/min_length": 26.375, "epoch": 9.021841648051625, "grad_norm": 0.0037970360092925777, "kl": 0.072357177734375, "learning_rate": 2.389984719744742e-08, "loss": 7.227309106383473e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4538, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 63.281251668930054, "completions/min_length": 20.875, "epoch": 9.023827252419956, "grad_norm": 0.010496840266945652, "kl": 0.072174072265625, "learning_rate": 2.380356961562213e-08, "loss": 7.216067024273798e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4539, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.625, "completions/mean_length": 66.90625143051147, "completions/min_length": 22.75, "epoch": 9.025812856788285, "grad_norm": 0.007005044207731525, "kl": 0.0645751953125, "learning_rate": 2.370748161776698e-08, "loss": 6.449769716709852e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4540, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 69.08333539962769, "completions/min_length": 19.375, "epoch": 9.027798461156614, "grad_norm": 0.0036370782122761977, "kl": 0.06817626953125, "learning_rate": 2.3611583242136858e-08, "loss": 6.825003947596997e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4541, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 66.01041889190674, "completions/min_length": 21.5, "epoch": 9.029784065524945, "grad_norm": 0.0028755037142842513, "kl": 0.053009033203125, "learning_rate": 2.351587452691084e-08, "loss": 5.2990955737186596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4542, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 62.802085876464844, "completions/min_length": 15.5, "epoch": 9.031769669893274, "grad_norm": 0.004861215987040813, "kl": 0.06512451171875, "learning_rate": 2.3420355510192612e-08, "loss": 6.518846930703148e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4543, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 70.31250190734863, "completions/min_length": 26.375, "epoch": 9.033755274261603, "grad_norm": 0.0035362016591708087, "kl": 0.065338134765625, "learning_rate": 2.3325026230010368e-08, "loss": 6.531730468850583e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4544, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 62.18750238418579, "completions/min_length": 14.375, "epoch": 9.035740878629934, "grad_norm": 0.030740655365082796, "kl": 0.1055908203125, "learning_rate": 2.3229886724316693e-08, "loss": 0.00010559442307567224, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4545, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 64.90625143051147, "completions/min_length": 20.625, "epoch": 9.037726482998263, "grad_norm": 0.00522289505410599, "kl": 0.06939697265625, "learning_rate": 2.3134937030988788e-08, "loss": 6.940049206605181e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4546, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 66.85416841506958, "completions/min_length": 18.75, "epoch": 9.039712087366592, "grad_norm": 0.005965280774300982, "kl": 0.05792236328125, "learning_rate": 2.3040177187828024e-08, "loss": 5.787128611700609e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4547, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 62.17708492279053, "completions/min_length": 18.25, "epoch": 9.041697691734921, "grad_norm": 0.003355668945559222, "kl": 0.05377197265625, "learning_rate": 2.2945607232560284e-08, "loss": 5.381919618230313e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4548, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 60.72916865348816, "completions/min_length": 24.0, "epoch": 9.043683296103252, "grad_norm": 0.002504701579232985, "kl": 0.070037841796875, "learning_rate": 2.2851227202836e-08, "loss": 6.995261355768889e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4549, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 60.96875190734863, "completions/min_length": 18.0, "epoch": 9.045668900471581, "grad_norm": 0.0036162041069074204, "kl": 0.079010009765625, "learning_rate": 2.2757037136229783e-08, "loss": 7.90992853580974e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4550, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 63.9791693687439, "completions/min_length": 25.375, "epoch": 9.04765450483991, "grad_norm": 0.005387595030086053, "kl": 0.056793212890625, "learning_rate": 2.2663037070240754e-08, "loss": 5.6754892284516245e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4551, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 68.64583492279053, "completions/min_length": 22.5, "epoch": 9.04964010920824, "grad_norm": 0.0035011935211644927, "kl": 0.090850830078125, "learning_rate": 2.2569227042292416e-08, "loss": 9.09223745111376e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4552, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 68.03125190734863, "completions/min_length": 24.0, "epoch": 9.05162571357657, "grad_norm": 0.0028053485352475043, "kl": 0.07098388671875, "learning_rate": 2.2475607089732397e-08, "loss": 7.098112837411463e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4553, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 79.70833396911621, "completions/min_length": 26.5, "epoch": 9.053611317944899, "grad_norm": 0.0033546600809278354, "kl": 0.066619873046875, "learning_rate": 2.2382177249832988e-08, "loss": 6.663070962531492e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4554, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 67.28125143051147, "completions/min_length": 20.875, "epoch": 9.05559692231323, "grad_norm": 0.0028327416967405777, "kl": 0.063201904296875, "learning_rate": 2.2288937559790445e-08, "loss": 6.311328615993261e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4555, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 70.1666693687439, "completions/min_length": 25.125, "epoch": 9.057582526681559, "grad_norm": 0.002684138567880789, "kl": 0.0650634765625, "learning_rate": 2.219588805672551e-08, "loss": 6.499581650132313e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4556, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 63.78125190734863, "completions/min_length": 22.25, "epoch": 9.059568131049888, "grad_norm": 0.004482321213332049, "kl": 0.0640869140625, "learning_rate": 2.2103028777683162e-08, "loss": 6.410179048543796e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4557, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 63.13541793823242, "completions/min_length": 20.75, "epoch": 9.061553735418219, "grad_norm": 0.00469748423120213, "kl": 0.073455810546875, "learning_rate": 2.201035975963278e-08, "loss": 7.346504571614787e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4558, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 79.40625238418579, "completions/min_length": 26.25, "epoch": 9.063539339786548, "grad_norm": 0.0025776055027771166, "kl": 0.064300537109375, "learning_rate": 2.1917881039467735e-08, "loss": 6.438494892790914e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4559, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 60.41666889190674, "completions/min_length": 25.875, "epoch": 9.065524944154877, "grad_norm": 0.003989193192029717, "kl": 0.064361572265625, "learning_rate": 2.182559265400591e-08, "loss": 6.436978583224118e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4560, "train_speed(iter/s)": 0.022617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 69.82291889190674, "completions/min_length": 21.5, "epoch": 9.067510548523206, "grad_norm": 0.004735728389694131, "kl": 0.060546875, "learning_rate": 2.1733494639989136e-08, "loss": 6.0589329223148525e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4561, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.375, "completions/mean_length": 72.40625286102295, "completions/min_length": 27.25, "epoch": 9.069496152891537, "grad_norm": 1.4864792675427287, "kl": 0.07177734375, "learning_rate": 2.1641587034083752e-08, "loss": -0.019910480827093124, "memory(GiB)": 94.21, "reward": 1.8854166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4562, "train_speed(iter/s)": 0.022616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 68.67708492279053, "completions/min_length": 22.875, "epoch": 9.071481757259866, "grad_norm": 0.003481732394948342, "kl": 0.062957763671875, "learning_rate": 2.15498698728801e-08, "loss": 6.299871893133968e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4563, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 64.96875238418579, "completions/min_length": 27.625, "epoch": 9.073467361628195, "grad_norm": 0.005030429797118001, "kl": 0.056182861328125, "learning_rate": 2.145834319289258e-08, "loss": 5.615845293505117e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4564, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 68.15625190734863, "completions/min_length": 22.625, "epoch": 9.075452965996526, "grad_norm": 0.0037198045930339624, "kl": 0.06719970703125, "learning_rate": 2.136700703056016e-08, "loss": 6.719467637594789e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4565, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 70.39583492279053, "completions/min_length": 24.875, "epoch": 9.077438570364855, "grad_norm": 0.002239190889134339, "kl": 0.066680908203125, "learning_rate": 2.1275861422245524e-08, "loss": 6.668046262348071e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4566, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 63.708335876464844, "completions/min_length": 21.125, "epoch": 9.079424174733184, "grad_norm": 0.008713956080790999, "kl": 0.058929443359375, "learning_rate": 2.1184906404235814e-08, "loss": 5.887925726710819e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4567, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 74.53125238418579, "completions/min_length": 22.875, "epoch": 9.081409779101515, "grad_norm": 0.0045706902185289715, "kl": 0.075164794921875, "learning_rate": 2.1094142012742177e-08, "loss": 7.511264266213402e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4568, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 74.54166841506958, "completions/min_length": 25.25, "epoch": 9.083395383469844, "grad_norm": 0.00254277419603636, "kl": 0.057098388671875, "learning_rate": 2.1003568283899707e-08, "loss": 5.7095250667771325e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4569, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 74.84375190734863, "completions/min_length": 22.875, "epoch": 9.085380987838173, "grad_norm": 0.002910854512896116, "kl": 0.073883056640625, "learning_rate": 2.0913185253767894e-08, "loss": 7.384567288681865e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4570, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 76.58333587646484, "completions/min_length": 25.625, "epoch": 9.087366592206504, "grad_norm": 0.0032179189228860507, "kl": 0.069671630859375, "learning_rate": 2.082299295833001e-08, "loss": 6.966745422687382e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4571, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 67.70833587646484, "completions/min_length": 21.875, "epoch": 9.089352196574833, "grad_norm": 0.0028057939434665505, "kl": 0.055572509765625, "learning_rate": 2.0732991433493607e-08, "loss": 5.556229007197544e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4572, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 64.96875143051147, "completions/min_length": 19.125, "epoch": 9.091337800943162, "grad_norm": 0.0029340588696122193, "kl": 0.052886962890625, "learning_rate": 2.0643180715090304e-08, "loss": 5.2954317652620375e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4573, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 81.68750286102295, "completions/min_length": 32.375, "epoch": 9.09332340531149, "grad_norm": 0.0027812154371746247, "kl": 0.07904052734375, "learning_rate": 2.0553560838875496e-08, "loss": 7.895859016571194e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4574, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 69.16666889190674, "completions/min_length": 23.375, "epoch": 9.095309009679822, "grad_norm": 0.005964695224091516, "kl": 0.0714111328125, "learning_rate": 2.0464131840528974e-08, "loss": 7.138065120670944e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4575, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 72.30208539962769, "completions/min_length": 24.375, "epoch": 9.09729461404815, "grad_norm": 0.0029827520822214813, "kl": 0.05621337890625, "learning_rate": 2.0374893755654154e-08, "loss": 5.621215677820146e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4576, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 63.739585876464844, "completions/min_length": 24.75, "epoch": 9.09928021841648, "grad_norm": 0.0024626586627420613, "kl": 0.07623291015625, "learning_rate": 2.0285846619778667e-08, "loss": 7.622718840138987e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4577, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 81.55208587646484, "completions/min_length": 27.625, "epoch": 9.10126582278481, "grad_norm": 0.005056008869765523, "kl": 0.0562744140625, "learning_rate": 2.0196990468354102e-08, "loss": 5.622143726213835e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4578, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 75.55208444595337, "completions/min_length": 25.5, "epoch": 9.10325142715314, "grad_norm": 0.9418934316246953, "kl": 0.0668792724609375, "learning_rate": 2.0108325336755992e-08, "loss": 0.0036358728539198637, "memory(GiB)": 94.21, "reward": 1.7916666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.11393529921770096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4579, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 74.58333587646484, "completions/min_length": 31.5, "epoch": 9.105237031521469, "grad_norm": 0.0033061453495803967, "kl": 0.06890869140625, "learning_rate": 2.001985126028377e-08, "loss": 6.883487367304042e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4580, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 63.343750953674316, "completions/min_length": 24.625, "epoch": 9.1072226358898, "grad_norm": 0.00383220039253361, "kl": 0.0650634765625, "learning_rate": 1.993156827416098e-08, "loss": 6.502013275166973e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4581, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.625, "completions/mean_length": 62.47916841506958, "completions/min_length": 21.25, "epoch": 9.109208240258129, "grad_norm": 0.0028641601289803938, "kl": 0.06146240234375, "learning_rate": 1.9843476413534843e-08, "loss": 6.148928514448926e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4582, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.125, "completions/mean_length": 84.06250286102295, "completions/min_length": 34.0, "epoch": 9.111193844626458, "grad_norm": 0.0027977702187432056, "kl": 0.066436767578125, "learning_rate": 1.9755575713476692e-08, "loss": 6.640343053732067e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4583, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 73.78125238418579, "completions/min_length": 26.0, "epoch": 9.113179448994789, "grad_norm": 0.0044494915823661335, "kl": 0.0797119140625, "learning_rate": 1.966786620898164e-08, "loss": 7.963976531755179e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4584, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 66.8854193687439, "completions/min_length": 23.5, "epoch": 9.115165053363118, "grad_norm": 0.0027245810923055007, "kl": 0.0587615966796875, "learning_rate": 1.9580347934968545e-08, "loss": 5.874328053323552e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4585, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 68.93750143051147, "completions/min_length": 32.375, "epoch": 9.117150657731447, "grad_norm": 0.00782548537045295, "kl": 0.09356689453125, "learning_rate": 1.9493020926280633e-08, "loss": 9.359908290207386e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4586, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 73.15625190734863, "completions/min_length": 30.25, "epoch": 9.119136262099776, "grad_norm": 0.0026276625630214006, "kl": 0.0736083984375, "learning_rate": 1.940588521768449e-08, "loss": 7.360795279964805e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4587, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 66.91666793823242, "completions/min_length": 22.375, "epoch": 9.121121866468107, "grad_norm": 0.0025107384852920897, "kl": 0.053192138671875, "learning_rate": 1.931894084387059e-08, "loss": 5.319520641933195e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4588, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 65.88541889190674, "completions/min_length": 19.5, "epoch": 9.123107470836436, "grad_norm": 0.003262802645193201, "kl": 0.067138671875, "learning_rate": 1.9232187839453518e-08, "loss": 6.71864181640558e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4589, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 65.87500286102295, "completions/min_length": 20.25, "epoch": 9.125093075204765, "grad_norm": 0.002502658612662069, "kl": 0.0660400390625, "learning_rate": 1.914562623897137e-08, "loss": 6.607950490433723e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4590, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 66.31250190734863, "completions/min_length": 20.25, "epoch": 9.127078679573096, "grad_norm": 0.003643917542610691, "kl": 0.0711669921875, "learning_rate": 1.90592560768863e-08, "loss": 7.114700565580279e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4591, "train_speed(iter/s)": 0.022615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 66.62500047683716, "completions/min_length": 20.875, "epoch": 9.129064283941425, "grad_norm": 0.004700313693096275, "kl": 0.061798095703125, "learning_rate": 1.8973077387583968e-08, "loss": 6.183073128340766e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4592, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 58.68750190734863, "completions/min_length": 20.5, "epoch": 9.131049888309754, "grad_norm": 0.0035000341850366266, "kl": 0.063629150390625, "learning_rate": 1.8887090205374045e-08, "loss": 6.364900036714971e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4593, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 69.04166841506958, "completions/min_length": 24.5, "epoch": 9.133035492678085, "grad_norm": 0.0025512166587473156, "kl": 0.06805419921875, "learning_rate": 1.8801294564489922e-08, "loss": 6.807439785916358e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4594, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 63.81250190734863, "completions/min_length": 23.125, "epoch": 9.135021097046414, "grad_norm": 0.002736071233659325, "kl": 0.056488037109375, "learning_rate": 1.8715690499088555e-08, "loss": 5.649589002132416e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4595, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 67.73958492279053, "completions/min_length": 24.75, "epoch": 9.137006701414743, "grad_norm": 0.7635883136435511, "kl": 0.08392333984375, "learning_rate": 1.8630278043250734e-08, "loss": 0.008837152272462845, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 0.9895833358168602, "rewards/Format/std": 0.03608439117670059, "step": 4596, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 77.68750238418579, "completions/min_length": 29.875, "epoch": 9.138992305783074, "grad_norm": 0.006265500595996801, "kl": 0.07061767578125, "learning_rate": 1.8545057230981153e-08, "loss": 7.076094334479421e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4597, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 79.59375286102295, "completions/min_length": 25.25, "epoch": 9.140977910151403, "grad_norm": 0.0044094143422299245, "kl": 0.06573486328125, "learning_rate": 1.8460028096207835e-08, "loss": 6.579120235983282e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4598, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 78.91666889190674, "completions/min_length": 27.875, "epoch": 9.142963514519732, "grad_norm": 0.0033495335612856187, "kl": 0.06658935546875, "learning_rate": 1.8375190672782757e-08, "loss": 6.65129191474989e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4599, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 73.14583539962769, "completions/min_length": 28.875, "epoch": 9.14494911888806, "grad_norm": 0.004946087528936944, "kl": 0.056060791015625, "learning_rate": 1.829054499448163e-08, "loss": 5.602114833891392e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4600, "train_speed(iter/s)": 0.022614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.625, "completions/mean_length": 77.22916793823242, "completions/min_length": 25.5, "epoch": 9.146934723256392, "grad_norm": 0.004219057498605359, "kl": 0.06793212890625, "learning_rate": 1.820609109500354e-08, "loss": 6.788845348637551e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4601, "train_speed(iter/s)": 0.022613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 67.96875095367432, "completions/min_length": 20.875, "epoch": 9.14892032762472, "grad_norm": 0.005520091075769352, "kl": 0.065032958984375, "learning_rate": 1.8121829007971546e-08, "loss": 6.501689495053142e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4602, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 72.22916841506958, "completions/min_length": 26.5, "epoch": 9.15090593199305, "grad_norm": 0.028207177369508406, "kl": 0.10748291015625, "learning_rate": 1.8037758766932143e-08, "loss": 0.00010735404066508636, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4603, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.625, "completions/mean_length": 67.43750190734863, "completions/min_length": 24.5, "epoch": 9.15289153636138, "grad_norm": 0.0024923026180727827, "kl": 0.05438232421875, "learning_rate": 1.7953880405355337e-08, "loss": 5.4392890888266265e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4604, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 65.98958683013916, "completions/min_length": 21.125, "epoch": 9.15487714072971, "grad_norm": 1.6541820062026134, "kl": 0.073333740234375, "learning_rate": 1.7870193956635082e-08, "loss": -0.006934754084795713, "memory(GiB)": 94.21, "reward": 1.7395833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.29720086604356766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4605, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 68.93750190734863, "completions/min_length": 20.5, "epoch": 9.156862745098039, "grad_norm": 0.007253816748055403, "kl": 0.08563232421875, "learning_rate": 1.7786699454088562e-08, "loss": 8.563135634176433e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4606, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.375, "completions/mean_length": 71.47916841506958, "completions/min_length": 21.75, "epoch": 9.15884834946637, "grad_norm": 0.003069411617542107, "kl": 0.095855712890625, "learning_rate": 1.7703396930956906e-08, "loss": 9.584397776052356e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4607, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 71.35416841506958, "completions/min_length": 23.375, "epoch": 9.160833953834699, "grad_norm": 0.0035596101484501263, "kl": 0.07928466796875, "learning_rate": 1.7620286420404528e-08, "loss": 7.924398960312828e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4608, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.625, "completions/mean_length": 67.04166841506958, "completions/min_length": 22.5, "epoch": 9.162819558203028, "grad_norm": 0.0026477269280414115, "kl": 0.065948486328125, "learning_rate": 1.7537367955519522e-08, "loss": 6.592989666387439e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4609, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 70.78125143051147, "completions/min_length": 18.0, "epoch": 9.164805162571358, "grad_norm": 0.003292495796972032, "kl": 0.069549560546875, "learning_rate": 1.7454641569313533e-08, "loss": 6.944018241483718e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4610, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 71.68750238418579, "completions/min_length": 25.75, "epoch": 9.166790766939688, "grad_norm": 0.002757706026183243, "kl": 0.06024169921875, "learning_rate": 1.7372107294721606e-08, "loss": 6.030036456650123e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4611, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.125, "completions/mean_length": 72.35416746139526, "completions/min_length": 26.5, "epoch": 9.168776371308017, "grad_norm": 0.0034196619416398152, "kl": 0.064605712890625, "learning_rate": 1.728976516460251e-08, "loss": 6.461337034124881e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4612, "train_speed(iter/s)": 0.022612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.625, "completions/mean_length": 66.46875143051147, "completions/min_length": 23.375, "epoch": 9.170761975676346, "grad_norm": 0.0035347421915786874, "kl": 0.054351806640625, "learning_rate": 1.7207615211738302e-08, "loss": 5.439599044620991e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4613, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 66.34375190734863, "completions/min_length": 23.125, "epoch": 9.172747580044676, "grad_norm": 0.0024673838866274005, "kl": 0.054290771484375, "learning_rate": 1.7125657468834653e-08, "loss": 5.4242696933215484e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4614, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 72.77083539962769, "completions/min_length": 24.875, "epoch": 9.174733184413006, "grad_norm": 0.005636467698132973, "kl": 0.06390380859375, "learning_rate": 1.704389196852085e-08, "loss": 6.389366171788424e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4615, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 71.35416841506958, "completions/min_length": 22.25, "epoch": 9.176718788781335, "grad_norm": 1.2825463305934373, "kl": 0.109039306640625, "learning_rate": 1.69623187433493e-08, "loss": 0.012987978756427765, "memory(GiB)": 94.21, "reward": 1.9791666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.04865618050098419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4616, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 63.94791865348816, "completions/min_length": 21.875, "epoch": 9.178704393149665, "grad_norm": 0.8109076823176605, "kl": 0.06829833984375, "learning_rate": 1.688093782579608e-08, "loss": -0.005175780039280653, "memory(GiB)": 94.21, "reward": 1.7604166716337204, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1296451985836029, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4617, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.875, "completions/mean_length": 87.63541889190674, "completions/min_length": 29.0, "epoch": 9.180689997517995, "grad_norm": 1.77821381117017, "kl": 0.08428955078125, "learning_rate": 1.679974924826072e-08, "loss": 0.00458592688664794, "memory(GiB)": 94.21, "reward": 1.6354166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6354166669771075, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4618, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 73.16666889190674, "completions/min_length": 31.625, "epoch": 9.182675601886324, "grad_norm": 0.0035441475216441733, "kl": 0.056671142578125, "learning_rate": 1.671875304306608e-08, "loss": 5.667291407007724e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4619, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 75.39583492279053, "completions/min_length": 23.25, "epoch": 9.184661206254654, "grad_norm": 0.002458063614154341, "kl": 0.0523681640625, "learning_rate": 1.6637949242458483e-08, "loss": 5.237808363744989e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4620, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 68.81250238418579, "completions/min_length": 21.0, "epoch": 9.186646810622983, "grad_norm": 0.0030955618031601397, "kl": 0.06298828125, "learning_rate": 1.655733787860769e-08, "loss": 6.294525519479066e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4621, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 70.37500190734863, "completions/min_length": 22.375, "epoch": 9.188632414991313, "grad_norm": 0.002781929755595527, "kl": 0.05462646484375, "learning_rate": 1.647691898360676e-08, "loss": 5.4624215408694e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4622, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 67.44791793823242, "completions/min_length": 20.25, "epoch": 9.190618019359643, "grad_norm": 0.09012914312666065, "kl": 0.14984130859375, "learning_rate": 1.6396692589472293e-08, "loss": 0.00015010683273430914, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4623, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 69.50000286102295, "completions/min_length": 28.5, "epoch": 9.192603623727972, "grad_norm": 0.0028628400578205614, "kl": 0.066558837890625, "learning_rate": 1.6316658728143972e-08, "loss": 6.65376428514719e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4624, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 69.34375143051147, "completions/min_length": 23.0, "epoch": 9.194589228096302, "grad_norm": 0.004908068458422123, "kl": 0.062835693359375, "learning_rate": 1.623681743148503e-08, "loss": 6.280232628341764e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4625, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 70.48958492279053, "completions/min_length": 21.125, "epoch": 9.19657483246463, "grad_norm": 0.003296981886737747, "kl": 0.056427001953125, "learning_rate": 1.6157168731282035e-08, "loss": 5.6483047956135124e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4626, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.625, "completions/mean_length": 69.97916889190674, "completions/min_length": 22.375, "epoch": 9.198560436832961, "grad_norm": 0.0037947461749276686, "kl": 0.062896728515625, "learning_rate": 1.607771265924479e-08, "loss": 6.281337846303359e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4627, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 61.48958444595337, "completions/min_length": 20.0, "epoch": 9.20054604120129, "grad_norm": 1.800353267363252, "kl": 0.06561279296875, "learning_rate": 1.599844924700644e-08, "loss": -0.007479371037334204, "memory(GiB)": 94.21, "reward": 1.9895833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.9895833358168602, "rewards/CineAccuracyORM/std": 0.03608439117670059, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4628, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 71.85416841506958, "completions/min_length": 22.0, "epoch": 9.20253164556962, "grad_norm": 0.0030381041800342404, "kl": 0.05810546875, "learning_rate": 1.5919378526123573e-08, "loss": 5.815225813421421e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4629, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 69.53125190734863, "completions/min_length": 28.875, "epoch": 9.20451724993795, "grad_norm": 0.004775111908317754, "kl": 0.092376708984375, "learning_rate": 1.5840500528075728e-08, "loss": 9.240680810762569e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4630, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 70.88541793823242, "completions/min_length": 22.875, "epoch": 9.20650285430628, "grad_norm": 0.004090347353094927, "kl": 0.086456298828125, "learning_rate": 1.5761815284266123e-08, "loss": 8.648833318147808e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4631, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.875, "completions/mean_length": 66.4791693687439, "completions/min_length": 21.875, "epoch": 9.208488458674609, "grad_norm": 0.005509423466539693, "kl": 0.064788818359375, "learning_rate": 1.5683322826020974e-08, "loss": 6.483864854089916e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4632, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 64.41666841506958, "completions/min_length": 22.5, "epoch": 9.21047406304294, "grad_norm": 0.005990543129378365, "kl": 0.0611572265625, "learning_rate": 1.5605023184589616e-08, "loss": 6.118452438386157e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4633, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 72.30208444595337, "completions/min_length": 24.875, "epoch": 9.212459667411268, "grad_norm": 0.0028097457505736594, "kl": 0.060455322265625, "learning_rate": 1.5526916391145062e-08, "loss": 6.036656486685388e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4634, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.125, "completions/mean_length": 57.86458444595337, "completions/min_length": 20.125, "epoch": 9.214445271779597, "grad_norm": 0.0028717730378125853, "kl": 0.073699951171875, "learning_rate": 1.544900247678321e-08, "loss": 7.37295049475506e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4635, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 73.97916841506958, "completions/min_length": 18.875, "epoch": 9.216430876147928, "grad_norm": 0.004402854505414873, "kl": 0.073516845703125, "learning_rate": 1.5371281472523202e-08, "loss": 7.35412904759869e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4636, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 73.48958492279053, "completions/min_length": 28.625, "epoch": 9.218416480516257, "grad_norm": 0.0044671089767675245, "kl": 0.06768798828125, "learning_rate": 1.5293753409307554e-08, "loss": 6.768741150153801e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4637, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 63.489585399627686, "completions/min_length": 19.5, "epoch": 9.220402084884586, "grad_norm": 0.00530022185195528, "kl": 0.097259521484375, "learning_rate": 1.5216418318001635e-08, "loss": 9.731283353175968e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4638, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 76.37500190734863, "completions/min_length": 29.0, "epoch": 9.222387689252916, "grad_norm": 2.4100379078859566, "kl": 0.078887939453125, "learning_rate": 1.5139276229394425e-08, "loss": -0.01215621642768383, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.2523707337677479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4639, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 63.43750238418579, "completions/min_length": 26.75, "epoch": 9.224373293621246, "grad_norm": 0.0028845302873262706, "kl": 0.06353759765625, "learning_rate": 1.506232717419764e-08, "loss": 6.358255632221699e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4640, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 70.15625190734863, "completions/min_length": 21.5, "epoch": 9.226358897989575, "grad_norm": 0.004400338843049586, "kl": 0.061492919921875, "learning_rate": 1.4985571183046498e-08, "loss": 6.15089520579204e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4641, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 68.4479193687439, "completions/min_length": 25.0, "epoch": 9.228344502357904, "grad_norm": 0.00286768406648142, "kl": 0.094024658203125, "learning_rate": 1.4909008286499104e-08, "loss": 9.388741455040872e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4642, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 69.89583587646484, "completions/min_length": 21.375, "epoch": 9.230330106726235, "grad_norm": 0.0025233465019816447, "kl": 0.05328369140625, "learning_rate": 1.4832638515036855e-08, "loss": 5.32776684849523e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4643, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.375, "completions/mean_length": 56.375001430511475, "completions/min_length": 20.875, "epoch": 9.232315711094564, "grad_norm": 0.0041635362582778356, "kl": 0.05438232421875, "learning_rate": 1.4756461899064098e-08, "loss": 5.441024404717609e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4644, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.125, "completions/mean_length": 61.968751430511475, "completions/min_length": 20.875, "epoch": 9.234301315462893, "grad_norm": 0.0033742331395539425, "kl": 0.0572509765625, "learning_rate": 1.468047846890852e-08, "loss": 5.7179709983756766e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4645, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 75.94791889190674, "completions/min_length": 25.5, "epoch": 9.236286919831224, "grad_norm": 0.002747866799211444, "kl": 0.072021484375, "learning_rate": 1.460468825482053e-08, "loss": 7.202931010397151e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4646, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 64.62500095367432, "completions/min_length": 27.625, "epoch": 9.238272524199553, "grad_norm": 0.003328410056861108, "kl": 0.068939208984375, "learning_rate": 1.4529091286973993e-08, "loss": 6.895697151776403e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4647, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.875, "completions/mean_length": 72.48958587646484, "completions/min_length": 22.375, "epoch": 9.240258128567882, "grad_norm": 0.003374029888985708, "kl": 0.074249267578125, "learning_rate": 1.4453687595465668e-08, "loss": 7.420840120175853e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4648, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 71.89583587646484, "completions/min_length": 21.75, "epoch": 9.242243732936213, "grad_norm": 0.0026353953920421553, "kl": 0.058197021484375, "learning_rate": 1.4378477210315265e-08, "loss": 5.8207337133353576e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4649, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 71.62500238418579, "completions/min_length": 28.125, "epoch": 9.244229337304542, "grad_norm": 0.0036660295666054695, "kl": 0.069610595703125, "learning_rate": 1.4303460161465775e-08, "loss": 6.963525083847344e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4650, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 74.520836353302, "completions/min_length": 25.125, "epoch": 9.246214941672871, "grad_norm": 0.006198906693427003, "kl": 0.083587646484375, "learning_rate": 1.4228636478783029e-08, "loss": 8.350690768565983e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4651, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.625, "completions/mean_length": 76.56250238418579, "completions/min_length": 26.0, "epoch": 9.2482005460412, "grad_norm": 0.004739991098556912, "kl": 0.07421875, "learning_rate": 1.4154006192055922e-08, "loss": 7.420215115416795e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4652, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 68.26041889190674, "completions/min_length": 24.125, "epoch": 9.250186150409531, "grad_norm": 0.0034464099780213927, "kl": 0.055450439453125, "learning_rate": 1.407956933099641e-08, "loss": 5.5444335885113105e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4653, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 59.781251430511475, "completions/min_length": 20.625, "epoch": 9.25217175477786, "grad_norm": 0.005455917102142847, "kl": 0.05889892578125, "learning_rate": 1.4005325925239286e-08, "loss": 5.8897461713058874e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4654, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 76.25000190734863, "completions/min_length": 29.5, "epoch": 9.25415735914619, "grad_norm": 0.0023763978636056633, "kl": 0.060638427734375, "learning_rate": 1.3931276004342574e-08, "loss": 6.063852197257802e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4655, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.625, "completions/mean_length": 72.76041841506958, "completions/min_length": 25.5, "epoch": 9.25614296351452, "grad_norm": 0.0032395535386128657, "kl": 0.07879638671875, "learning_rate": 1.3857419597787134e-08, "loss": 7.876742165535688e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4656, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 74.94791889190674, "completions/min_length": 26.625, "epoch": 9.25812856788285, "grad_norm": 0.004504762660977818, "kl": 0.07012939453125, "learning_rate": 1.3783756734976616e-08, "loss": 7.008872489677742e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4657, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 76.39583587646484, "completions/min_length": 27.0, "epoch": 9.260114172251178, "grad_norm": 0.0028642774654113433, "kl": 0.07293701171875, "learning_rate": 1.3710287445237944e-08, "loss": 7.288622873602435e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4658, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.625, "completions/mean_length": 68.12500190734863, "completions/min_length": 23.625, "epoch": 9.26209977661951, "grad_norm": 0.0026484065149032314, "kl": 0.0621337890625, "learning_rate": 1.3637011757820726e-08, "loss": 6.21078215772286e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4659, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.375, "completions/mean_length": 61.208335399627686, "completions/min_length": 23.625, "epoch": 9.264085380987838, "grad_norm": 0.005715638637676868, "kl": 0.062255859375, "learning_rate": 1.3563929701897626e-08, "loss": 6.226979894563556e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4660, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 68.01041746139526, "completions/min_length": 24.875, "epoch": 9.266070985356167, "grad_norm": 0.011858685130890681, "kl": 0.056884765625, "learning_rate": 1.3491041306564088e-08, "loss": 5.6855817092582583e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4661, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.125, "completions/mean_length": 67.82291889190674, "completions/min_length": 19.625, "epoch": 9.268056589724498, "grad_norm": 0.0038126816445703485, "kl": 0.066375732421875, "learning_rate": 1.341834660083857e-08, "loss": 6.646734982496127e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4662, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 66.01041889190674, "completions/min_length": 24.5, "epoch": 9.270042194092827, "grad_norm": 0.00291322523257202, "kl": 0.061370849609375, "learning_rate": 1.3345845613662476e-08, "loss": 6.139225297374651e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4663, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 66.87500190734863, "completions/min_length": 20.625, "epoch": 9.272027798461156, "grad_norm": 0.006682560022615333, "kl": 0.067626953125, "learning_rate": 1.3273538373899995e-08, "loss": 6.763551209587604e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4664, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 63.520835399627686, "completions/min_length": 25.25, "epoch": 9.274013402829485, "grad_norm": 0.004315624528626272, "kl": 0.06414794921875, "learning_rate": 1.3201424910337987e-08, "loss": 6.41354126855731e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4665, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.875, "completions/mean_length": 65.90625238418579, "completions/min_length": 20.75, "epoch": 9.275999007197816, "grad_norm": 0.002979036685849788, "kl": 0.064239501953125, "learning_rate": 1.31295052516866e-08, "loss": 6.42929517198354e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4666, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 68.46875190734863, "completions/min_length": 23.375, "epoch": 9.277984611566145, "grad_norm": 0.0030776501434818166, "kl": 0.064971923828125, "learning_rate": 1.305777942657843e-08, "loss": 6.501008465420455e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4667, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 68.9791693687439, "completions/min_length": 23.875, "epoch": 9.279970215934474, "grad_norm": 0.004760245503712212, "kl": 0.06268310546875, "learning_rate": 1.2986247463569134e-08, "loss": 6.27110421191901e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4668, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 71.87500238418579, "completions/min_length": 24.875, "epoch": 9.281955820302805, "grad_norm": 0.019004660454931054, "kl": 0.059356689453125, "learning_rate": 1.2914909391137097e-08, "loss": 5.9363570471759886e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4669, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 70.40625095367432, "completions/min_length": 34.0, "epoch": 9.283941424671134, "grad_norm": 1.658693067486625, "kl": 0.063323974609375, "learning_rate": 1.2843765237683547e-08, "loss": 0.0020861129742115736, "memory(GiB)": 94.21, "reward": 1.65625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.65625, "rewards/CineAccuracyORM/std": 0.3829289712011814, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4670, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 72.93750095367432, "completions/min_length": 26.125, "epoch": 9.285927029039463, "grad_norm": 0.004543723462675412, "kl": 0.0657958984375, "learning_rate": 1.2772815031532436e-08, "loss": 6.585180381080136e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4671, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 61.000000953674316, "completions/min_length": 23.125, "epoch": 9.287912633407794, "grad_norm": 0.0036002204952407684, "kl": 0.061431884765625, "learning_rate": 1.270205880093067e-08, "loss": 6.150422268547118e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4672, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 63.37500190734863, "completions/min_length": 23.875, "epoch": 9.289898237776123, "grad_norm": 0.004161064341715211, "kl": 0.075347900390625, "learning_rate": 1.2631496574047662e-08, "loss": 7.526666740886867e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4673, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 69.29166841506958, "completions/min_length": 28.625, "epoch": 9.291883842144452, "grad_norm": 2.1218900262433533, "kl": 0.0576171875, "learning_rate": 1.2561128378975827e-08, "loss": 0.013383294455707073, "memory(GiB)": 94.21, "reward": 1.9270833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.9270833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4674, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 62.60416841506958, "completions/min_length": 23.0, "epoch": 9.293869446512783, "grad_norm": 0.004089983648510965, "kl": 0.053009033203125, "learning_rate": 1.2490954243730256e-08, "loss": 5.3028357797302306e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4675, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.375, "completions/mean_length": 67.07291746139526, "completions/min_length": 20.25, "epoch": 9.295855050881112, "grad_norm": 0.004408340773121167, "kl": 0.073699951171875, "learning_rate": 1.2420974196248712e-08, "loss": 7.364851626334712e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4676, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 70.47916841506958, "completions/min_length": 19.125, "epoch": 9.297840655249441, "grad_norm": 0.0027681584412542403, "kl": 0.061981201171875, "learning_rate": 1.2351188264391855e-08, "loss": 6.200503412401304e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4677, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 73.92708444595337, "completions/min_length": 21.75, "epoch": 9.29982625961777, "grad_norm": 0.0022999499715569303, "kl": 0.063201904296875, "learning_rate": 1.2281596475942791e-08, "loss": 6.317153747659177e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4678, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.375, "completions/mean_length": 66.06250238418579, "completions/min_length": 25.125, "epoch": 9.301811863986101, "grad_norm": 0.002809121300505957, "kl": 0.057891845703125, "learning_rate": 1.2212198858607691e-08, "loss": 5.780795618193224e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4679, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.625, "completions/mean_length": 77.85416889190674, "completions/min_length": 27.0, "epoch": 9.30379746835443, "grad_norm": 0.006530546420004666, "kl": 0.11407470703125, "learning_rate": 1.2142995440015125e-08, "loss": 0.00011410249135224149, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4680, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 64.07291841506958, "completions/min_length": 21.125, "epoch": 9.30578307272276, "grad_norm": 0.0026898165411776745, "kl": 0.05908203125, "learning_rate": 1.2073986247716383e-08, "loss": 5.902966222492978e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4681, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.875, "completions/mean_length": 68.34375190734863, "completions/min_length": 26.375, "epoch": 9.30776867709109, "grad_norm": 0.005286213403348124, "kl": 0.09185791015625, "learning_rate": 1.2005171309185601e-08, "loss": 9.193408186547458e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4682, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 73.03125238418579, "completions/min_length": 26.5, "epoch": 9.30975428145942, "grad_norm": 0.0027852237749129358, "kl": 0.06634521484375, "learning_rate": 1.1936550651819478e-08, "loss": 6.622633372899145e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4683, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 76.44791984558105, "completions/min_length": 27.0, "epoch": 9.311739885827748, "grad_norm": 0.003326787790127927, "kl": 0.068359375, "learning_rate": 1.186812430293732e-08, "loss": 6.83020189171657e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4684, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 69.42708539962769, "completions/min_length": 32.25, "epoch": 9.313725490196079, "grad_norm": 0.003682943475043186, "kl": 0.062713623046875, "learning_rate": 1.1799892289781122e-08, "loss": 6.274000043049455e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4685, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 56.37500238418579, "completions/min_length": 20.875, "epoch": 9.315711094564408, "grad_norm": 2.3259162376705733, "kl": 0.0579833984375, "learning_rate": 1.1731854639515537e-08, "loss": -0.00476849963888526, "memory(GiB)": 94.21, "reward": 1.8333333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.19210398569703102, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4686, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 71.57291746139526, "completions/min_length": 24.25, "epoch": 9.317696698932737, "grad_norm": 0.0030422988140760532, "kl": 0.055511474609375, "learning_rate": 1.1664011379227789e-08, "loss": 5.5543983762618154e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4687, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 66.04166841506958, "completions/min_length": 22.0, "epoch": 9.319682303301068, "grad_norm": 2.1199713266306306, "kl": 0.057281494140625, "learning_rate": 1.1596362535927718e-08, "loss": 5.731731653213501e-05, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4688, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 62.57291889190674, "completions/min_length": 21.25, "epoch": 9.321667907669397, "grad_norm": 0.004521773177240197, "kl": 0.055633544921875, "learning_rate": 1.1528908136547722e-08, "loss": 5.5581323977094144e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4689, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.375, "completions/mean_length": 60.614585399627686, "completions/min_length": 20.875, "epoch": 9.323653512037726, "grad_norm": 0.002860042648146483, "kl": 0.06781005859375, "learning_rate": 1.1461648207942986e-08, "loss": 6.774544453946874e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4690, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 85.19791889190674, "completions/min_length": 28.75, "epoch": 9.325639116406055, "grad_norm": 0.002962467463999657, "kl": 0.072509765625, "learning_rate": 1.1394582776890982e-08, "loss": 7.261200516950339e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4691, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 60.80208444595337, "completions/min_length": 22.25, "epoch": 9.327624720774386, "grad_norm": 0.0029881200130602494, "kl": 0.052825927734375, "learning_rate": 1.1327711870091961e-08, "loss": 5.276673982734792e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4692, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 73.84375286102295, "completions/min_length": 28.875, "epoch": 9.329610325142715, "grad_norm": 0.0025675677347992082, "kl": 0.063995361328125, "learning_rate": 1.1261035514168681e-08, "loss": 6.402004510164261e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4693, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 69.81250095367432, "completions/min_length": 26.25, "epoch": 9.331595929511044, "grad_norm": 0.012555309886568123, "kl": 0.08013916015625, "learning_rate": 1.1194553735666357e-08, "loss": 8.008559234440327e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4694, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 57.875001430511475, "completions/min_length": 21.375, "epoch": 9.333581533879375, "grad_norm": 0.006292320686275326, "kl": 0.065582275390625, "learning_rate": 1.1128266561052812e-08, "loss": 6.566046795342118e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4695, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 61.76041889190674, "completions/min_length": 25.125, "epoch": 9.335567138247704, "grad_norm": 0.006171530791326735, "kl": 0.069854736328125, "learning_rate": 1.1062174016718385e-08, "loss": 6.988817767705768e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4696, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.875, "completions/mean_length": 69.70833539962769, "completions/min_length": 18.75, "epoch": 9.337552742616033, "grad_norm": 0.0025086037698560403, "kl": 0.0615234375, "learning_rate": 1.0996276128975968e-08, "loss": 6.139362812973559e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4697, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.375, "completions/mean_length": 76.54166889190674, "completions/min_length": 25.375, "epoch": 9.339538346984364, "grad_norm": 0.02737432467249086, "kl": 0.077301025390625, "learning_rate": 1.0930572924060965e-08, "loss": 7.734097016509622e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4698, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.625, "completions/mean_length": 63.468751430511475, "completions/min_length": 21.625, "epoch": 9.341523951352693, "grad_norm": 0.006241313812205991, "kl": 0.05694580078125, "learning_rate": 1.0865064428131121e-08, "loss": 5.693789717042819e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4699, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 61.91666793823242, "completions/min_length": 19.625, "epoch": 9.343509555721022, "grad_norm": 0.0036902125402077783, "kl": 0.0611572265625, "learning_rate": 1.0799750667266793e-08, "loss": 6.114657298894599e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4700, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 77.114586353302, "completions/min_length": 29.5, "epoch": 9.345495160089353, "grad_norm": 0.0038632889387378755, "kl": 0.061309814453125, "learning_rate": 1.0734631667470851e-08, "loss": 6.125131767475978e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4701, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 68.614586353302, "completions/min_length": 24.25, "epoch": 9.347480764457682, "grad_norm": 0.010004792351162892, "kl": 0.075225830078125, "learning_rate": 1.0669707454668386e-08, "loss": 7.525092223659158e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4702, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 72.66666889190674, "completions/min_length": 23.375, "epoch": 9.349466368826011, "grad_norm": 0.00234345875696059, "kl": 0.0521240234375, "learning_rate": 1.0604978054707336e-08, "loss": 5.212753239902668e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4703, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.375, "completions/mean_length": 72.03125238418579, "completions/min_length": 19.5, "epoch": 9.35145197319434, "grad_norm": 0.005823573025601861, "kl": 0.063385009765625, "learning_rate": 1.0540443493357809e-08, "loss": 6.330433097900823e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4704, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.375, "completions/mean_length": 57.218751430511475, "completions/min_length": 21.125, "epoch": 9.353437577562671, "grad_norm": 0.004899766590165031, "kl": 0.077667236328125, "learning_rate": 1.0476103796312252e-08, "loss": 7.764453766867518e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4705, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 73.44791841506958, "completions/min_length": 26.0, "epoch": 9.355423181931, "grad_norm": 0.0041840631502931555, "kl": 0.05816650390625, "learning_rate": 1.0411958989185787e-08, "loss": 5.819627403980121e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4706, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 61.34375190734863, "completions/min_length": 19.25, "epoch": 9.35740878629933, "grad_norm": 0.01497782313239926, "kl": 0.0701904296875, "learning_rate": 1.0348009097515764e-08, "loss": 7.023427315289155e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4707, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 60.88541865348816, "completions/min_length": 25.0, "epoch": 9.35939439066766, "grad_norm": 0.004822248479687538, "kl": 0.057281494140625, "learning_rate": 1.0284254146762095e-08, "loss": 5.732703721150756e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4708, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 63.864586353302, "completions/min_length": 20.625, "epoch": 9.361379995035989, "grad_norm": 0.003641523648466179, "kl": 0.069671630859375, "learning_rate": 1.0220694162306919e-08, "loss": 6.972481787670404e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4709, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 72.84375190734863, "completions/min_length": 30.625, "epoch": 9.363365599404318, "grad_norm": 0.003934393481644837, "kl": 0.061309814453125, "learning_rate": 1.0157329169454831e-08, "loss": 6.131265399744734e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4710, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.625, "completions/mean_length": 65.489586353302, "completions/min_length": 19.375, "epoch": 9.365351203772649, "grad_norm": 0.0027480278037472407, "kl": 0.056884765625, "learning_rate": 1.0094159193432817e-08, "loss": 5.6855002185329795e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4711, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 62.937501430511475, "completions/min_length": 24.5, "epoch": 9.367336808140978, "grad_norm": 0.005520510805968863, "kl": 0.06121826171875, "learning_rate": 1.0031184259390201e-08, "loss": 6.12553849350661e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4712, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 68.89583492279053, "completions/min_length": 25.0, "epoch": 9.369322412509307, "grad_norm": 0.003988632374351008, "kl": 0.076995849609375, "learning_rate": 9.968404392398545e-09, "loss": 7.705154712311924e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4713, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.625, "completions/mean_length": 65.03125143051147, "completions/min_length": 22.0, "epoch": 9.371308016877638, "grad_norm": 0.0042690457094859075, "kl": 0.063934326171875, "learning_rate": 9.905819617452015e-09, "loss": 6.390989437932149e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4714, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 70.98958539962769, "completions/min_length": 29.5, "epoch": 9.373293621245967, "grad_norm": 0.018463913800104363, "kl": 0.087982177734375, "learning_rate": 9.843429959466797e-09, "loss": 8.794294262770563e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4715, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 68.28125190734863, "completions/min_length": 23.0, "epoch": 9.375279225614296, "grad_norm": 0.0023519606164086864, "kl": 0.0545654296875, "learning_rate": 9.781235443281632e-09, "loss": 5.451842298498377e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4716, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 69.01041793823242, "completions/min_length": 26.625, "epoch": 9.377264829982625, "grad_norm": 0.002444943005011755, "kl": 0.070098876953125, "learning_rate": 9.71923609365749e-09, "loss": 7.008370448602363e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4717, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.375, "completions/mean_length": 69.18750238418579, "completions/min_length": 21.125, "epoch": 9.379250434350956, "grad_norm": 0.0033534467252879425, "kl": 0.07012939453125, "learning_rate": 9.657431935277627e-09, "loss": 7.007951353443787e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4718, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 72.58333468437195, "completions/min_length": 31.625, "epoch": 9.381236038719285, "grad_norm": 0.0025345169842983226, "kl": 0.06121826171875, "learning_rate": 9.595822992747582e-09, "loss": 6.116722943261266e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4719, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 74.47916746139526, "completions/min_length": 25.875, "epoch": 9.383221643087614, "grad_norm": 0.0030452810210975035, "kl": 0.075775146484375, "learning_rate": 9.53440929059518e-09, "loss": 7.574297342216596e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4720, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.875, "completions/mean_length": 69.96875143051147, "completions/min_length": 29.875, "epoch": 9.385207247455945, "grad_norm": 0.004986557874511063, "kl": 0.06732177734375, "learning_rate": 9.47319085327053e-09, "loss": 6.73053291393444e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4721, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 61.67708444595337, "completions/min_length": 18.25, "epoch": 9.387192851824274, "grad_norm": 0.00542078759092515, "kl": 0.065216064453125, "learning_rate": 9.412167705146024e-09, "loss": 6.51925802230835e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4722, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.125, "completions/mean_length": 81.77083587646484, "completions/min_length": 29.5, "epoch": 9.389178456192603, "grad_norm": 0.004792953102784712, "kl": 0.087127685546875, "learning_rate": 9.351339870516229e-09, "loss": 8.715003787074238e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4723, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 70.01041841506958, "completions/min_length": 20.5, "epoch": 9.391164060560934, "grad_norm": 0.002398671907439135, "kl": 0.061004638671875, "learning_rate": 9.29070737359805e-09, "loss": 6.0993246734142303e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4724, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.625, "completions/mean_length": 62.364585399627686, "completions/min_length": 25.25, "epoch": 9.393149664929263, "grad_norm": 0.004064201325455677, "kl": 0.0501861572265625, "learning_rate": 9.230270238530625e-09, "loss": 5.020498065277934e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4725, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.875, "completions/mean_length": 65.00000238418579, "completions/min_length": 27.75, "epoch": 9.395135269297592, "grad_norm": 0.0030962776888813653, "kl": 0.067352294921875, "learning_rate": 9.170028489375147e-09, "loss": 6.734954513376579e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4726, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.125, "completions/mean_length": 67.42708492279053, "completions/min_length": 19.75, "epoch": 9.397120873665923, "grad_norm": 0.0028067142862925596, "kl": 0.05645751953125, "learning_rate": 9.10998215011527e-09, "loss": 5.645436976919882e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4727, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 70.15625286102295, "completions/min_length": 27.75, "epoch": 9.399106478034252, "grad_norm": 0.0038383717763806387, "kl": 0.06597900390625, "learning_rate": 9.050131244656644e-09, "loss": 6.601517816307023e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4728, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 65.64583492279053, "completions/min_length": 26.25, "epoch": 9.401092082402581, "grad_norm": 0.004151834635545548, "kl": 0.06622314453125, "learning_rate": 8.990475796827269e-09, "loss": 6.628276605624706e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4729, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 71.15625238418579, "completions/min_length": 19.625, "epoch": 9.40307768677091, "grad_norm": 0.007674940628711286, "kl": 0.07598876953125, "learning_rate": 8.931015830377142e-09, "loss": 7.603838457725942e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4730, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 64.61458539962769, "completions/min_length": 17.5, "epoch": 9.405063291139241, "grad_norm": 0.0026441533361155033, "kl": 0.051788330078125, "learning_rate": 8.871751368978553e-09, "loss": 5.176360355108045e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4731, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 74.46875238418579, "completions/min_length": 23.875, "epoch": 9.40704889550757, "grad_norm": 0.004012489639981499, "kl": 0.070587158203125, "learning_rate": 8.812682436226126e-09, "loss": 7.064524106681347e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4732, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.125, "completions/mean_length": 73.56250238418579, "completions/min_length": 17.375, "epoch": 9.409034499875899, "grad_norm": 0.004597349706150479, "kl": 0.062469482421875, "learning_rate": 8.753809055636274e-09, "loss": 6.2429258832708e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4733, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 75.16666793823242, "completions/min_length": 18.75, "epoch": 9.41102010424423, "grad_norm": 0.0034495975398877387, "kl": 0.058807373046875, "learning_rate": 8.695131250647802e-09, "loss": 5.885846621822566e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4734, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 69.05208587646484, "completions/min_length": 24.125, "epoch": 9.413005708612559, "grad_norm": 0.0029705571962952175, "kl": 0.064056396484375, "learning_rate": 8.636649044621635e-09, "loss": 6.407409091480076e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4735, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 69.70833492279053, "completions/min_length": 22.75, "epoch": 9.414991312980888, "grad_norm": 0.004414790572474074, "kl": 0.07684326171875, "learning_rate": 8.578362460840705e-09, "loss": 7.691128848819062e-05, "memory(GiB)": 94.21, "reward": 1.5625, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5625, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4736, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 72.40625190734863, "completions/min_length": 22.875, "epoch": 9.416976917349219, "grad_norm": 0.0034457476140950876, "kl": 0.05279541015625, "learning_rate": 8.520271522510225e-09, "loss": 5.2802231948589906e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4737, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.875, "completions/mean_length": 61.35416841506958, "completions/min_length": 18.5, "epoch": 9.418962521717548, "grad_norm": 0.0030778779039292072, "kl": 0.048736572265625, "learning_rate": 8.462376252757474e-09, "loss": 4.8792069719638675e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4738, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 73.56250143051147, "completions/min_length": 27.625, "epoch": 9.420948126085877, "grad_norm": 0.0035116356370842126, "kl": 0.069671630859375, "learning_rate": 8.404676674631684e-09, "loss": 6.960570317460224e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4739, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.125, "completions/mean_length": 67.29166841506958, "completions/min_length": 21.0, "epoch": 9.422933730454208, "grad_norm": 0.0044157598974075216, "kl": 0.05859375, "learning_rate": 8.347172811104364e-09, "loss": 5.867490472155623e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4740, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 71.88541889190674, "completions/min_length": 24.0, "epoch": 9.424919334822537, "grad_norm": 0.005147732832712355, "kl": 0.07659912109375, "learning_rate": 8.289864685069038e-09, "loss": 7.666927558602765e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4741, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 65.87500143051147, "completions/min_length": 24.25, "epoch": 9.426904939190866, "grad_norm": 0.0031724021113877414, "kl": 0.061981201171875, "learning_rate": 8.232752319341174e-09, "loss": 6.193173612700775e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4742, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 65.2604193687439, "completions/min_length": 20.875, "epoch": 9.428890543559195, "grad_norm": 0.9964460368223541, "kl": 0.06390380859375, "learning_rate": 8.175835736658587e-09, "loss": -0.008546522818505764, "memory(GiB)": 94.21, "reward": 1.90625, "reward_std": 0.03423266112804413, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12181249633431435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4743, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 69.46875238418579, "completions/min_length": 26.25, "epoch": 9.430876147927526, "grad_norm": 0.00432103319440194, "kl": 0.0723876953125, "learning_rate": 8.119114959680929e-09, "loss": 7.235478551592678e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4744, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 65.36458539962769, "completions/min_length": 25.5, "epoch": 9.432861752295855, "grad_norm": 0.0024778085971514656, "kl": 0.0513153076171875, "learning_rate": 8.062590010989856e-09, "loss": 5.134732782607898e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4745, "train_speed(iter/s)": 0.022611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 69.26041793823242, "completions/min_length": 21.625, "epoch": 9.434847356664184, "grad_norm": 0.0036252210065991974, "kl": 0.07421875, "learning_rate": 8.006260913089314e-09, "loss": 7.41733965696767e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4746, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 81.91666841506958, "completions/min_length": 27.0, "epoch": 9.436832961032515, "grad_norm": 0.005196864088847535, "kl": 0.064544677734375, "learning_rate": 7.950127688405028e-09, "loss": 6.452913657994941e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4747, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 72.18750190734863, "completions/min_length": 25.875, "epoch": 9.438818565400844, "grad_norm": 0.0022245019094381596, "kl": 0.06964111328125, "learning_rate": 7.894190359284847e-09, "loss": 6.965249485801905e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4748, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 66.62500238418579, "completions/min_length": 23.75, "epoch": 9.440804169769173, "grad_norm": 0.004900162859226985, "kl": 0.077392578125, "learning_rate": 7.838448947998622e-09, "loss": 7.730567449470982e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4749, "train_speed(iter/s)": 0.02261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 60.364585399627686, "completions/min_length": 25.0, "epoch": 9.442789774137504, "grad_norm": 0.005911987450447452, "kl": 0.060821533203125, "learning_rate": 7.782903476738156e-09, "loss": 6.0835867770947516e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4750, "train_speed(iter/s)": 0.022609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 78.60416889190674, "completions/min_length": 22.5, "epoch": 9.444775378505833, "grad_norm": 0.0023933015481466626, "kl": 0.063018798828125, "learning_rate": 7.727553967617373e-09, "loss": 6.30479771643877e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4751, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 74.62500190734863, "completions/min_length": 23.875, "epoch": 9.446760982874162, "grad_norm": 0.003740260315444024, "kl": 0.0643310546875, "learning_rate": 7.672400442672088e-09, "loss": 6.43393286736682e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4752, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 69.37500238418579, "completions/min_length": 22.5, "epoch": 9.448746587242493, "grad_norm": 0.016130180695109995, "kl": 0.078369140625, "learning_rate": 7.617442923859962e-09, "loss": 7.831267430447042e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4753, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.625, "completions/mean_length": 69.44791889190674, "completions/min_length": 22.0, "epoch": 9.450732191610822, "grad_norm": 0.004251603683274947, "kl": 0.062957763671875, "learning_rate": 7.562681433060936e-09, "loss": 6.290737655945122e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4754, "train_speed(iter/s)": 0.022608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 59.77083444595337, "completions/min_length": 18.125, "epoch": 9.45271779597915, "grad_norm": 0.003274038669974271, "kl": 0.0611572265625, "learning_rate": 7.508115992076625e-09, "loss": 6.109652167651802e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4755, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 67.31250238418579, "completions/min_length": 28.125, "epoch": 9.45470340034748, "grad_norm": 0.0024363351046544665, "kl": 0.06524658203125, "learning_rate": 7.453746622630707e-09, "loss": 6.52419839752838e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4756, "train_speed(iter/s)": 0.022607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 86.57291984558105, "completions/min_length": 25.5, "epoch": 9.45668900471581, "grad_norm": 0.0030635568883373735, "kl": 0.07037353515625, "learning_rate": 7.3995733463688704e-09, "loss": 7.033345173113048e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4757, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.125, "completions/mean_length": 69.05208492279053, "completions/min_length": 25.25, "epoch": 9.45867460908414, "grad_norm": 0.0033999767446773474, "kl": 0.052490234375, "learning_rate": 7.345596184858472e-09, "loss": 5.249096648185514e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4758, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 70.82291984558105, "completions/min_length": 26.375, "epoch": 9.460660213452469, "grad_norm": 0.006809337357309042, "kl": 0.074462890625, "learning_rate": 7.291815159589154e-09, "loss": 7.44737044442445e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4759, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.625, "completions/mean_length": 61.583335399627686, "completions/min_length": 26.125, "epoch": 9.4626458178208, "grad_norm": 0.005747356194290514, "kl": 0.052032470703125, "learning_rate": 7.238230291972236e-09, "loss": 5.196863639866933e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4760, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 78.22916889190674, "completions/min_length": 27.75, "epoch": 9.464631422189129, "grad_norm": 0.004646863536684325, "kl": 0.058380126953125, "learning_rate": 7.18484160334093e-09, "loss": 5.8331352192908525e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4761, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 77.45833683013916, "completions/min_length": 27.5, "epoch": 9.466617026557458, "grad_norm": 0.0024295272145912865, "kl": 0.060028076171875, "learning_rate": 7.131649114950511e-09, "loss": 5.995645187795162e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4762, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.625, "completions/mean_length": 75.20833539962769, "completions/min_length": 25.125, "epoch": 9.468602630925789, "grad_norm": 0.006257636012124017, "kl": 0.080169677734375, "learning_rate": 7.078652847977984e-09, "loss": 8.007712312974036e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4763, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 68.77083587646484, "completions/min_length": 27.875, "epoch": 9.470588235294118, "grad_norm": 0.6966685689182638, "kl": 0.0635986328125, "learning_rate": 7.02585282352236e-09, "loss": -0.0060100010596215725, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.10136350989341736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4764, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 66.1354193687439, "completions/min_length": 28.625, "epoch": 9.472573839662447, "grad_norm": 0.002478926389017352, "kl": 0.070587158203125, "learning_rate": 6.97324906260438e-09, "loss": 7.055184687487781e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4765, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.875, "completions/mean_length": 67.35416889190674, "completions/min_length": 26.5, "epoch": 9.474559444030778, "grad_norm": 0.0028014915464666655, "kl": 0.069366455078125, "learning_rate": 6.920841586166737e-09, "loss": 6.921897147549316e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4766, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 63.23958492279053, "completions/min_length": 18.625, "epoch": 9.476545048399107, "grad_norm": 0.00392661371114939, "kl": 0.06787109375, "learning_rate": 6.868630415074017e-09, "loss": 6.784753350075334e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4767, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 64.0104193687439, "completions/min_length": 20.75, "epoch": 9.478530652767436, "grad_norm": 0.009148566525518756, "kl": 0.06280517578125, "learning_rate": 6.816615570112538e-09, "loss": 6.287980795605108e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4768, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 72.46875286102295, "completions/min_length": 33.625, "epoch": 9.480516257135765, "grad_norm": 0.002558831474404926, "kl": 0.066131591796875, "learning_rate": 6.764797071990569e-09, "loss": 6.613253935938701e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4769, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 70.63541889190674, "completions/min_length": 26.875, "epoch": 9.482501861504096, "grad_norm": 0.003475030913036586, "kl": 0.066680908203125, "learning_rate": 6.713174941338162e-09, "loss": 6.667665729764849e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4770, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 64.40625238418579, "completions/min_length": 20.0, "epoch": 9.484487465872425, "grad_norm": 0.006578560369885898, "kl": 0.0794677734375, "learning_rate": 6.661749198707156e-09, "loss": 7.930537685751915e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4771, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 78.01041984558105, "completions/min_length": 29.5, "epoch": 9.486473070240754, "grad_norm": 0.004318152639343838, "kl": 0.05926513671875, "learning_rate": 6.610519864571229e-09, "loss": 5.9220179537078366e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4772, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 74.70833683013916, "completions/min_length": 20.875, "epoch": 9.488458674609085, "grad_norm": 0.0028507302305977194, "kl": 0.061279296875, "learning_rate": 6.559486959325955e-09, "loss": 6.129234679974616e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4773, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.625, "completions/mean_length": 67.145836353302, "completions/min_length": 25.125, "epoch": 9.490444278977414, "grad_norm": 0.004541246729001895, "kl": 0.06744384765625, "learning_rate": 6.508650503288526e-09, "loss": 6.748643500031903e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4774, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 79.69791984558105, "completions/min_length": 30.875, "epoch": 9.492429883345743, "grad_norm": 0.002368792328803445, "kl": 0.0590972900390625, "learning_rate": 6.458010516698087e-09, "loss": 5.90520903642755e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4775, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.375, "completions/mean_length": 73.19791746139526, "completions/min_length": 23.25, "epoch": 9.494415487714074, "grad_norm": 0.0032097416247676733, "kl": 0.076904296875, "learning_rate": 6.407567019715454e-09, "loss": 7.692919461987913e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4776, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.125, "completions/mean_length": 73.0104193687439, "completions/min_length": 28.0, "epoch": 9.496401092082403, "grad_norm": 0.005706181421722418, "kl": 0.08062744140625, "learning_rate": 6.357320032423285e-09, "loss": 8.064581197686493e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4777, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 75.65625286102295, "completions/min_length": 28.625, "epoch": 9.498386696450732, "grad_norm": 0.0030724518483858892, "kl": 0.061187744140625, "learning_rate": 6.307269574825969e-09, "loss": 6.129412213340402e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4778, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.375, "completions/mean_length": 78.34375190734863, "completions/min_length": 27.75, "epoch": 9.500372300819063, "grad_norm": 0.002760299889754672, "kl": 0.057586669921875, "learning_rate": 6.257415666849674e-09, "loss": 5.761609645560384e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4779, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.375, "completions/mean_length": 61.60416841506958, "completions/min_length": 18.5, "epoch": 9.502357905187392, "grad_norm": 0.002451652146583488, "kl": 0.0584716796875, "learning_rate": 6.2077583283423055e-09, "loss": 5.843268445460126e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4780, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.125, "completions/mean_length": 60.76041841506958, "completions/min_length": 21.375, "epoch": 9.50434350955572, "grad_norm": 0.0037879893359206015, "kl": 0.056854248046875, "learning_rate": 6.158297579073546e-09, "loss": 5.685799624188803e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4781, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 66.59375190734863, "completions/min_length": 21.875, "epoch": 9.50632911392405, "grad_norm": 0.009577424421303878, "kl": 0.07562255859375, "learning_rate": 6.109033438734646e-09, "loss": 7.558182551292703e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4782, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 70.86458539962769, "completions/min_length": 19.75, "epoch": 9.50831471829238, "grad_norm": 0.006677649818823071, "kl": 0.064178466796875, "learning_rate": 6.059965926938859e-09, "loss": 6.418031989596784e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4783, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 68.2604193687439, "completions/min_length": 26.625, "epoch": 9.51030032266071, "grad_norm": 0.0046917213130720755, "kl": 0.058197021484375, "learning_rate": 6.01109506322095e-09, "loss": 5.8211277064401656e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4784, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.375, "completions/mean_length": 62.57291793823242, "completions/min_length": 19.875, "epoch": 9.512285927029039, "grad_norm": 0.003941936159635688, "kl": 0.059051513671875, "learning_rate": 5.96242086703741e-09, "loss": 5.8996807638322935e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4785, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.125, "completions/mean_length": 72.28125286102295, "completions/min_length": 26.75, "epoch": 9.51427153139737, "grad_norm": 0.0027272384283332333, "kl": 0.059722900390625, "learning_rate": 5.913943357766571e-09, "loss": 5.975658859824762e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4786, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 64.64583492279053, "completions/min_length": 22.0, "epoch": 9.516257135765699, "grad_norm": 0.003102199991682703, "kl": 0.0665283203125, "learning_rate": 5.8656625547082725e-09, "loss": 6.657680933130905e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4787, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 68.07291984558105, "completions/min_length": 29.75, "epoch": 9.518242740134028, "grad_norm": 0.004569857660072722, "kl": 0.06243896484375, "learning_rate": 5.817578477084251e-09, "loss": 6.2411098042503e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4788, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 59.07291793823242, "completions/min_length": 22.75, "epoch": 9.520228344502359, "grad_norm": 0.00261731264564144, "kl": 0.05914306640625, "learning_rate": 5.769691144037692e-09, "loss": 5.9095902543049306e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4789, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.125, "completions/mean_length": 62.510417461395264, "completions/min_length": 18.875, "epoch": 9.522213948870688, "grad_norm": 0.0032678584605705784, "kl": 0.067779541015625, "learning_rate": 5.722000574633568e-09, "loss": 6.770661275368184e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4790, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 68.34375286102295, "completions/min_length": 18.625, "epoch": 9.524199553239017, "grad_norm": 0.0033231228196449296, "kl": 0.059814453125, "learning_rate": 5.6745067878585796e-09, "loss": 5.9849495301023126e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4791, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.875, "completions/mean_length": 70.19791984558105, "completions/min_length": 24.0, "epoch": 9.526185157607348, "grad_norm": 0.002759660461810293, "kl": 0.06658935546875, "learning_rate": 5.627209802620935e-09, "loss": 6.660568033112213e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4792, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 70.35416841506958, "completions/min_length": 22.0, "epoch": 9.528170761975677, "grad_norm": 0.0032470034251468743, "kl": 0.060150146484375, "learning_rate": 5.580109637750685e-09, "loss": 6.012981612002477e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4793, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 65.62500190734863, "completions/min_length": 25.625, "epoch": 9.530156366344006, "grad_norm": 0.0025214673038298674, "kl": 0.074920654296875, "learning_rate": 5.533206311999328e-09, "loss": 7.481173815904185e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4794, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 62.10416889190674, "completions/min_length": 17.75, "epoch": 9.532141970712335, "grad_norm": 0.005986730512673897, "kl": 0.093017578125, "learning_rate": 5.486499844040093e-09, "loss": 9.28775843931362e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4795, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 59.843751430511475, "completions/min_length": 19.0, "epoch": 9.534127575080666, "grad_norm": 0.0026378644399682803, "kl": 0.0726318359375, "learning_rate": 5.4399902524678855e-09, "loss": 7.262932194862515e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4796, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.375, "completions/mean_length": 62.76041889190674, "completions/min_length": 27.25, "epoch": 9.536113179448995, "grad_norm": 1.7010649115289804, "kl": 0.0594482421875, "learning_rate": 5.393677555799114e-09, "loss": 0.004848122596740723, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4797, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 71.84375238418579, "completions/min_length": 27.5, "epoch": 9.538098783817324, "grad_norm": 0.0037426321961469547, "kl": 0.070343017578125, "learning_rate": 5.347561772471809e-09, "loss": 7.031289715087041e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4798, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.875, "completions/mean_length": 60.52083492279053, "completions/min_length": 22.75, "epoch": 9.540084388185655, "grad_norm": 0.0040342674013660925, "kl": 0.06365966796875, "learning_rate": 5.301642920845672e-09, "loss": 6.362433487083763e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4799, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.125, "completions/mean_length": 63.270836353302, "completions/min_length": 24.0, "epoch": 9.542069992553984, "grad_norm": 0.0023877154705503655, "kl": 0.05242919921875, "learning_rate": 5.255921019202081e-09, "loss": 5.241192411631346e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4800, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.625, "completions/mean_length": 68.73958539962769, "completions/min_length": 17.5, "epoch": 9.544055596922313, "grad_norm": 0.002967951181710472, "kl": 0.064453125, "learning_rate": 5.210396085743751e-09, "loss": 6.44532628939487e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4801, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 77.59375190734863, "completions/min_length": 22.625, "epoch": 9.546041201290643, "grad_norm": 0.003174240797514496, "kl": 0.0640869140625, "learning_rate": 5.165068138595241e-09, "loss": 6.404802843462676e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4802, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.125, "completions/mean_length": 70.4479193687439, "completions/min_length": 23.125, "epoch": 9.548026805658973, "grad_norm": 0.008426793584497974, "kl": 0.08935546875, "learning_rate": 5.119937195802504e-09, "loss": 8.934881043387577e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4803, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 64.03125095367432, "completions/min_length": 22.5, "epoch": 9.550012410027302, "grad_norm": 0.005121012783382403, "kl": 0.087554931640625, "learning_rate": 5.0750032753331674e-09, "loss": 8.746125968173146e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4804, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.125, "completions/mean_length": 62.08333492279053, "completions/min_length": 21.625, "epoch": 9.551998014395632, "grad_norm": 0.0030184847959814185, "kl": 0.052642822265625, "learning_rate": 5.0302663950764216e-09, "loss": 5.265386062092148e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4805, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.625, "completions/mean_length": 67.39583539962769, "completions/min_length": 23.625, "epoch": 9.553983618763962, "grad_norm": 0.0025969432879313775, "kl": 0.07159423828125, "learning_rate": 4.985726572842852e-09, "loss": 7.152646139729768e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4806, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.875, "completions/mean_length": 69.31250143051147, "completions/min_length": 20.625, "epoch": 9.55596922313229, "grad_norm": 0.004716517405570303, "kl": 0.06732177734375, "learning_rate": 4.941383826364831e-09, "loss": 6.741842662449926e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4807, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.125, "completions/mean_length": 64.54166793823242, "completions/min_length": 15.375, "epoch": 9.55795482750062, "grad_norm": 0.0048574903224478045, "kl": 0.055694580078125, "learning_rate": 4.8972381732961256e-09, "loss": 5.5724311096128076e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4808, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.875, "completions/mean_length": 69.65625143051147, "completions/min_length": 18.5, "epoch": 9.55994043186895, "grad_norm": 0.0023546975398983793, "kl": 0.052581787109375, "learning_rate": 4.853289631212065e-09, "loss": 5.252580012893304e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4809, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 60.343751430511475, "completions/min_length": 22.875, "epoch": 9.56192603623728, "grad_norm": 0.004855094272041856, "kl": 0.053863525390625, "learning_rate": 4.809538217609488e-09, "loss": 5.37940941285342e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4810, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.125, "completions/mean_length": 75.95833683013916, "completions/min_length": 21.0, "epoch": 9.563911640605609, "grad_norm": 0.0028386396233875248, "kl": 0.064697265625, "learning_rate": 4.7659839499067934e-09, "loss": 6.476941052824259e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4811, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 75.52083683013916, "completions/min_length": 22.125, "epoch": 9.56589724497394, "grad_norm": 0.004491564962713142, "kl": 0.063262939453125, "learning_rate": 4.722626845443778e-09, "loss": 6.32830779068172e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4812, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 79.48958539962769, "completions/min_length": 30.875, "epoch": 9.567882849342269, "grad_norm": 0.005130581328552408, "kl": 0.06463623046875, "learning_rate": 4.679466921481912e-09, "loss": 6.46094122203067e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4813, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.875, "completions/mean_length": 72.56250190734863, "completions/min_length": 23.125, "epoch": 9.569868453710598, "grad_norm": 0.006116430422971592, "kl": 0.084075927734375, "learning_rate": 4.636504195204061e-09, "loss": 8.414929470745847e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4814, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.625, "completions/mean_length": 78.26041984558105, "completions/min_length": 29.625, "epoch": 9.571854058078928, "grad_norm": 0.002695661182693227, "kl": 0.0675048828125, "learning_rate": 4.593738683714654e-09, "loss": 6.75319679430686e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4815, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.25, "completions/mean_length": 64.0416693687439, "completions/min_length": 21.5, "epoch": 9.573839662447257, "grad_norm": 0.007946555617805547, "kl": 0.06597900390625, "learning_rate": 4.5511704040394615e-09, "loss": 6.597238825634122e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4816, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.625, "completions/mean_length": 84.29166793823242, "completions/min_length": 28.625, "epoch": 9.575825266815587, "grad_norm": 0.0034478098868416904, "kl": 0.082550048828125, "learning_rate": 4.5087993731259266e-09, "loss": 8.244122727774084e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4817, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 68.38541841506958, "completions/min_length": 18.375, "epoch": 9.577810871183917, "grad_norm": 0.0027447318827082757, "kl": 0.0618896484375, "learning_rate": 4.4666256078427775e-09, "loss": 6.19066267972812e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4818, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 72.42708587646484, "completions/min_length": 15.875, "epoch": 9.579796475552246, "grad_norm": 0.006937354291975717, "kl": 0.079833984375, "learning_rate": 4.424649124980307e-09, "loss": 7.977255154401064e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4819, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 65.1041693687439, "completions/min_length": 26.125, "epoch": 9.581782079920576, "grad_norm": 0.0025989805262227332, "kl": 0.081787109375, "learning_rate": 4.382869941250311e-09, "loss": 8.165553299477324e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4820, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 81.14583587646484, "completions/min_length": 21.75, "epoch": 9.583767684288905, "grad_norm": 0.002489918925010924, "kl": 0.059967041015625, "learning_rate": 4.341288073285876e-09, "loss": 5.999734275974333e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4821, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 70.56250238418579, "completions/min_length": 21.75, "epoch": 9.585753288657235, "grad_norm": 0.004836301211722175, "kl": 0.072784423828125, "learning_rate": 4.299903537641703e-09, "loss": 7.275366806425154e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4822, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 69.11458587646484, "completions/min_length": 28.125, "epoch": 9.587738893025564, "grad_norm": 0.003333732682628272, "kl": 0.0552978515625, "learning_rate": 4.258716350793834e-09, "loss": 5.5283759138546884e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4823, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.625, "completions/mean_length": 62.708335399627686, "completions/min_length": 17.75, "epoch": 9.589724497393894, "grad_norm": 0.00888327888230167, "kl": 0.079925537109375, "learning_rate": 4.2177265291397646e-09, "loss": 7.981668022694066e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4824, "train_speed(iter/s)": 0.022599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.875, "completions/mean_length": 72.80208492279053, "completions/min_length": 23.875, "epoch": 9.591710101762224, "grad_norm": 0.005645911417661935, "kl": 0.0799560546875, "learning_rate": 4.176934088998496e-09, "loss": 7.997317879926413e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4825, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 62.59375238418579, "completions/min_length": 22.875, "epoch": 9.593695706130553, "grad_norm": 0.0038043380761648513, "kl": 0.058746337890625, "learning_rate": 4.13633904661026e-09, "loss": 5.867323125130497e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4826, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 71.72916889190674, "completions/min_length": 21.875, "epoch": 9.595681310498883, "grad_norm": 0.0027613000382040735, "kl": 0.062347412109375, "learning_rate": 4.095941418136795e-09, "loss": 6.241798109840602e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4827, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.125, "completions/mean_length": 71.30208539962769, "completions/min_length": 26.5, "epoch": 9.597666914867213, "grad_norm": 1.2831206839109994, "kl": 0.06915283203125, "learning_rate": 4.0557412196613464e-09, "loss": 0.0050938064232468605, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4828, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 64.54166889190674, "completions/min_length": 26.125, "epoch": 9.599652519235542, "grad_norm": 0.0024774840526502646, "kl": 0.0501708984375, "learning_rate": 4.015738467188501e-09, "loss": 5.011974644730799e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4829, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.125, "completions/mean_length": 70.34375333786011, "completions/min_length": 23.0, "epoch": 9.601638123603871, "grad_norm": 0.00409935102665403, "kl": 0.065887451171875, "learning_rate": 3.975933176644075e-09, "loss": 6.592516729142517e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4830, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.625, "completions/mean_length": 62.93750238418579, "completions/min_length": 27.0, "epoch": 9.603623727972202, "grad_norm": 0.00564334083552613, "kl": 0.07342529296875, "learning_rate": 3.936325363875503e-09, "loss": 7.3415765655227e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4831, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 78.87500143051147, "completions/min_length": 21.5, "epoch": 9.605609332340531, "grad_norm": 0.018192002257694618, "kl": 0.064453125, "learning_rate": 3.896915044651505e-09, "loss": 6.435334216803312e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4832, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.625, "completions/mean_length": 69.54166793823242, "completions/min_length": 25.5, "epoch": 9.60759493670886, "grad_norm": 0.004123358615490844, "kl": 0.066864013671875, "learning_rate": 3.8577022346621415e-09, "loss": 6.688522262265906e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4833, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 65.20833587646484, "completions/min_length": 23.375, "epoch": 9.60958054107719, "grad_norm": 0.008269054210156446, "kl": 0.073883056640625, "learning_rate": 3.818686949518812e-09, "loss": 7.384506898233667e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4834, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 64.20833444595337, "completions/min_length": 20.75, "epoch": 9.61156614544552, "grad_norm": 0.0023979478233232825, "kl": 0.054718017578125, "learning_rate": 3.779869204754427e-09, "loss": 5.4725631343899295e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4835, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.625, "completions/mean_length": 67.83333492279053, "completions/min_length": 27.25, "epoch": 9.61355174981385, "grad_norm": 1.6619016595638105, "kl": 0.0772705078125, "learning_rate": 3.741249015823178e-09, "loss": 0.010840097442269325, "memory(GiB)": 94.21, "reward": 1.8541666716337204, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.17921441793441772, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4836, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 69.59375333786011, "completions/min_length": 27.875, "epoch": 9.615537354182178, "grad_norm": 0.003926457680621946, "kl": 0.062530517578125, "learning_rate": 3.7028263981005446e-09, "loss": 6.253214087337255e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4837, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 50.30208492279053, "completions/min_length": 21.25, "epoch": 9.61752295855051, "grad_norm": 0.006517851772604092, "kl": 0.053436279296875, "learning_rate": 3.664601366883291e-09, "loss": 5.342508666217327e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4838, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 77.68750143051147, "completions/min_length": 26.25, "epoch": 9.619508562918838, "grad_norm": 0.002781012486535216, "kl": 0.073577880859375, "learning_rate": 3.6265739373897985e-09, "loss": 7.359175651799887e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4839, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.125, "completions/mean_length": 60.427085638046265, "completions/min_length": 21.25, "epoch": 9.621494167287167, "grad_norm": 0.0028123731067994664, "kl": 0.06341552734375, "learning_rate": 3.5887441247594574e-09, "loss": 6.342627602862194e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4840, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.875, "completions/mean_length": 73.66666841506958, "completions/min_length": 23.0, "epoch": 9.623479771655498, "grad_norm": 0.006917475881395366, "kl": 0.073272705078125, "learning_rate": 3.5511119440531644e-09, "loss": 7.335221744142473e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4841, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 72.46875238418579, "completions/min_length": 25.625, "epoch": 9.625465376023827, "grad_norm": 0.00229037342357923, "kl": 0.06781005859375, "learning_rate": 3.5136774102531574e-09, "loss": 6.784041761420667e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4842, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 67.79166889190674, "completions/min_length": 24.0, "epoch": 9.627450980392156, "grad_norm": 0.0030190256454811354, "kl": 0.061920166015625, "learning_rate": 3.4764405382628483e-09, "loss": 6.18806152488105e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4843, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 73.45833539962769, "completions/min_length": 29.125, "epoch": 9.629436584760487, "grad_norm": 0.0051352166923351915, "kl": 0.070159912109375, "learning_rate": 3.4394013429071e-09, "loss": 7.005871157161891e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4844, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 61.01041793823242, "completions/min_length": 28.875, "epoch": 9.631422189128816, "grad_norm": 0.004315634692808378, "kl": 0.088134765625, "learning_rate": 3.402559838931951e-09, "loss": 8.816139597911388e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4845, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 62.90625333786011, "completions/min_length": 26.5, "epoch": 9.633407793497145, "grad_norm": 0.005037263652277187, "kl": 0.05731201171875, "learning_rate": 3.3659160410047792e-09, "loss": 5.7321150961797684e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4846, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.375, "completions/mean_length": 76.45833587646484, "completions/min_length": 27.75, "epoch": 9.635393397865474, "grad_norm": 0.005254992123200667, "kl": 0.06842041015625, "learning_rate": 3.329469963714249e-09, "loss": 6.844510062364861e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4847, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.625, "completions/mean_length": 57.88541841506958, "completions/min_length": 17.25, "epoch": 9.637379002233805, "grad_norm": 0.005647927697036055, "kl": 0.061187744140625, "learning_rate": 3.293221621570419e-09, "loss": 6.114768621046096e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4848, "train_speed(iter/s)": 0.022606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.125, "completions/mean_length": 67.00000333786011, "completions/min_length": 20.0, "epoch": 9.639364606602134, "grad_norm": 0.012355383865799277, "kl": 0.08575439453125, "learning_rate": 3.2571710290044684e-09, "loss": 8.574880484957248e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4849, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 65.61458444595337, "completions/min_length": 18.125, "epoch": 9.641350210970463, "grad_norm": 0.002453766636556973, "kl": 0.059783935546875, "learning_rate": 3.2213182003689164e-09, "loss": 5.9788675571326166e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4850, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 64.86458396911621, "completions/min_length": 20.0, "epoch": 9.643335815338794, "grad_norm": 0.0034688709493888945, "kl": 0.074554443359375, "learning_rate": 3.185663149937512e-09, "loss": 7.457609171979129e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4851, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.125, "completions/mean_length": 68.53125238418579, "completions/min_length": 26.875, "epoch": 9.645321419707123, "grad_norm": 0.005565369656437917, "kl": 0.056610107421875, "learning_rate": 3.150205891905344e-09, "loss": 5.6664528528926894e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4852, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 67.333336353302, "completions/min_length": 18.625, "epoch": 9.647307024075452, "grad_norm": 0.0028327988180825068, "kl": 0.050994873046875, "learning_rate": 3.114946440388677e-09, "loss": 5.107714969199151e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4853, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 67.70833492279053, "completions/min_length": 26.75, "epoch": 9.649292628443783, "grad_norm": 0.005824612746408752, "kl": 0.067352294921875, "learning_rate": 3.079884809425004e-09, "loss": 6.741640390828252e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4854, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 70.38541841506958, "completions/min_length": 24.375, "epoch": 9.651278232812112, "grad_norm": 0.0035747472146432646, "kl": 0.0963134765625, "learning_rate": 3.0450210129732147e-09, "loss": 9.628474799683318e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4855, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 71.37500333786011, "completions/min_length": 22.25, "epoch": 9.653263837180441, "grad_norm": 0.011097882465219129, "kl": 0.080352783203125, "learning_rate": 3.0103550649132616e-09, "loss": 8.038054511416703e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4856, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 68.73958492279053, "completions/min_length": 26.875, "epoch": 9.655249441548772, "grad_norm": 0.003530127286092695, "kl": 0.057464599609375, "learning_rate": 2.9758869790463827e-09, "loss": 5.7426324929110706e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4857, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.375, "completions/mean_length": 59.97916793823242, "completions/min_length": 20.0, "epoch": 9.657235045917101, "grad_norm": 0.0041520817658141215, "kl": 0.05999755859375, "learning_rate": 2.941616769095101e-09, "loss": 6.0041958931833506e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4858, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.125, "completions/mean_length": 66.73958539962769, "completions/min_length": 30.0, "epoch": 9.65922065028543, "grad_norm": 0.0024100362084596956, "kl": 0.05291748046875, "learning_rate": 2.9075444487031142e-09, "loss": 5.293916547088884e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4859, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.375, "completions/mean_length": 69.11458492279053, "completions/min_length": 21.875, "epoch": 9.66120625465376, "grad_norm": 0.0035572645378273977, "kl": 0.059783935546875, "learning_rate": 2.873670031435349e-09, "loss": 5.982501897960901e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4860, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 71.87500286102295, "completions/min_length": 20.0, "epoch": 9.66319185902209, "grad_norm": 0.0024291370363734585, "kl": 0.060791015625, "learning_rate": 2.839993530777851e-09, "loss": 6.076978024793789e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4861, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 77.58333492279053, "completions/min_length": 25.875, "epoch": 9.66517746339042, "grad_norm": 0.002513044364039413, "kl": 0.080718994140625, "learning_rate": 2.806514960138062e-09, "loss": 8.070183685049415e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4862, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 70.84375190734863, "completions/min_length": 24.25, "epoch": 9.667163067758748, "grad_norm": 0.0029312276875060783, "kl": 0.0899658203125, "learning_rate": 2.773234332844487e-09, "loss": 8.996226824820042e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4863, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 72.58333492279053, "completions/min_length": 22.25, "epoch": 9.66914867212708, "grad_norm": 0.0030330969875835712, "kl": 0.06146240234375, "learning_rate": 2.7401516621468057e-09, "loss": 6.14297459833324e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4864, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 70.91666889190674, "completions/min_length": 32.125, "epoch": 9.671134276495408, "grad_norm": 0.002601525870039028, "kl": 0.064453125, "learning_rate": 2.7072669612159816e-09, "loss": 6.442826270358637e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4865, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.125, "completions/mean_length": 68.67708492279053, "completions/min_length": 20.0, "epoch": 9.673119880863737, "grad_norm": 0.002564356415336455, "kl": 0.0621490478515625, "learning_rate": 2.674580243144153e-09, "loss": 6.210828723851591e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4866, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 67.33333539962769, "completions/min_length": 27.25, "epoch": 9.675105485232068, "grad_norm": 0.00246052440736179, "kl": 0.060516357421875, "learning_rate": 2.642091520944523e-09, "loss": 6.050724186934531e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4867, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 74.89583492279053, "completions/min_length": 27.125, "epoch": 9.677091089600397, "grad_norm": 0.002951842850527218, "kl": 0.095184326171875, "learning_rate": 2.609800807551521e-09, "loss": 9.522426989860833e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4868, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.625, "completions/mean_length": 67.01041984558105, "completions/min_length": 24.75, "epoch": 9.679076693968726, "grad_norm": 0.02051451334704056, "kl": 0.060943603515625, "learning_rate": 2.5777081158209203e-09, "loss": 6.084067717893049e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4869, "train_speed(iter/s)": 0.022605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 79.61458683013916, "completions/min_length": 23.375, "epoch": 9.681062298337057, "grad_norm": 0.00452776540368591, "kl": 0.07049560546875, "learning_rate": 2.5458134585293333e-09, "loss": 7.046831160550937e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4870, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 64.86458492279053, "completions/min_length": 19.0, "epoch": 9.683047902705386, "grad_norm": 0.024464361869878246, "kl": 0.095489501953125, "learning_rate": 2.5141168483748253e-09, "loss": 9.550269896863028e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4871, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.625, "completions/mean_length": 68.28125190734863, "completions/min_length": 24.375, "epoch": 9.685033507073715, "grad_norm": 0.002328423368921501, "kl": 0.063751220703125, "learning_rate": 2.4826182979764686e-09, "loss": 6.374135409714654e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4872, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 65.77083492279053, "completions/min_length": 26.125, "epoch": 9.687019111442044, "grad_norm": 0.0033201898603226433, "kl": 0.053680419921875, "learning_rate": 2.4513178198744542e-09, "loss": 5.368553684093058e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4873, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 65.64583539962769, "completions/min_length": 19.75, "epoch": 9.689004715810375, "grad_norm": 0.0031306706094000893, "kl": 0.054962158203125, "learning_rate": 2.420215426530259e-09, "loss": 5.499834514921531e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4874, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 67.43750143051147, "completions/min_length": 25.625, "epoch": 9.690990320178704, "grad_norm": 0.8022520596962549, "kl": 0.073455810546875, "learning_rate": 2.3893111303262547e-09, "loss": -0.005667926277965307, "memory(GiB)": 94.21, "reward": 1.8229166716337204, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.19492431730031967, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4875, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.125, "completions/mean_length": 66.91666793823242, "completions/min_length": 19.125, "epoch": 9.692975924547033, "grad_norm": 0.31908655292375704, "kl": 0.52294921875, "learning_rate": 2.3586049435663224e-09, "loss": 0.0005212367395870388, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4876, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 60.8854193687439, "completions/min_length": 18.25, "epoch": 9.694961528915364, "grad_norm": 0.002394973473424306, "kl": 0.05340576171875, "learning_rate": 2.328096878475072e-09, "loss": 5.3393385314848274e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4877, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.375, "completions/mean_length": 78.25000238418579, "completions/min_length": 29.875, "epoch": 9.696947133283693, "grad_norm": 0.002913896362997115, "kl": 0.07086181640625, "learning_rate": 2.29778694719851e-09, "loss": 7.087054109433666e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4878, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 69.87500190734863, "completions/min_length": 23.125, "epoch": 9.698932737652022, "grad_norm": 0.0032494313577079194, "kl": 0.05859375, "learning_rate": 2.2676751618036505e-09, "loss": 5.8602083299774677e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4879, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 74.56250190734863, "completions/min_length": 24.5, "epoch": 9.700918342020353, "grad_norm": 0.002577562131957097, "kl": 0.067047119140625, "learning_rate": 2.2377615342785705e-09, "loss": 6.701111851725727e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4880, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.875, "completions/mean_length": 67.26041984558105, "completions/min_length": 24.25, "epoch": 9.702903946388682, "grad_norm": 0.0046430579500008895, "kl": 0.0819854736328125, "learning_rate": 2.2080460765326325e-09, "loss": 8.187860657926649e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4881, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 75.04166793823242, "completions/min_length": 26.125, "epoch": 9.704889550757011, "grad_norm": 0.0037047414384949323, "kl": 0.0662841796875, "learning_rate": 2.1785288003960954e-09, "loss": 6.629362178500742e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4882, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 67.04166841506958, "completions/min_length": 23.625, "epoch": 9.706875155125342, "grad_norm": 0.0033560374313336104, "kl": 0.06488037109375, "learning_rate": 2.1492097176205036e-09, "loss": 6.487013160949573e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4883, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 70.77083539962769, "completions/min_length": 21.75, "epoch": 9.708860759493671, "grad_norm": 0.004543794419514337, "kl": 0.063690185546875, "learning_rate": 2.1200888398783532e-09, "loss": 6.36466356809251e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4884, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 73.79166889190674, "completions/min_length": 24.125, "epoch": 9.710846363862, "grad_norm": 0.003299978949370532, "kl": 0.055419921875, "learning_rate": 2.0911661787633152e-09, "loss": 5.542510189116001e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4885, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 75.3229193687439, "completions/min_length": 26.125, "epoch": 9.71283196823033, "grad_norm": 0.0044365228436324036, "kl": 0.08062744140625, "learning_rate": 2.0624417457900667e-09, "loss": 8.057904778979719e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4886, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 65.364586353302, "completions/min_length": 24.875, "epoch": 9.71481757259866, "grad_norm": 0.002772278052466705, "kl": 0.057098388671875, "learning_rate": 2.033915552394516e-09, "loss": 5.712054553441703e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4887, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.375, "completions/mean_length": 67.05208492279053, "completions/min_length": 21.125, "epoch": 9.71680317696699, "grad_norm": 1.7824189762645273, "kl": 0.061309814453125, "learning_rate": 2.005587609933468e-09, "loss": 0.012681696563959122, "memory(GiB)": 94.21, "reward": 1.8020833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.2319217473268509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4888, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 56.98958492279053, "completions/min_length": 26.375, "epoch": 9.718788781335318, "grad_norm": 0.0039207437734277715, "kl": 0.049163818359375, "learning_rate": 1.9774579296849004e-09, "loss": 4.919374987366609e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4889, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.875, "completions/mean_length": 60.48958492279053, "completions/min_length": 26.125, "epoch": 9.720774385703649, "grad_norm": 0.010444253234628572, "kl": 0.07244873046875, "learning_rate": 1.9495265228478553e-09, "loss": 7.251766510307789e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4890, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 63.9166693687439, "completions/min_length": 15.375, "epoch": 9.722759990071978, "grad_norm": 0.0024499987047419632, "kl": 0.06903076171875, "learning_rate": 1.921793400542382e-09, "loss": 6.894676334923133e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4891, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 72.39583492279053, "completions/min_length": 24.5, "epoch": 9.724745594440307, "grad_norm": 0.002566391580491315, "kl": 0.061614990234375, "learning_rate": 1.894258573809704e-09, "loss": 6.15893368376419e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4892, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 59.770835638046265, "completions/min_length": 21.375, "epoch": 9.726731198808638, "grad_norm": 0.004223567298165255, "kl": 0.0660858154296875, "learning_rate": 1.866922053611941e-09, "loss": 6.609825504710898e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4893, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 64.55208539962769, "completions/min_length": 20.375, "epoch": 9.728716803176967, "grad_norm": 0.004624187021800558, "kl": 0.059967041015625, "learning_rate": 1.8397838508323881e-09, "loss": 5.990253703203052e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4894, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 64.54166793823242, "completions/min_length": 22.625, "epoch": 9.730702407545296, "grad_norm": 0.007816360037811628, "kl": 0.06573486328125, "learning_rate": 1.8128439762754022e-09, "loss": 6.576003943337128e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4895, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.125, "completions/mean_length": 68.68750238418579, "completions/min_length": 25.75, "epoch": 9.732688011913627, "grad_norm": 1.1492892909109909, "kl": 0.081817626953125, "learning_rate": 1.7861024406661817e-09, "loss": 8.186760533135384e-05, "memory(GiB)": 94.21, "reward": 1.6770833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3254825547337532, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4896, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.625, "completions/mean_length": 60.08333444595337, "completions/min_length": 21.75, "epoch": 9.734673616281956, "grad_norm": 0.003555358582824764, "kl": 0.0537109375, "learning_rate": 1.7595592546512106e-09, "loss": 5.3691223001806065e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4897, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.875, "completions/mean_length": 71.70833539962769, "completions/min_length": 27.0, "epoch": 9.736659220650285, "grad_norm": 0.002806854217593576, "kl": 0.079376220703125, "learning_rate": 1.733214428797869e-09, "loss": 7.926135003799573e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4898, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 60.44791889190674, "completions/min_length": 25.125, "epoch": 9.738644825018614, "grad_norm": 0.004106409016724521, "kl": 0.06591796875, "learning_rate": 1.7070679735946002e-09, "loss": 6.585016672033817e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4899, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.375, "completions/mean_length": 67.70833683013916, "completions/min_length": 19.125, "epoch": 9.740630429386945, "grad_norm": 0.0032727363537347555, "kl": 0.07171630859375, "learning_rate": 1.6811198994508557e-09, "loss": 7.17202783562243e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4900, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 67.55208539962769, "completions/min_length": 27.125, "epoch": 9.742616033755274, "grad_norm": 0.0031718823813094133, "kl": 0.060821533203125, "learning_rate": 1.6553702166971495e-09, "loss": 6.073419353924692e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4901, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.375, "completions/mean_length": 67.16666889190674, "completions/min_length": 23.25, "epoch": 9.744601638123603, "grad_norm": 0.005897440937601853, "kl": 0.0758056640625, "learning_rate": 1.6298189355849478e-09, "loss": 7.571832975372672e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4902, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.125, "completions/mean_length": 78.77083587646484, "completions/min_length": 24.625, "epoch": 9.746587242491934, "grad_norm": 0.002937303468559925, "kl": 0.0721435546875, "learning_rate": 1.6044660662867248e-09, "loss": 7.219894177978858e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4903, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.625, "completions/mean_length": 65.72916793823242, "completions/min_length": 21.625, "epoch": 9.748572846860263, "grad_norm": 0.003237457740005051, "kl": 0.059478759765625, "learning_rate": 1.579311618896073e-09, "loss": 5.946962482994422e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4904, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 72.90625238418579, "completions/min_length": 24.25, "epoch": 9.750558451228592, "grad_norm": 0.0028302712073289523, "kl": 0.060699462890625, "learning_rate": 1.5543556034274818e-09, "loss": 6.071483949199319e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4905, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.125, "completions/mean_length": 66.60416889190674, "completions/min_length": 22.125, "epoch": 9.752544055596923, "grad_norm": 0.0038497218597631647, "kl": 0.060821533203125, "learning_rate": 1.5295980298165035e-09, "loss": 6.070291419746354e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4906, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.375, "completions/mean_length": 68.45833444595337, "completions/min_length": 26.875, "epoch": 9.754529659965252, "grad_norm": 0.0026325006622195846, "kl": 0.0587158203125, "learning_rate": 1.505038907919587e-09, "loss": 5.8668760175351053e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4907, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.875, "completions/mean_length": 71.14583587646484, "completions/min_length": 30.0, "epoch": 9.756515264333581, "grad_norm": 0.004062945118989607, "kl": 0.068328857421875, "learning_rate": 1.4806782475142999e-09, "loss": 6.834537634858862e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4908, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.375, "completions/mean_length": 57.75000190734863, "completions/min_length": 17.5, "epoch": 9.758500868701912, "grad_norm": 0.0023128513146530435, "kl": 0.061248779296875, "learning_rate": 1.456516058299162e-09, "loss": 6.127214874140918e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4909, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 73.53125190734863, "completions/min_length": 27.25, "epoch": 9.760486473070241, "grad_norm": 0.00235816750721465, "kl": 0.057769775390625, "learning_rate": 1.43255234989359e-09, "loss": 5.774916644440964e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4910, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.625, "completions/mean_length": 77.54166984558105, "completions/min_length": 29.875, "epoch": 9.76247207743857, "grad_norm": 0.0026992264437089557, "kl": 0.07342529296875, "learning_rate": 1.4087871318380628e-09, "loss": 7.336567068705335e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4911, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 71.333336353302, "completions/min_length": 24.875, "epoch": 9.764457681806899, "grad_norm": 0.0028740920228540064, "kl": 0.0633544921875, "learning_rate": 1.3852204135940682e-09, "loss": 6.34082971373573e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4912, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 68.34375143051147, "completions/min_length": 26.25, "epoch": 9.76644328617523, "grad_norm": 0.004442286294750017, "kl": 0.07794189453125, "learning_rate": 1.3618522045439896e-09, "loss": 7.791213283780962e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4913, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 61.114585399627686, "completions/min_length": 24.0, "epoch": 9.768428890543559, "grad_norm": 0.9693114689966057, "kl": 0.08258056640625, "learning_rate": 1.3386825139912184e-09, "loss": 0.004584218375384808, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515517219901085, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4914, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 62.69791889190674, "completions/min_length": 19.0, "epoch": 9.770414494911888, "grad_norm": 0.003953011698298157, "kl": 0.063873291015625, "learning_rate": 1.315711351160098e-09, "loss": 6.383230356732383e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4915, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.125, "completions/mean_length": 69.33333539962769, "completions/min_length": 23.625, "epoch": 9.772400099280219, "grad_norm": 0.002988304915735757, "kl": 0.052978515625, "learning_rate": 1.2929387251959244e-09, "loss": 5.2948726079193875e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4916, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 70.82291841506958, "completions/min_length": 27.125, "epoch": 9.774385703648548, "grad_norm": 0.0025273976544686943, "kl": 0.057861328125, "learning_rate": 1.2703646451650007e-09, "loss": 5.792102820123546e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4917, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.125, "completions/mean_length": 65.98958539962769, "completions/min_length": 17.75, "epoch": 9.776371308016877, "grad_norm": 0.0027761983235578855, "kl": 0.060089111328125, "learning_rate": 1.2479891200544712e-09, "loss": 6.004751776345074e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4918, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.875, "completions/mean_length": 75.28125190734863, "completions/min_length": 24.625, "epoch": 9.778356912385208, "grad_norm": 0.0032506017737627545, "kl": 0.06988525390625, "learning_rate": 1.2258121587726545e-09, "loss": 6.982425111345947e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4919, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.875, "completions/mean_length": 62.01041841506958, "completions/min_length": 21.5, "epoch": 9.780342516753537, "grad_norm": 0.004341238898533917, "kl": 0.064849853515625, "learning_rate": 1.2038337701485435e-09, "loss": 6.488948565674946e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4920, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 66.50000238418579, "completions/min_length": 24.125, "epoch": 9.782328121121866, "grad_norm": 0.002727750978269722, "kl": 0.0704345703125, "learning_rate": 1.1820539629322501e-09, "loss": 7.044737867545336e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4921, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.875, "completions/mean_length": 65.29166889190674, "completions/min_length": 21.5, "epoch": 9.784313725490197, "grad_norm": 0.9147985395637744, "kl": 0.076416015625, "learning_rate": 1.1604727457947828e-09, "loss": -0.007903304882347584, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4922, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 65.15625190734863, "completions/min_length": 18.0, "epoch": 9.786299329858526, "grad_norm": 0.0038524035881175485, "kl": 0.0762939453125, "learning_rate": 1.1390901273280462e-09, "loss": 7.630523759871721e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4923, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 69.18750333786011, "completions/min_length": 23.0, "epoch": 9.788284934226855, "grad_norm": 0.004470685652187916, "kl": 0.053466796875, "learning_rate": 1.1179061160450088e-09, "loss": 5.347974001779221e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4924, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 70.09375238418579, "completions/min_length": 26.625, "epoch": 9.790270538595184, "grad_norm": 0.0025386537513485915, "kl": 0.073272705078125, "learning_rate": 1.0969207203793685e-09, "loss": 7.335464761126786e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4925, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.375, "completions/mean_length": 69.17708492279053, "completions/min_length": 17.125, "epoch": 9.792256142963515, "grad_norm": 0.004527896516384505, "kl": 0.08233642578125, "learning_rate": 1.0761339486859422e-09, "loss": 8.237495785579085e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4926, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 65.95833396911621, "completions/min_length": 24.375, "epoch": 9.794241747331844, "grad_norm": 0.0033412006027033793, "kl": 0.0540771484375, "learning_rate": 1.0555458092403325e-09, "loss": 5.405517731560394e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4927, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.375, "completions/mean_length": 73.00000143051147, "completions/min_length": 26.25, "epoch": 9.796227351700173, "grad_norm": 0.0030554930211831146, "kl": 0.0791015625, "learning_rate": 1.0351563102392048e-09, "loss": 7.910649583209306e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4928, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 74.52083444595337, "completions/min_length": 27.75, "epoch": 9.798212956068504, "grad_norm": 0.003961981033338274, "kl": 0.0574951171875, "learning_rate": 1.0149654597999545e-09, "loss": 5.749250703956932e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4929, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.375, "completions/mean_length": 61.87500238418579, "completions/min_length": 21.375, "epoch": 9.800198560436833, "grad_norm": 0.0038670682705185125, "kl": 0.07818603515625, "learning_rate": 9.949732659609854e-10, "loss": 7.819194433977827e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4930, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 72.80208444595337, "completions/min_length": 32.0, "epoch": 9.802184164805162, "grad_norm": 0.0035007702358087417, "kl": 0.0740966796875, "learning_rate": 9.751797366817083e-10, "loss": 7.414381252601743e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4931, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 75.177086353302, "completions/min_length": 28.625, "epoch": 9.804169769173493, "grad_norm": 0.0028782338321037843, "kl": 0.073333740234375, "learning_rate": 9.555848798423195e-10, "loss": 7.33111664885655e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4932, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.375, "completions/mean_length": 61.53125286102295, "completions/min_length": 20.875, "epoch": 9.806155373541822, "grad_norm": 0.004812318773267642, "kl": 0.0544891357421875, "learning_rate": 9.361887032438564e-10, "loss": 5.44918148079887e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4933, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 69.50000333786011, "completions/min_length": 21.375, "epoch": 9.808140977910151, "grad_norm": 0.007386824070509152, "kl": 0.08892822265625, "learning_rate": 9.169912146084758e-10, "loss": 8.903484558686614e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4934, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 71.958336353302, "completions/min_length": 26.25, "epoch": 9.810126582278482, "grad_norm": 0.0025956159257214655, "kl": 0.05419921875, "learning_rate": 8.979924215790635e-10, "loss": 5.4165440815268084e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4935, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 61.46875238418579, "completions/min_length": 21.875, "epoch": 9.81211218664681, "grad_norm": 0.0025961079585854205, "kl": 0.0545654296875, "learning_rate": 8.791923317194582e-10, "loss": 5.451752804219723e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4936, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.375, "completions/mean_length": 67.44791793823242, "completions/min_length": 25.0, "epoch": 9.81409779101514, "grad_norm": 0.002320192232239995, "kl": 0.05633544921875, "learning_rate": 8.605909525143396e-10, "loss": 5.624774712487124e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4937, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.875, "completions/mean_length": 71.92708444595337, "completions/min_length": 29.0, "epoch": 9.816083395383469, "grad_norm": 0.004987222309842723, "kl": 0.065155029296875, "learning_rate": 8.421882913692835e-10, "loss": 6.519594171550125e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4938, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.125, "completions/mean_length": 60.50000190734863, "completions/min_length": 16.75, "epoch": 9.8180689997518, "grad_norm": 0.003964087890630469, "kl": 0.0623779296875, "learning_rate": 8.239843556108739e-10, "loss": 6.227349513210356e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4939, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.875, "completions/mean_length": 70.01041841506958, "completions/min_length": 21.75, "epoch": 9.820054604120129, "grad_norm": 0.003700480245563324, "kl": 0.061859130859375, "learning_rate": 8.059791524864801e-10, "loss": 6.183989171404392e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4940, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 71.92708587646484, "completions/min_length": 29.25, "epoch": 9.822040208488458, "grad_norm": 0.003118603510522461, "kl": 0.07806396484375, "learning_rate": 7.881726891642571e-10, "loss": 7.805461063981056e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4941, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 68.65625190734863, "completions/min_length": 25.25, "epoch": 9.824025812856789, "grad_norm": 0.004830495818646426, "kl": 0.075469970703125, "learning_rate": 7.705649727334784e-10, "loss": 7.542136154370382e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4942, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 61.239585876464844, "completions/min_length": 20.0, "epoch": 9.826011417225118, "grad_norm": 0.250625416287303, "kl": 0.228759765625, "learning_rate": 7.531560102040368e-10, "loss": 0.00022838378208689392, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4943, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.875, "completions/mean_length": 68.89583492279053, "completions/min_length": 21.0, "epoch": 9.827997021593447, "grad_norm": 0.004143987166351701, "kl": 0.063751220703125, "learning_rate": 7.359458085068327e-10, "loss": 6.372031202772632e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4944, "train_speed(iter/s)": 0.022604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 77.02083587646484, "completions/min_length": 23.75, "epoch": 9.829982625961778, "grad_norm": 0.00812242766943953, "kl": 0.086761474609375, "learning_rate": 7.189343744936627e-10, "loss": 8.672293915878981e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4945, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.625, "completions/mean_length": 73.32291984558105, "completions/min_length": 27.75, "epoch": 9.831968230330107, "grad_norm": 0.003069944067159677, "kl": 0.055694580078125, "learning_rate": 7.021217149371095e-10, "loss": 5.569959103013389e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4946, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 80.520836353302, "completions/min_length": 31.875, "epoch": 9.833953834698436, "grad_norm": 0.0029404915752553845, "kl": 0.066192626953125, "learning_rate": 6.855078365306521e-10, "loss": 6.622498767683282e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4947, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.875, "completions/mean_length": 57.11458492279053, "completions/min_length": 19.5, "epoch": 9.835939439066767, "grad_norm": 0.006216118937372336, "kl": 0.072662353515625, "learning_rate": 6.690927458886109e-10, "loss": 7.26415601093322e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4948, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 71.93750190734863, "completions/min_length": 23.875, "epoch": 9.837925043435096, "grad_norm": 0.003586536611286647, "kl": 0.09130859375, "learning_rate": 6.52876449546258e-10, "loss": 9.13163967197761e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4949, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.375, "completions/mean_length": 67.66666746139526, "completions/min_length": 25.875, "epoch": 9.839910647803425, "grad_norm": 0.002208604457820552, "kl": 0.05267333984375, "learning_rate": 6.368589539595959e-10, "loss": 5.267279266263358e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4950, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.375, "completions/mean_length": 71.05208492279053, "completions/min_length": 26.5, "epoch": 9.841896252171754, "grad_norm": 0.0029101850324022354, "kl": 0.06866455078125, "learning_rate": 6.21040265505468e-10, "loss": 6.870054494356737e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4951, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 69.93750238418579, "completions/min_length": 19.75, "epoch": 9.843881856540085, "grad_norm": 0.006810479758389723, "kl": 0.07965087890625, "learning_rate": 6.054203904817812e-10, "loss": 7.961005030665547e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4952, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 71.52083539962769, "completions/min_length": 21.125, "epoch": 9.845867460908414, "grad_norm": 0.00233024970663714, "kl": 0.061920166015625, "learning_rate": 5.899993351070054e-10, "loss": 6.190160638652742e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4953, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 73.84375238418579, "completions/min_length": 19.375, "epoch": 9.847853065276743, "grad_norm": 0.003606115658858581, "kl": 0.062957763671875, "learning_rate": 5.747771055206741e-10, "loss": 6.295074126683176e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4954, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.875, "completions/mean_length": 66.42708444595337, "completions/min_length": 24.375, "epoch": 9.849838669645074, "grad_norm": 0.004113093807762768, "kl": 0.07177734375, "learning_rate": 5.597537077830505e-10, "loss": 7.180670218076557e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4955, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 63.62500238418579, "completions/min_length": 25.375, "epoch": 9.851824274013403, "grad_norm": 0.0029272398565606387, "kl": 0.060760498046875, "learning_rate": 5.449291478752394e-10, "loss": 6.069597293389961e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4956, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 77.5729193687439, "completions/min_length": 31.5, "epoch": 9.853809878381732, "grad_norm": 0.00364253130880136, "kl": 0.07098388671875, "learning_rate": 5.303034316992417e-10, "loss": 7.088730490067974e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4957, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.125, "completions/mean_length": 61.01041841506958, "completions/min_length": 17.25, "epoch": 9.855795482750063, "grad_norm": 0.0044567901921322975, "kl": 0.0609130859375, "learning_rate": 5.158765650778996e-10, "loss": 6.0853693867102265e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4958, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 63.92708444595337, "completions/min_length": 18.625, "epoch": 9.857781087118392, "grad_norm": 0.0034395284342734057, "kl": 0.05609130859375, "learning_rate": 5.01648553754841e-10, "loss": 5.60905973543413e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4959, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 72.43750190734863, "completions/min_length": 26.75, "epoch": 9.85976669148672, "grad_norm": 0.0022682243629432905, "kl": 0.05438232421875, "learning_rate": 4.876194033945347e-10, "loss": 5.437176878331229e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4960, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 60.75000190734863, "completions/min_length": 23.0, "epoch": 9.861752295855052, "grad_norm": 0.004860945948056546, "kl": 0.052154541015625, "learning_rate": 4.737891195822352e-10, "loss": 5.2177434554323554e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4961, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 79.11458587646484, "completions/min_length": 25.375, "epoch": 9.86373790022338, "grad_norm": 0.002741801260354241, "kl": 0.07061767578125, "learning_rate": 4.601577078242047e-10, "loss": 7.055123569443822e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4962, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 69.23958539962769, "completions/min_length": 20.375, "epoch": 9.86572350459171, "grad_norm": 0.002916452726004066, "kl": 0.0592041015625, "learning_rate": 4.4672517354721326e-10, "loss": 5.9139008953934535e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4963, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 69.09375190734863, "completions/min_length": 20.875, "epoch": 9.867709108960039, "grad_norm": 0.0025810766734490885, "kl": 0.055572509765625, "learning_rate": 4.334915220992053e-10, "loss": 5.548813351197168e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4964, "train_speed(iter/s)": 0.022603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 83.06250095367432, "completions/min_length": 29.25, "epoch": 9.86969471332837, "grad_norm": 0.004265449261202776, "kl": 0.064239501953125, "learning_rate": 4.204567587486885e-10, "loss": 6.421327998396009e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4965, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 70.1666693687439, "completions/min_length": 19.5, "epoch": 9.871680317696699, "grad_norm": 0.0023019412380832793, "kl": 0.054412841796875, "learning_rate": 4.076208886851229e-10, "loss": 5.441211033030413e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4966, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 65.79166889190674, "completions/min_length": 19.625, "epoch": 9.873665922065028, "grad_norm": 0.00466966472491242, "kl": 0.071746826171875, "learning_rate": 3.949839170187541e-10, "loss": 7.168490265030414e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4967, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.625, "completions/mean_length": 70.17708492279053, "completions/min_length": 30.375, "epoch": 9.875651526433359, "grad_norm": 0.04236118095267577, "kl": 0.088104248046875, "learning_rate": 3.8254584878055773e-10, "loss": 8.810514555079862e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4968, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 74.73958587646484, "completions/min_length": 28.5, "epoch": 9.877637130801688, "grad_norm": 0.0028984809971913626, "kl": 0.074920654296875, "learning_rate": 3.703066889224615e-10, "loss": 7.494209421565756e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4969, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 66.67708444595337, "completions/min_length": 20.125, "epoch": 9.879622735170017, "grad_norm": 0.0026172484586559075, "kl": 0.06500244140625, "learning_rate": 3.5826644231706784e-10, "loss": 6.509361992357299e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4970, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 68.98958539962769, "completions/min_length": 24.625, "epoch": 9.881608339538348, "grad_norm": 0.0030047013288678685, "kl": 0.05316162109375, "learning_rate": 3.4642511375798657e-10, "loss": 5.311940185492858e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4971, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 64.98958492279053, "completions/min_length": 24.5, "epoch": 9.883593943906677, "grad_norm": 0.002863542917715487, "kl": 0.094451904296875, "learning_rate": 3.3478270795933574e-10, "loss": 9.464035974815488e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4972, "train_speed(iter/s)": 0.022602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 71.86458683013916, "completions/min_length": 29.375, "epoch": 9.885579548275006, "grad_norm": 0.0027823123829232204, "kl": 0.057586669921875, "learning_rate": 3.2333922955635194e-10, "loss": 5.755884194513783e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4973, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 69.90625238418579, "completions/min_length": 23.875, "epoch": 9.887565152643337, "grad_norm": 0.003311938889952719, "kl": 0.064788818359375, "learning_rate": 3.120946831048354e-10, "loss": 6.473198300227523e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4974, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.625, "completions/mean_length": 73.69791889190674, "completions/min_length": 25.75, "epoch": 9.889550757011666, "grad_norm": 0.004843637806478451, "kl": 0.062835693359375, "learning_rate": 3.010490730815385e-10, "loss": 6.293550541158766e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4975, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.125, "completions/mean_length": 72.78125238418579, "completions/min_length": 29.5, "epoch": 9.891536361379995, "grad_norm": 0.00258890425231928, "kl": 0.067047119140625, "learning_rate": 2.9020240388388794e-10, "loss": 6.708676664857194e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4976, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 81.65625333786011, "completions/min_length": 28.5, "epoch": 9.893521965748324, "grad_norm": 0.002474235830371792, "kl": 0.062713623046875, "learning_rate": 2.7955467983026284e-10, "loss": 6.276718340814114e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4977, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.375, "completions/mean_length": 62.562501430511475, "completions/min_length": 16.875, "epoch": 9.895507570116655, "grad_norm": 1.566695079567521, "kl": 0.06591796875, "learning_rate": 2.6910590515966113e-10, "loss": 6.595502782147378e-05, "memory(GiB)": 94.21, "reward": 1.7708333432674408, "reward_std": 0.03227486088871956, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.12682486698031425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4978, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 80.21875238418579, "completions/min_length": 26.625, "epoch": 9.897493174484984, "grad_norm": 0.0023293936682199715, "kl": 0.06182861328125, "learning_rate": 2.588560840320331e-10, "loss": 6.178909097798169e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4979, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 70.69791841506958, "completions/min_length": 25.25, "epoch": 9.899478778853313, "grad_norm": 0.0036043351625637122, "kl": 0.058990478515625, "learning_rate": 2.4880522052800334e-10, "loss": 5.893385241506621e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4980, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 65.61458492279053, "completions/min_length": 16.875, "epoch": 9.901464383221644, "grad_norm": 0.0031271481790348596, "kl": 0.071380615234375, "learning_rate": 2.3895331864903776e-10, "loss": 7.129264122340828e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4981, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.375, "completions/mean_length": 66.50000095367432, "completions/min_length": 22.25, "epoch": 9.903449987589973, "grad_norm": 0.002770663657091112, "kl": 0.056396484375, "learning_rate": 2.293003823174433e-10, "loss": 5.6329437938984483e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4982, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 59.66666889190674, "completions/min_length": 21.0, "epoch": 9.905435591958302, "grad_norm": 0.012688918468588451, "kl": 0.0679931640625, "learning_rate": 2.198464153762014e-10, "loss": 6.803065480198711e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4983, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 86.96875190734863, "completions/min_length": 34.125, "epoch": 9.907421196326633, "grad_norm": 0.002783874945721273, "kl": 0.06610107421875, "learning_rate": 2.1059142158919018e-10, "loss": 6.611172284465283e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4984, "train_speed(iter/s)": 0.022599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.125, "completions/mean_length": 63.82291889190674, "completions/min_length": 21.5, "epoch": 9.909406800694962, "grad_norm": 0.003155548573825557, "kl": 0.07037353515625, "learning_rate": 2.015354046409623e-10, "loss": 7.04998237779364e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4985, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 62.70833444595337, "completions/min_length": 20.625, "epoch": 9.91139240506329, "grad_norm": 0.0037685532454678873, "kl": 0.065521240234375, "learning_rate": 1.926783681369115e-10, "loss": 6.549165118485689e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4986, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.125, "completions/mean_length": 70.73958587646484, "completions/min_length": 22.5, "epoch": 9.913378009431622, "grad_norm": 0.0026566020766932816, "kl": 0.0701904296875, "learning_rate": 1.840203156032727e-10, "loss": 7.013032882241532e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4987, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 85.82291793823242, "completions/min_length": 29.875, "epoch": 9.91536361379995, "grad_norm": 0.002659269109254934, "kl": 0.0943603515625, "learning_rate": 1.7556125048695525e-10, "loss": 9.437241533305496e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4988, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.625, "completions/mean_length": 60.04166889190674, "completions/min_length": 19.125, "epoch": 9.91734921816828, "grad_norm": 0.004663215058839115, "kl": 0.086212158203125, "learning_rate": 1.673011761557097e-10, "loss": 8.620247535873204e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4989, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 65.86458587646484, "completions/min_length": 27.0, "epoch": 9.919334822536609, "grad_norm": 0.002386957079413601, "kl": 0.060211181640625, "learning_rate": 1.5924009589801668e-10, "loss": 6.018680869601667e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4990, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.375, "completions/mean_length": 65.12500095367432, "completions/min_length": 24.25, "epoch": 9.92132042690494, "grad_norm": 0.0027547175043255586, "kl": 0.067901611328125, "learning_rate": 1.5137801292325336e-10, "loss": 6.7899476562161e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4991, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.125, "completions/mean_length": 60.083335876464844, "completions/min_length": 18.125, "epoch": 9.923306031273269, "grad_norm": 0.005328498802481913, "kl": 0.0599365234375, "learning_rate": 1.437149303613605e-10, "loss": 5.993892773403786e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4992, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 64.21875143051147, "completions/min_length": 21.0, "epoch": 9.925291635641598, "grad_norm": 0.003329082001985909, "kl": 0.056915283203125, "learning_rate": 1.362508512632865e-10, "loss": 5.6879543990362436e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4993, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.625, "completions/mean_length": 69.89583587646484, "completions/min_length": 20.125, "epoch": 9.927277240009929, "grad_norm": 0.0031193247306549403, "kl": 0.062896728515625, "learning_rate": 1.2898577860054327e-10, "loss": 6.286584539338946e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4994, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 67.25000190734863, "completions/min_length": 23.875, "epoch": 9.929262844378258, "grad_norm": 0.002466307950208977, "kl": 0.081451416015625, "learning_rate": 1.2191971526559485e-10, "loss": 8.159526623785496e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4995, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.375, "completions/mean_length": 67.15625238418579, "completions/min_length": 19.5, "epoch": 9.931248448746587, "grad_norm": 0.0030281324538418268, "kl": 0.05120849609375, "learning_rate": 1.1505266407157987e-10, "loss": 5.126166433910839e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4996, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 66.802086353302, "completions/min_length": 23.25, "epoch": 9.933234053114917, "grad_norm": 0.004728499306660028, "kl": 0.066741943359375, "learning_rate": 1.083846277523115e-10, "loss": 6.680757360300049e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4997, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 71.77083539962769, "completions/min_length": 26.625, "epoch": 9.935219657483247, "grad_norm": 0.0031677994834557223, "kl": 0.055572509765625, "learning_rate": 1.0191560896261053e-10, "loss": 5.560018325923011e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4998, "train_speed(iter/s)": 0.022601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 74.16666793823242, "completions/min_length": 20.125, "epoch": 9.937205261851576, "grad_norm": 0.0041143029368503085, "kl": 0.08026123046875, "learning_rate": 9.564561027791684e-11, "loss": 8.02710055722855e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.32639559358358383, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4999, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 69.90625190734863, "completions/min_length": 22.375, "epoch": 9.939190866219906, "grad_norm": 0.004320772430352747, "kl": 0.065521240234375, "learning_rate": 8.957463419434485e-11, "loss": 6.551534170284867e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5000, "train_speed(iter/s)": 0.0226 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 79.81250143051147, "completions/min_length": 24.375, "epoch": 9.941176470588236, "grad_norm": 0.003311299062815428, "kl": 0.06561279296875, "learning_rate": 8.370268312901662e-11, "loss": 6.555959407705814e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5001, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.125, "completions/mean_length": 68.21875095367432, "completions/min_length": 21.75, "epoch": 9.943162074956565, "grad_norm": 0.0028011093755625142, "kl": 0.061798095703125, "learning_rate": 7.802975941956225e-11, "loss": 6.170413689687848e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5002, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 66.03125190734863, "completions/min_length": 23.0, "epoch": 9.945147679324894, "grad_norm": 0.0029730229984516257, "kl": 0.060546875, "learning_rate": 7.255586532456392e-11, "loss": 6.052818935131654e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5003, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 58.677085399627686, "completions/min_length": 18.875, "epoch": 9.947133283693224, "grad_norm": 0.003736969629975271, "kl": 0.0577392578125, "learning_rate": 6.728100302327844e-11, "loss": 5.776435136795044e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5004, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 63.26041793823242, "completions/min_length": 17.625, "epoch": 9.949118888061554, "grad_norm": 0.0029548581729983373, "kl": 0.05303955078125, "learning_rate": 6.220517461574815e-11, "loss": 5.301543205860071e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5005, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.125, "completions/mean_length": 63.56250190734863, "completions/min_length": 25.125, "epoch": 9.951104492429883, "grad_norm": 0.002751932672667616, "kl": 0.052032470703125, "learning_rate": 5.7328382122745487e-11, "loss": 5.20942521688994e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5006, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 72.75000143051147, "completions/min_length": 25.125, "epoch": 9.953090096798213, "grad_norm": 0.0031973263226006974, "kl": 0.069610595703125, "learning_rate": 5.2650627485884005e-11, "loss": 6.958014273550361e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5007, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.625, "completions/mean_length": 70.23958539962769, "completions/min_length": 24.75, "epoch": 9.955075701166543, "grad_norm": 0.0023261012574901053, "kl": 0.074066162109375, "learning_rate": 4.8171912567396275e-11, "loss": 7.407159137073904e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5008, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.375, "completions/mean_length": 56.78125190734863, "completions/min_length": 19.875, "epoch": 9.957061305534872, "grad_norm": 0.0030203454945909717, "kl": 0.06207275390625, "learning_rate": 4.3892239150411514e-11, "loss": 6.200814095791429e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5009, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.625, "completions/mean_length": 69.86458539962769, "completions/min_length": 26.5, "epoch": 9.959046909903202, "grad_norm": 0.0028704947241669477, "kl": 0.06414794921875, "learning_rate": 3.981160893873348e-11, "loss": 6.415010284399614e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5010, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/mean_length": 73.86458492279053, "completions/min_length": 23.75, "epoch": 9.961032514271531, "grad_norm": 0.003954299585060534, "kl": 0.070037841796875, "learning_rate": 3.593002355695152e-11, "loss": 6.996475713094696e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5011, "train_speed(iter/s)": 0.022598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.625, "completions/mean_length": 69.02083539962769, "completions/min_length": 25.125, "epoch": 9.96301811863986, "grad_norm": 0.0031736604836958876, "kl": 0.0596923828125, "learning_rate": 3.224748455038506e-11, "loss": 5.9735488321166486e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5012, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 69.61458539962769, "completions/min_length": 24.0, "epoch": 9.965003723008191, "grad_norm": 0.0048170577250103335, "kl": 0.057830810546875, "learning_rate": 2.876399338519464e-11, "loss": 5.783676169812679e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5013, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 68.30208539962769, "completions/min_length": 18.875, "epoch": 9.96698932737652, "grad_norm": 0.003635558580733918, "kl": 0.0771484375, "learning_rate": 2.5479551448215342e-11, "loss": 7.724007446086034e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5014, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 79.06250238418579, "completions/min_length": 30.625, "epoch": 9.96897493174485, "grad_norm": 0.003912979604279716, "kl": 0.058502197265625, "learning_rate": 2.2394160047012332e-11, "loss": 5.846464046044275e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5015, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 87.96875238418579, "completions/min_length": 24.25, "epoch": 9.970960536113179, "grad_norm": 0.0030208456802302613, "kl": 0.066802978515625, "learning_rate": 1.950782040993637e-11, "loss": 6.677482451777905e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5016, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.875, "completions/mean_length": 82.66666841506958, "completions/min_length": 33.0, "epoch": 9.97294614048151, "grad_norm": 0.0038933502705710736, "kl": 0.068206787109375, "learning_rate": 1.6820533686179306e-11, "loss": 6.82783720549196e-05, "memory(GiB)": 94.21, "reward": 1.8125, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5017, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.125, "completions/mean_length": 58.520835399627686, "completions/min_length": 20.375, "epoch": 9.974931744849838, "grad_norm": 0.0030747908520010647, "kl": 0.0493621826171875, "learning_rate": 1.4332300945552045e-11, "loss": 4.9329944886267185e-05, "memory(GiB)": 94.21, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.26111647486686707, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5018, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 66.68750286102295, "completions/min_length": 21.5, "epoch": 9.976917349218168, "grad_norm": 0.0031566961583329427, "kl": 0.071258544921875, "learning_rate": 1.2043123178651082e-11, "loss": 7.123083923943341e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5019, "train_speed(iter/s)": 0.022597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 73.39583587646484, "completions/min_length": 29.125, "epoch": 9.978902953586498, "grad_norm": 0.0030517392417054394, "kl": 0.074005126953125, "learning_rate": 9.95300129691401e-12, "loss": 7.393911073449999e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5020, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 67.25000238418579, "completions/min_length": 18.75, "epoch": 9.980888557954827, "grad_norm": 0.00314708597922162, "kl": 0.0772705078125, "learning_rate": 8.061936132397473e-12, "loss": 7.719548011664301e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5021, "train_speed(iter/s)": 0.022596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.875, "completions/mean_length": 69.01041841506958, "completions/min_length": 22.25, "epoch": 9.982874162323156, "grad_norm": 0.0037940584353413995, "kl": 0.055023193359375, "learning_rate": 6.369928438054728e-12, "loss": 5.4935295338509604e-05, "memory(GiB)": 94.21, "reward": 1.6875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6875, "rewards/CineAccuracyORM/std": 0.1958373561501503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5022, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.875, "completions/mean_length": 71.22916889190674, "completions/min_length": 25.625, "epoch": 9.984859766691487, "grad_norm": 0.002759925815876402, "kl": 0.08148193359375, "learning_rate": 4.876978887402572e-12, "loss": 8.154282113537192e-05, "memory(GiB)": 94.21, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5023, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 68.32291841506958, "completions/min_length": 19.375, "epoch": 9.986845371059816, "grad_norm": 0.004699794446605444, "kl": 0.0875244140625, "learning_rate": 3.5830880749099237e-12, "loss": 8.739631448406726e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5024, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 69.64583587646484, "completions/min_length": 26.125, "epoch": 9.988830975428145, "grad_norm": 0.002320346328278786, "kl": 0.05517578125, "learning_rate": 2.4882565156647552e-12, "loss": 5.5175456509459764e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5025, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 70.52083492279053, "completions/min_length": 21.5, "epoch": 9.990816579796476, "grad_norm": 1.5461457951612276, "kl": 0.068878173828125, "learning_rate": 1.592484645540626e-12, "loss": -0.013513864949345589, "memory(GiB)": 94.21, "reward": 1.8645833432674408, "reward_std": 0.025515519082546234, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.16664262861013412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5026, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.625, "completions/mean_length": 66.52083492279053, "completions/min_length": 21.75, "epoch": 9.992802184164805, "grad_norm": 0.0023987120296745787, "kl": 0.06939697265625, "learning_rate": 8.957728211411719e-13, "loss": 6.932149699423462e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5027, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.625, "completions/mean_length": 66.68750143051147, "completions/min_length": 28.375, "epoch": 9.994787788533134, "grad_norm": 0.003938453124709751, "kl": 0.059234619140625, "learning_rate": 3.9812131985561547e-13, "loss": 5.927008896833286e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5028, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 70.76041889190674, "completions/min_length": 24.5, "epoch": 9.996773392901463, "grad_norm": 0.005103637817761054, "kl": 0.067138671875, "learning_rate": 9.953033985876658e-14, "loss": 6.719196971971542e-05, "memory(GiB)": 94.21, "reward": 1.875, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.13055823743343353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5029, "train_speed(iter/s)": 0.022595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 67.50000238418579, "completions/min_length": 26.125, "epoch": 9.998758997269794, "grad_norm": 0.002779238466724921, "kl": 0.0516357421875, "learning_rate": 0.0, "loss": 5.166804476175457e-05, "memory(GiB)": 94.21, "reward": 1.9375, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.06527911871671677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5030, "train_speed(iter/s)": 0.022595 } ], "logging_steps": 1, "max_steps": 5030, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }