{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 192734,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010376996274658336,
      "grad_norm": 2.8394250869750977,
      "learning_rate": 0.0002998972677368809,
      "loss": 7.61689697265625,
      "step": 100
    },
    {
      "epoch": 0.0020753992549316673,
      "grad_norm": 1.2515239715576172,
      "learning_rate": 0.00029979349777413427,
      "loss": 6.9781201171875,
      "step": 200
    },
    {
      "epoch": 0.0031130988823975013,
      "grad_norm": 3.6369314193725586,
      "learning_rate": 0.0002996897278113877,
      "loss": 6.69011474609375,
      "step": 300
    },
    {
      "epoch": 0.0041507985098633345,
      "grad_norm": 2.6945459842681885,
      "learning_rate": 0.0002995859578486411,
      "loss": 6.55205078125,
      "step": 400
    },
    {
      "epoch": 0.005188498137329169,
      "grad_norm": 1.4870922565460205,
      "learning_rate": 0.0002994821878858945,
      "loss": 6.272236938476563,
      "step": 500
    },
    {
      "epoch": 0.006226197764795003,
      "grad_norm": 2.198580265045166,
      "learning_rate": 0.00029937841792314796,
      "loss": 6.2509613037109375,
      "step": 600
    },
    {
      "epoch": 0.007263897392260836,
      "grad_norm": 1.332912564277649,
      "learning_rate": 0.00029927464796040135,
      "loss": 6.2750787353515625,
      "step": 700
    },
    {
      "epoch": 0.008301597019726669,
      "grad_norm": 1.6891261339187622,
      "learning_rate": 0.0002991708779976548,
      "loss": 6.012156372070312,
      "step": 800
    },
    {
      "epoch": 0.009339296647192503,
      "grad_norm": 2.389779806137085,
      "learning_rate": 0.0002990671080349082,
      "loss": 6.011610717773437,
      "step": 900
    },
    {
      "epoch": 0.010376996274658337,
      "grad_norm": 3.896207332611084,
      "learning_rate": 0.0002989633380721616,
      "loss": 5.872296752929688,
      "step": 1000
    },
    {
      "epoch": 0.011414695902124171,
      "grad_norm": 1.2714102268218994,
      "learning_rate": 0.00029885956810941504,
      "loss": 5.8444580078125,
      "step": 1100
    },
    {
      "epoch": 0.012452395529590005,
      "grad_norm": 1.9793014526367188,
      "learning_rate": 0.00029875579814666844,
      "loss": 5.780259399414063,
      "step": 1200
    },
    {
      "epoch": 0.01349009515705584,
      "grad_norm": 1.7210673093795776,
      "learning_rate": 0.0002986520281839219,
      "loss": 5.784580688476563,
      "step": 1300
    },
    {
      "epoch": 0.014527794784521672,
      "grad_norm": 3.133103609085083,
      "learning_rate": 0.0002985482582211753,
      "loss": 5.726546020507812,
      "step": 1400
    },
    {
      "epoch": 0.015565494411987506,
      "grad_norm": 3.7988669872283936,
      "learning_rate": 0.0002984444882584287,
      "loss": 5.659859619140625,
      "step": 1500
    },
    {
      "epoch": 0.016603194039453338,
      "grad_norm": 1.580628514289856,
      "learning_rate": 0.00029834071829568207,
      "loss": 5.710869140625,
      "step": 1600
    },
    {
      "epoch": 0.017640893666919174,
      "grad_norm": 2.1428017616271973,
      "learning_rate": 0.0002982369483329355,
      "loss": 5.61485107421875,
      "step": 1700
    },
    {
      "epoch": 0.018678593294385006,
      "grad_norm": 1.9413044452667236,
      "learning_rate": 0.00029813317837018897,
      "loss": 5.542117309570313,
      "step": 1800
    },
    {
      "epoch": 0.019716292921850842,
      "grad_norm": 1.9118558168411255,
      "learning_rate": 0.00029802940840744236,
      "loss": 5.524238891601563,
      "step": 1900
    },
    {
      "epoch": 0.020753992549316674,
      "grad_norm": 1.9226549863815308,
      "learning_rate": 0.00029792563844469576,
      "loss": 5.544407348632813,
      "step": 2000
    },
    {
      "epoch": 0.02179169217678251,
      "grad_norm": 3.6845390796661377,
      "learning_rate": 0.00029782186848194915,
      "loss": 5.507258911132812,
      "step": 2100
    },
    {
      "epoch": 0.022829391804248342,
      "grad_norm": 1.113272786140442,
      "learning_rate": 0.0002977180985192026,
      "loss": 5.420562133789063,
      "step": 2200
    },
    {
      "epoch": 0.023867091431714175,
      "grad_norm": 1.05723237991333,
      "learning_rate": 0.00029761432855645605,
      "loss": 5.467652587890625,
      "step": 2300
    },
    {
      "epoch": 0.02490479105918001,
      "grad_norm": 3.3967299461364746,
      "learning_rate": 0.00029751055859370944,
      "loss": 5.412258911132812,
      "step": 2400
    },
    {
      "epoch": 0.025942490686645843,
      "grad_norm": 2.4142208099365234,
      "learning_rate": 0.0002974067886309629,
      "loss": 5.421605224609375,
      "step": 2500
    },
    {
      "epoch": 0.02698019031411168,
      "grad_norm": 1.577314853668213,
      "learning_rate": 0.0002973030186682163,
      "loss": 5.2732666015625,
      "step": 2600
    },
    {
      "epoch": 0.02801788994157751,
      "grad_norm": 2.5680480003356934,
      "learning_rate": 0.0002971992487054697,
      "loss": 5.42623779296875,
      "step": 2700
    },
    {
      "epoch": 0.029055589569043343,
      "grad_norm": 1.665701150894165,
      "learning_rate": 0.0002970954787427231,
      "loss": 5.345192260742188,
      "step": 2800
    },
    {
      "epoch": 0.03009328919650918,
      "grad_norm": 1.3420246839523315,
      "learning_rate": 0.0002969917087799765,
      "loss": 5.259754028320312,
      "step": 2900
    },
    {
      "epoch": 0.03113098882397501,
      "grad_norm": 1.4943575859069824,
      "learning_rate": 0.00029688793881723,
      "loss": 5.325694580078125,
      "step": 3000
    },
    {
      "epoch": 0.032168688451440844,
      "grad_norm": 1.7797436714172363,
      "learning_rate": 0.00029678416885448337,
      "loss": 5.393818359375,
      "step": 3100
    },
    {
      "epoch": 0.033206388078906676,
      "grad_norm": 3.023359537124634,
      "learning_rate": 0.00029668039889173677,
      "loss": 5.23187255859375,
      "step": 3200
    },
    {
      "epoch": 0.034244087706372515,
      "grad_norm": 1.9899531602859497,
      "learning_rate": 0.00029657662892899016,
      "loss": 5.1434765625,
      "step": 3300
    },
    {
      "epoch": 0.03528178733383835,
      "grad_norm": 1.0039557218551636,
      "learning_rate": 0.0002964728589662436,
      "loss": 5.28422607421875,
      "step": 3400
    },
    {
      "epoch": 0.03631948696130418,
      "grad_norm": 1.9204686880111694,
      "learning_rate": 0.000296369089003497,
      "loss": 5.149194946289063,
      "step": 3500
    },
    {
      "epoch": 0.03735718658877001,
      "grad_norm": 1.5530883073806763,
      "learning_rate": 0.00029626531904075045,
      "loss": 5.0889456176757815,
      "step": 3600
    },
    {
      "epoch": 0.03839488621623585,
      "grad_norm": 1.4477442502975464,
      "learning_rate": 0.00029616154907800385,
      "loss": 5.225645751953125,
      "step": 3700
    },
    {
      "epoch": 0.039432585843701684,
      "grad_norm": 2.998966693878174,
      "learning_rate": 0.00029605777911525724,
      "loss": 5.127691650390625,
      "step": 3800
    },
    {
      "epoch": 0.040470285471167516,
      "grad_norm": 1.1760146617889404,
      "learning_rate": 0.0002959540091525107,
      "loss": 5.099805908203125,
      "step": 3900
    },
    {
      "epoch": 0.04150798509863335,
      "grad_norm": 1.6684191226959229,
      "learning_rate": 0.0002958502391897641,
      "loss": 5.195625,
      "step": 4000
    },
    {
      "epoch": 0.04254568472609918,
      "grad_norm": 3.276620864868164,
      "learning_rate": 0.00029574646922701754,
      "loss": 5.0514678955078125,
      "step": 4100
    },
    {
      "epoch": 0.04358338435356502,
      "grad_norm": 1.505712628364563,
      "learning_rate": 0.00029564269926427093,
      "loss": 5.234470825195313,
      "step": 4200
    },
    {
      "epoch": 0.04462108398103085,
      "grad_norm": 1.561785101890564,
      "learning_rate": 0.0002955389293015243,
      "loss": 5.18435302734375,
      "step": 4300
    },
    {
      "epoch": 0.045658783608496685,
      "grad_norm": 2.103935956954956,
      "learning_rate": 0.0002954351593387778,
      "loss": 5.127916259765625,
      "step": 4400
    },
    {
      "epoch": 0.04669648323596252,
      "grad_norm": 1.1984394788742065,
      "learning_rate": 0.00029533138937603117,
      "loss": 5.009371032714844,
      "step": 4500
    },
    {
      "epoch": 0.04773418286342835,
      "grad_norm": 1.35122549533844,
      "learning_rate": 0.0002952276194132846,
      "loss": 4.988144836425781,
      "step": 4600
    },
    {
      "epoch": 0.04877188249089419,
      "grad_norm": 1.7199909687042236,
      "learning_rate": 0.000295123849450538,
      "loss": 5.139700317382813,
      "step": 4700
    },
    {
      "epoch": 0.04980958211836002,
      "grad_norm": 2.299783706665039,
      "learning_rate": 0.00029502007948779146,
      "loss": 5.189196166992187,
      "step": 4800
    },
    {
      "epoch": 0.050847281745825854,
      "grad_norm": 1.251342535018921,
      "learning_rate": 0.00029491630952504486,
      "loss": 5.0067724609375,
      "step": 4900
    },
    {
      "epoch": 0.051884981373291686,
      "grad_norm": 1.7228055000305176,
      "learning_rate": 0.00029481253956229825,
      "loss": 5.058696594238281,
      "step": 5000
    },
    {
      "epoch": 0.05292268100075752,
      "grad_norm": 1.2999722957611084,
      "learning_rate": 0.0002947087695995517,
      "loss": 4.953595275878906,
      "step": 5100
    },
    {
      "epoch": 0.05396038062822336,
      "grad_norm": 2.576788902282715,
      "learning_rate": 0.0002946049996368051,
      "loss": 4.935113220214844,
      "step": 5200
    },
    {
      "epoch": 0.05499808025568919,
      "grad_norm": 3.006600856781006,
      "learning_rate": 0.00029450122967405854,
      "loss": 5.13054931640625,
      "step": 5300
    },
    {
      "epoch": 0.05603577988315502,
      "grad_norm": 1.5450797080993652,
      "learning_rate": 0.00029439745971131194,
      "loss": 4.888633117675782,
      "step": 5400
    },
    {
      "epoch": 0.057073479510620855,
      "grad_norm": 1.9071307182312012,
      "learning_rate": 0.00029429368974856533,
      "loss": 4.968219299316406,
      "step": 5500
    },
    {
      "epoch": 0.05811117913808669,
      "grad_norm": 1.2374857664108276,
      "learning_rate": 0.0002941899197858188,
      "loss": 5.0035269165039065,
      "step": 5600
    },
    {
      "epoch": 0.059148878765552526,
      "grad_norm": 1.270337462425232,
      "learning_rate": 0.0002940861498230722,
      "loss": 4.9964404296875,
      "step": 5700
    },
    {
      "epoch": 0.06018657839301836,
      "grad_norm": 2.112285614013672,
      "learning_rate": 0.0002939823798603256,
      "loss": 4.882070007324219,
      "step": 5800
    },
    {
      "epoch": 0.06122427802048419,
      "grad_norm": 1.2048200368881226,
      "learning_rate": 0.000293878609897579,
      "loss": 4.689561767578125,
      "step": 5900
    },
    {
      "epoch": 0.06226197764795002,
      "grad_norm": 1.213274359703064,
      "learning_rate": 0.0002937748399348324,
      "loss": 4.969376525878906,
      "step": 6000
    },
    {
      "epoch": 0.06329967727541586,
      "grad_norm": 1.1453360319137573,
      "learning_rate": 0.00029367106997208587,
      "loss": 4.848797302246094,
      "step": 6100
    },
    {
      "epoch": 0.06433737690288169,
      "grad_norm": 1.78568696975708,
      "learning_rate": 0.00029356730000933926,
      "loss": 4.889250793457031,
      "step": 6200
    },
    {
      "epoch": 0.06537507653034752,
      "grad_norm": 1.004668951034546,
      "learning_rate": 0.0002934635300465927,
      "loss": 4.881064758300782,
      "step": 6300
    },
    {
      "epoch": 0.06641277615781335,
      "grad_norm": 3.34089994430542,
      "learning_rate": 0.0002933597600838461,
      "loss": 4.922989501953125,
      "step": 6400
    },
    {
      "epoch": 0.0674504757852792,
      "grad_norm": 1.7132960557937622,
      "learning_rate": 0.00029325599012109955,
      "loss": 4.900790405273438,
      "step": 6500
    },
    {
      "epoch": 0.06848817541274503,
      "grad_norm": 3.6154215335845947,
      "learning_rate": 0.00029315222015835295,
      "loss": 4.858998718261719,
      "step": 6600
    },
    {
      "epoch": 0.06952587504021086,
      "grad_norm": 2.199787139892578,
      "learning_rate": 0.00029304845019560634,
      "loss": 4.776265258789063,
      "step": 6700
    },
    {
      "epoch": 0.0705635746676767,
      "grad_norm": 1.193831443786621,
      "learning_rate": 0.0002929446802328598,
      "loss": 4.933597717285156,
      "step": 6800
    },
    {
      "epoch": 0.07160127429514253,
      "grad_norm": 1.0364950895309448,
      "learning_rate": 0.0002928409102701132,
      "loss": 4.812368469238281,
      "step": 6900
    },
    {
      "epoch": 0.07263897392260836,
      "grad_norm": 4.54287576675415,
      "learning_rate": 0.00029273714030736664,
      "loss": 4.874449157714844,
      "step": 7000
    },
    {
      "epoch": 0.07367667355007419,
      "grad_norm": 1.9481868743896484,
      "learning_rate": 0.00029263337034462003,
      "loss": 4.836025390625,
      "step": 7100
    },
    {
      "epoch": 0.07471437317754002,
      "grad_norm": 1.5283995866775513,
      "learning_rate": 0.0002925296003818734,
      "loss": 4.789447631835937,
      "step": 7200
    },
    {
      "epoch": 0.07575207280500586,
      "grad_norm": 1.1243209838867188,
      "learning_rate": 0.0002924258304191268,
      "loss": 4.771495971679688,
      "step": 7300
    },
    {
      "epoch": 0.0767897724324717,
      "grad_norm": 1.2010672092437744,
      "learning_rate": 0.00029232206045638027,
      "loss": 4.796032104492188,
      "step": 7400
    },
    {
      "epoch": 0.07782747205993754,
      "grad_norm": 1.3179821968078613,
      "learning_rate": 0.0002922182904936337,
      "loss": 4.949848022460937,
      "step": 7500
    },
    {
      "epoch": 0.07886517168740337,
      "grad_norm": 2.766585111618042,
      "learning_rate": 0.0002921145205308871,
      "loss": 4.7913055419921875,
      "step": 7600
    },
    {
      "epoch": 0.0799028713148692,
      "grad_norm": 1.301639437675476,
      "learning_rate": 0.0002920107505681405,
      "loss": 4.828057556152344,
      "step": 7700
    },
    {
      "epoch": 0.08094057094233503,
      "grad_norm": 1.205676794052124,
      "learning_rate": 0.0002919069806053939,
      "loss": 4.7562734985351565,
      "step": 7800
    },
    {
      "epoch": 0.08197827056980087,
      "grad_norm": 2.1412694454193115,
      "learning_rate": 0.00029180321064264735,
      "loss": 4.7240576171875,
      "step": 7900
    },
    {
      "epoch": 0.0830159701972667,
      "grad_norm": 1.9297393560409546,
      "learning_rate": 0.0002916994406799008,
      "loss": 4.752750244140625,
      "step": 8000
    },
    {
      "epoch": 0.08405366982473253,
      "grad_norm": 1.5971039533615112,
      "learning_rate": 0.0002915956707171542,
      "loss": 4.7790225219726565,
      "step": 8100
    },
    {
      "epoch": 0.08509136945219836,
      "grad_norm": 1.4667614698410034,
      "learning_rate": 0.0002914919007544076,
      "loss": 4.823405151367187,
      "step": 8200
    },
    {
      "epoch": 0.0861290690796642,
      "grad_norm": 1.8018951416015625,
      "learning_rate": 0.000291388130791661,
      "loss": 4.806950378417969,
      "step": 8300
    },
    {
      "epoch": 0.08716676870713004,
      "grad_norm": 3.0917904376983643,
      "learning_rate": 0.00029128436082891443,
      "loss": 4.716513977050782,
      "step": 8400
    },
    {
      "epoch": 0.08820446833459587,
      "grad_norm": 1.8211461305618286,
      "learning_rate": 0.00029118059086616783,
      "loss": 4.803590393066406,
      "step": 8500
    },
    {
      "epoch": 0.0892421679620617,
      "grad_norm": 1.4940656423568726,
      "learning_rate": 0.0002910768209034213,
      "loss": 4.682643737792969,
      "step": 8600
    },
    {
      "epoch": 0.09027986758952754,
      "grad_norm": 1.432560682296753,
      "learning_rate": 0.00029097305094067473,
      "loss": 4.758638610839844,
      "step": 8700
    },
    {
      "epoch": 0.09131756721699337,
      "grad_norm": 1.0015602111816406,
      "learning_rate": 0.0002908692809779281,
      "loss": 4.829322204589844,
      "step": 8800
    },
    {
      "epoch": 0.0923552668444592,
      "grad_norm": 1.3050769567489624,
      "learning_rate": 0.0002907655110151815,
      "loss": 4.62219482421875,
      "step": 8900
    },
    {
      "epoch": 0.09339296647192503,
      "grad_norm": 1.0704928636550903,
      "learning_rate": 0.0002906617410524349,
      "loss": 4.6304998779296875,
      "step": 9000
    },
    {
      "epoch": 0.09443066609939087,
      "grad_norm": 2.2267684936523438,
      "learning_rate": 0.00029055797108968836,
      "loss": 4.664536437988281,
      "step": 9100
    },
    {
      "epoch": 0.0954683657268567,
      "grad_norm": 2.4608747959136963,
      "learning_rate": 0.00029045420112694176,
      "loss": 4.759125366210937,
      "step": 9200
    },
    {
      "epoch": 0.09650606535432253,
      "grad_norm": 1.5068875551223755,
      "learning_rate": 0.0002903504311641952,
      "loss": 4.665271606445312,
      "step": 9300
    },
    {
      "epoch": 0.09754376498178838,
      "grad_norm": 2.078646421432495,
      "learning_rate": 0.0002902466612014486,
      "loss": 4.739638671875,
      "step": 9400
    },
    {
      "epoch": 0.09858146460925421,
      "grad_norm": 1.3762885332107544,
      "learning_rate": 0.000290142891238702,
      "loss": 4.698047485351562,
      "step": 9500
    },
    {
      "epoch": 0.09961916423672004,
      "grad_norm": 1.2879425287246704,
      "learning_rate": 0.00029003912127595544,
      "loss": 4.619927673339844,
      "step": 9600
    },
    {
      "epoch": 0.10065686386418587,
      "grad_norm": 1.584159016609192,
      "learning_rate": 0.00028993535131320884,
      "loss": 4.748394165039063,
      "step": 9700
    },
    {
      "epoch": 0.10169456349165171,
      "grad_norm": 1.453415870666504,
      "learning_rate": 0.0002898315813504623,
      "loss": 4.62876220703125,
      "step": 9800
    },
    {
      "epoch": 0.10273226311911754,
      "grad_norm": 0.965919017791748,
      "learning_rate": 0.0002897278113877157,
      "loss": 4.665562438964844,
      "step": 9900
    },
    {
      "epoch": 0.10376996274658337,
      "grad_norm": 1.2607330083847046,
      "learning_rate": 0.0002896240414249691,
      "loss": 4.7940805053710935,
      "step": 10000
    },
    {
      "epoch": 0.1048076623740492,
      "grad_norm": 1.0126069784164429,
      "learning_rate": 0.0002895202714622225,
      "loss": 4.7508541870117185,
      "step": 10100
    },
    {
      "epoch": 0.10584536200151504,
      "grad_norm": 1.541813850402832,
      "learning_rate": 0.0002894165014994759,
      "loss": 4.57702880859375,
      "step": 10200
    },
    {
      "epoch": 0.10688306162898087,
      "grad_norm": 2.78938889503479,
      "learning_rate": 0.00028931273153672937,
      "loss": 4.652121887207032,
      "step": 10300
    },
    {
      "epoch": 0.10792076125644672,
      "grad_norm": 2.3567938804626465,
      "learning_rate": 0.00028920896157398276,
      "loss": 4.566509094238281,
      "step": 10400
    },
    {
      "epoch": 0.10895846088391255,
      "grad_norm": 1.0480419397354126,
      "learning_rate": 0.0002891051916112362,
      "loss": 4.611513977050781,
      "step": 10500
    },
    {
      "epoch": 0.10999616051137838,
      "grad_norm": 1.577042579650879,
      "learning_rate": 0.0002890014216484896,
      "loss": 4.62977783203125,
      "step": 10600
    },
    {
      "epoch": 0.11103386013884421,
      "grad_norm": 1.5839786529541016,
      "learning_rate": 0.000288897651685743,
      "loss": 4.569055786132813,
      "step": 10700
    },
    {
      "epoch": 0.11207155976631004,
      "grad_norm": 3.9769680500030518,
      "learning_rate": 0.00028879388172299645,
      "loss": 4.6786282348632815,
      "step": 10800
    },
    {
      "epoch": 0.11310925939377588,
      "grad_norm": 1.8089715242385864,
      "learning_rate": 0.00028869011176024985,
      "loss": 4.630350036621094,
      "step": 10900
    },
    {
      "epoch": 0.11414695902124171,
      "grad_norm": 1.4216063022613525,
      "learning_rate": 0.0002885863417975033,
      "loss": 4.669395751953125,
      "step": 11000
    },
    {
      "epoch": 0.11518465864870754,
      "grad_norm": 1.2107151746749878,
      "learning_rate": 0.0002884825718347567,
      "loss": 4.612738342285156,
      "step": 11100
    },
    {
      "epoch": 0.11622235827617337,
      "grad_norm": 1.5037158727645874,
      "learning_rate": 0.0002883788018720101,
      "loss": 4.534631958007813,
      "step": 11200
    },
    {
      "epoch": 0.1172600579036392,
      "grad_norm": 1.1375142335891724,
      "learning_rate": 0.00028827503190926353,
      "loss": 4.803286437988281,
      "step": 11300
    },
    {
      "epoch": 0.11829775753110505,
      "grad_norm": 1.8553053140640259,
      "learning_rate": 0.00028817126194651693,
      "loss": 4.684965515136719,
      "step": 11400
    },
    {
      "epoch": 0.11933545715857088,
      "grad_norm": 5.896717071533203,
      "learning_rate": 0.0002880674919837704,
      "loss": 4.533707275390625,
      "step": 11500
    },
    {
      "epoch": 0.12037315678603672,
      "grad_norm": 0.9495351910591125,
      "learning_rate": 0.0002879637220210238,
      "loss": 4.481864013671875,
      "step": 11600
    },
    {
      "epoch": 0.12141085641350255,
      "grad_norm": 1.2148685455322266,
      "learning_rate": 0.00028785995205827717,
      "loss": 4.508511047363282,
      "step": 11700
    },
    {
      "epoch": 0.12244855604096838,
      "grad_norm": 1.2658835649490356,
      "learning_rate": 0.0002877561820955306,
      "loss": 4.453274841308594,
      "step": 11800
    },
    {
      "epoch": 0.12348625566843421,
      "grad_norm": 1.0808942317962646,
      "learning_rate": 0.000287652412132784,
      "loss": 4.470396118164063,
      "step": 11900
    },
    {
      "epoch": 0.12452395529590005,
      "grad_norm": 2.0280075073242188,
      "learning_rate": 0.00028754864217003746,
      "loss": 4.629884643554687,
      "step": 12000
    },
    {
      "epoch": 0.12556165492336588,
      "grad_norm": 1.6987171173095703,
      "learning_rate": 0.00028744487220729086,
      "loss": 4.673434143066406,
      "step": 12100
    },
    {
      "epoch": 0.1265993545508317,
      "grad_norm": 1.076246976852417,
      "learning_rate": 0.00028734110224454425,
      "loss": 4.707933349609375,
      "step": 12200
    },
    {
      "epoch": 0.12763705417829754,
      "grad_norm": 1.4878133535385132,
      "learning_rate": 0.00028723733228179765,
      "loss": 4.649747924804688,
      "step": 12300
    },
    {
      "epoch": 0.12867475380576338,
      "grad_norm": 1.132073163986206,
      "learning_rate": 0.0002871335623190511,
      "loss": 4.510395812988281,
      "step": 12400
    },
    {
      "epoch": 0.1297124534332292,
      "grad_norm": 1.172968864440918,
      "learning_rate": 0.00028702979235630454,
      "loss": 4.7042324829101565,
      "step": 12500
    },
    {
      "epoch": 0.13075015306069504,
      "grad_norm": 1.331409215927124,
      "learning_rate": 0.00028692602239355794,
      "loss": 4.478284912109375,
      "step": 12600
    },
    {
      "epoch": 0.13178785268816087,
      "grad_norm": 0.9544440507888794,
      "learning_rate": 0.0002868222524308114,
      "loss": 4.574405517578125,
      "step": 12700
    },
    {
      "epoch": 0.1328255523156267,
      "grad_norm": 1.3560587167739868,
      "learning_rate": 0.0002867184824680648,
      "loss": 4.359691467285156,
      "step": 12800
    },
    {
      "epoch": 0.13386325194309256,
      "grad_norm": 1.4807325601577759,
      "learning_rate": 0.0002866147125053182,
      "loss": 4.541731872558594,
      "step": 12900
    },
    {
      "epoch": 0.1349009515705584,
      "grad_norm": 1.0621514320373535,
      "learning_rate": 0.00028651094254257157,
      "loss": 4.442927551269531,
      "step": 13000
    },
    {
      "epoch": 0.13593865119802423,
      "grad_norm": 0.9886642098426819,
      "learning_rate": 0.000286407172579825,
      "loss": 4.690697326660156,
      "step": 13100
    },
    {
      "epoch": 0.13697635082549006,
      "grad_norm": 1.9239803552627563,
      "learning_rate": 0.00028630340261707847,
      "loss": 4.497586669921875,
      "step": 13200
    },
    {
      "epoch": 0.1380140504529559,
      "grad_norm": 1.644500494003296,
      "learning_rate": 0.00028619963265433186,
      "loss": 4.598764038085937,
      "step": 13300
    },
    {
      "epoch": 0.13905175008042173,
      "grad_norm": 1.3600581884384155,
      "learning_rate": 0.00028609586269158526,
      "loss": 4.550304260253906,
      "step": 13400
    },
    {
      "epoch": 0.14008944970788756,
      "grad_norm": 1.4329279661178589,
      "learning_rate": 0.00028599209272883865,
      "loss": 4.506571960449219,
      "step": 13500
    },
    {
      "epoch": 0.1411271493353534,
      "grad_norm": 1.386486291885376,
      "learning_rate": 0.0002858883227660921,
      "loss": 4.419360046386719,
      "step": 13600
    },
    {
      "epoch": 0.14216484896281922,
      "grad_norm": 0.9777548909187317,
      "learning_rate": 0.00028578455280334555,
      "loss": 4.371921691894531,
      "step": 13700
    },
    {
      "epoch": 0.14320254859028506,
      "grad_norm": 1.323614239692688,
      "learning_rate": 0.00028568078284059895,
      "loss": 4.449886474609375,
      "step": 13800
    },
    {
      "epoch": 0.1442402482177509,
      "grad_norm": 2.0104715824127197,
      "learning_rate": 0.00028557701287785234,
      "loss": 4.498194885253906,
      "step": 13900
    },
    {
      "epoch": 0.14527794784521672,
      "grad_norm": 1.040453314781189,
      "learning_rate": 0.00028547324291510574,
      "loss": 4.410159301757813,
      "step": 14000
    },
    {
      "epoch": 0.14631564747268255,
      "grad_norm": 1.6704965829849243,
      "learning_rate": 0.0002853694729523592,
      "loss": 4.4047763061523435,
      "step": 14100
    },
    {
      "epoch": 0.14735334710014839,
      "grad_norm": 1.1640102863311768,
      "learning_rate": 0.0002852657029896126,
      "loss": 4.482722778320312,
      "step": 14200
    },
    {
      "epoch": 0.14839104672761422,
      "grad_norm": 1.5910676717758179,
      "learning_rate": 0.00028516193302686603,
      "loss": 4.464485473632813,
      "step": 14300
    },
    {
      "epoch": 0.14942874635508005,
      "grad_norm": 2.349853277206421,
      "learning_rate": 0.0002850581630641194,
      "loss": 4.478161010742188,
      "step": 14400
    },
    {
      "epoch": 0.15046644598254588,
      "grad_norm": 1.6594980955123901,
      "learning_rate": 0.0002849543931013728,
      "loss": 4.524984741210938,
      "step": 14500
    },
    {
      "epoch": 0.15150414561001171,
      "grad_norm": 1.0867830514907837,
      "learning_rate": 0.00028485062313862627,
      "loss": 4.444278259277343,
      "step": 14600
    },
    {
      "epoch": 0.15254184523747755,
      "grad_norm": 1.4026222229003906,
      "learning_rate": 0.00028474685317587966,
      "loss": 4.562846374511719,
      "step": 14700
    },
    {
      "epoch": 0.1535795448649434,
      "grad_norm": 1.7118810415267944,
      "learning_rate": 0.0002846430832131331,
      "loss": 4.434857177734375,
      "step": 14800
    },
    {
      "epoch": 0.15461724449240924,
      "grad_norm": 1.3377333879470825,
      "learning_rate": 0.0002845393132503865,
      "loss": 4.50284912109375,
      "step": 14900
    },
    {
      "epoch": 0.15565494411987507,
      "grad_norm": 1.0628588199615479,
      "learning_rate": 0.00028443554328763996,
      "loss": 4.467984924316406,
      "step": 15000
    },
    {
      "epoch": 0.1566926437473409,
      "grad_norm": 1.122900366783142,
      "learning_rate": 0.00028433177332489335,
      "loss": 4.477691650390625,
      "step": 15100
    },
    {
      "epoch": 0.15773034337480674,
      "grad_norm": 1.0721949338912964,
      "learning_rate": 0.00028422800336214675,
      "loss": 4.566653137207031,
      "step": 15200
    },
    {
      "epoch": 0.15876804300227257,
      "grad_norm": 2.0959179401397705,
      "learning_rate": 0.0002841242333994002,
      "loss": 4.459400939941406,
      "step": 15300
    },
    {
      "epoch": 0.1598057426297384,
      "grad_norm": 1.832321047782898,
      "learning_rate": 0.0002840204634366536,
      "loss": 4.441622009277344,
      "step": 15400
    },
    {
      "epoch": 0.16084344225720423,
      "grad_norm": 1.9756203889846802,
      "learning_rate": 0.00028391669347390704,
      "loss": 4.5193002319335935,
      "step": 15500
    },
    {
      "epoch": 0.16188114188467007,
      "grad_norm": 1.9734655618667603,
      "learning_rate": 0.00028381292351116043,
      "loss": 4.403963012695312,
      "step": 15600
    },
    {
      "epoch": 0.1629188415121359,
      "grad_norm": 1.0987114906311035,
      "learning_rate": 0.00028370915354841383,
      "loss": 4.3827951049804685,
      "step": 15700
    },
    {
      "epoch": 0.16395654113960173,
      "grad_norm": 1.0084813833236694,
      "learning_rate": 0.0002836053835856673,
      "loss": 4.431182861328125,
      "step": 15800
    },
    {
      "epoch": 0.16499424076706756,
      "grad_norm": 0.8771688342094421,
      "learning_rate": 0.00028350161362292067,
      "loss": 4.386305236816407,
      "step": 15900
    },
    {
      "epoch": 0.1660319403945334,
      "grad_norm": 1.960618495941162,
      "learning_rate": 0.0002833978436601741,
      "loss": 4.450301513671875,
      "step": 16000
    },
    {
      "epoch": 0.16706964002199923,
      "grad_norm": 2.016059398651123,
      "learning_rate": 0.0002832940736974275,
      "loss": 4.443774719238281,
      "step": 16100
    },
    {
      "epoch": 0.16810733964946506,
      "grad_norm": 2.1017072200775146,
      "learning_rate": 0.0002831903037346809,
      "loss": 4.387731323242187,
      "step": 16200
    },
    {
      "epoch": 0.1691450392769309,
      "grad_norm": 3.876704216003418,
      "learning_rate": 0.00028308653377193436,
      "loss": 4.339099731445312,
      "step": 16300
    },
    {
      "epoch": 0.17018273890439672,
      "grad_norm": 2.4443888664245605,
      "learning_rate": 0.00028298276380918776,
      "loss": 4.420601196289063,
      "step": 16400
    },
    {
      "epoch": 0.17122043853186256,
      "grad_norm": 2.2986700534820557,
      "learning_rate": 0.0002828789938464412,
      "loss": 4.574692687988281,
      "step": 16500
    },
    {
      "epoch": 0.1722581381593284,
      "grad_norm": 3.120959997177124,
      "learning_rate": 0.0002827752238836946,
      "loss": 4.3793856811523435,
      "step": 16600
    },
    {
      "epoch": 0.17329583778679422,
      "grad_norm": 3.928020715713501,
      "learning_rate": 0.00028267145392094805,
      "loss": 4.389268188476563,
      "step": 16700
    },
    {
      "epoch": 0.17433353741426008,
      "grad_norm": 1.5828691720962524,
      "learning_rate": 0.00028256768395820144,
      "loss": 4.353381652832031,
      "step": 16800
    },
    {
      "epoch": 0.1753712370417259,
      "grad_norm": 1.0565470457077026,
      "learning_rate": 0.00028246391399545484,
      "loss": 4.289037170410157,
      "step": 16900
    },
    {
      "epoch": 0.17640893666919175,
      "grad_norm": 1.7072774171829224,
      "learning_rate": 0.0002823601440327083,
      "loss": 4.325290832519531,
      "step": 17000
    },
    {
      "epoch": 0.17744663629665758,
      "grad_norm": 1.0402146577835083,
      "learning_rate": 0.0002822563740699617,
      "loss": 4.450514221191407,
      "step": 17100
    },
    {
      "epoch": 0.1784843359241234,
      "grad_norm": 1.4970057010650635,
      "learning_rate": 0.00028215260410721513,
      "loss": 4.393040161132813,
      "step": 17200
    },
    {
      "epoch": 0.17952203555158924,
      "grad_norm": 1.266546607017517,
      "learning_rate": 0.0002820488341444685,
      "loss": 4.276432800292969,
      "step": 17300
    },
    {
      "epoch": 0.18055973517905508,
      "grad_norm": 1.751590371131897,
      "learning_rate": 0.0002819450641817219,
      "loss": 4.40036376953125,
      "step": 17400
    },
    {
      "epoch": 0.1815974348065209,
      "grad_norm": 1.5430057048797607,
      "learning_rate": 0.00028184129421897537,
      "loss": 4.279835205078125,
      "step": 17500
    },
    {
      "epoch": 0.18263513443398674,
      "grad_norm": 4.205715179443359,
      "learning_rate": 0.00028173752425622876,
      "loss": 4.501398315429688,
      "step": 17600
    },
    {
      "epoch": 0.18367283406145257,
      "grad_norm": 2.2290608882904053,
      "learning_rate": 0.0002816337542934822,
      "loss": 4.400292053222656,
      "step": 17700
    },
    {
      "epoch": 0.1847105336889184,
      "grad_norm": 1.6409145593643188,
      "learning_rate": 0.0002815299843307356,
      "loss": 4.361965026855469,
      "step": 17800
    },
    {
      "epoch": 0.18574823331638424,
      "grad_norm": 1.235737919807434,
      "learning_rate": 0.000281426214367989,
      "loss": 4.4263699340820315,
      "step": 17900
    },
    {
      "epoch": 0.18678593294385007,
      "grad_norm": 1.8182483911514282,
      "learning_rate": 0.0002813224444052424,
      "loss": 4.38103759765625,
      "step": 18000
    },
    {
      "epoch": 0.1878236325713159,
      "grad_norm": 1.725359559059143,
      "learning_rate": 0.00028121867444249585,
      "loss": 4.332106323242187,
      "step": 18100
    },
    {
      "epoch": 0.18886133219878173,
      "grad_norm": 1.9186443090438843,
      "learning_rate": 0.0002811149044797493,
      "loss": 4.354175415039062,
      "step": 18200
    },
    {
      "epoch": 0.18989903182624757,
      "grad_norm": 1.1907823085784912,
      "learning_rate": 0.0002810111345170027,
      "loss": 4.521398315429687,
      "step": 18300
    },
    {
      "epoch": 0.1909367314537134,
      "grad_norm": 2.796095609664917,
      "learning_rate": 0.0002809073645542561,
      "loss": 4.280415649414063,
      "step": 18400
    },
    {
      "epoch": 0.19197443108117923,
      "grad_norm": 2.043811798095703,
      "learning_rate": 0.0002808035945915095,
      "loss": 4.364379272460938,
      "step": 18500
    },
    {
      "epoch": 0.19301213070864506,
      "grad_norm": 6.419173240661621,
      "learning_rate": 0.00028069982462876293,
      "loss": 4.420321044921875,
      "step": 18600
    },
    {
      "epoch": 0.1940498303361109,
      "grad_norm": 2.0183868408203125,
      "learning_rate": 0.0002805960546660163,
      "loss": 4.203153381347656,
      "step": 18700
    },
    {
      "epoch": 0.19508752996357676,
      "grad_norm": 1.1752562522888184,
      "learning_rate": 0.00028049228470326977,
      "loss": 4.362376098632812,
      "step": 18800
    },
    {
      "epoch": 0.1961252295910426,
      "grad_norm": 1.7152916193008423,
      "learning_rate": 0.0002803885147405232,
      "loss": 4.423097229003906,
      "step": 18900
    },
    {
      "epoch": 0.19716292921850842,
      "grad_norm": 0.8988032341003418,
      "learning_rate": 0.0002802847447777766,
      "loss": 4.291071166992188,
      "step": 19000
    },
    {
      "epoch": 0.19820062884597425,
      "grad_norm": 1.2874023914337158,
      "learning_rate": 0.00028018097481503,
      "loss": 4.257485046386718,
      "step": 19100
    },
    {
      "epoch": 0.19923832847344009,
      "grad_norm": 3.89581561088562,
      "learning_rate": 0.0002800772048522834,
      "loss": 4.355436401367188,
      "step": 19200
    },
    {
      "epoch": 0.20027602810090592,
      "grad_norm": 1.4264250993728638,
      "learning_rate": 0.00027997343488953686,
      "loss": 4.268387451171875,
      "step": 19300
    },
    {
      "epoch": 0.20131372772837175,
      "grad_norm": 2.3243231773376465,
      "learning_rate": 0.0002798696649267903,
      "loss": 4.248961791992188,
      "step": 19400
    },
    {
      "epoch": 0.20235142735583758,
      "grad_norm": 1.609995722770691,
      "learning_rate": 0.0002797658949640437,
      "loss": 4.299253845214844,
      "step": 19500
    },
    {
      "epoch": 0.20338912698330341,
      "grad_norm": 1.636496901512146,
      "learning_rate": 0.0002796621250012971,
      "loss": 4.379757690429687,
      "step": 19600
    },
    {
      "epoch": 0.20442682661076925,
      "grad_norm": 1.742827296257019,
      "learning_rate": 0.0002795583550385505,
      "loss": 4.298026733398437,
      "step": 19700
    },
    {
      "epoch": 0.20546452623823508,
      "grad_norm": 1.3360769748687744,
      "learning_rate": 0.00027945458507580394,
      "loss": 4.443134155273437,
      "step": 19800
    },
    {
      "epoch": 0.2065022258657009,
      "grad_norm": 1.5279536247253418,
      "learning_rate": 0.00027935081511305733,
      "loss": 4.3536380004882815,
      "step": 19900
    },
    {
      "epoch": 0.20753992549316674,
      "grad_norm": 1.2768709659576416,
      "learning_rate": 0.0002792470451503108,
      "loss": 4.420497741699219,
      "step": 20000
    },
    {
      "epoch": 0.20857762512063258,
      "grad_norm": 1.1040194034576416,
      "learning_rate": 0.0002791432751875642,
      "loss": 4.308759155273438,
      "step": 20100
    },
    {
      "epoch": 0.2096153247480984,
      "grad_norm": 1.5710710287094116,
      "learning_rate": 0.00027903950522481757,
      "loss": 4.188085021972657,
      "step": 20200
    },
    {
      "epoch": 0.21065302437556424,
      "grad_norm": 0.9058725237846375,
      "learning_rate": 0.000278935735262071,
      "loss": 4.162925720214844,
      "step": 20300
    },
    {
      "epoch": 0.21169072400303007,
      "grad_norm": 2.4681508541107178,
      "learning_rate": 0.0002788319652993244,
      "loss": 4.207759704589844,
      "step": 20400
    },
    {
      "epoch": 0.2127284236304959,
      "grad_norm": 1.7522861957550049,
      "learning_rate": 0.00027872819533657786,
      "loss": 4.448352355957031,
      "step": 20500
    },
    {
      "epoch": 0.21376612325796174,
      "grad_norm": 1.8361260890960693,
      "learning_rate": 0.00027862442537383126,
      "loss": 4.27260986328125,
      "step": 20600
    },
    {
      "epoch": 0.2148038228854276,
      "grad_norm": 1.7720355987548828,
      "learning_rate": 0.0002785206554110847,
      "loss": 4.315809326171875,
      "step": 20700
    },
    {
      "epoch": 0.21584152251289343,
      "grad_norm": 2.2454731464385986,
      "learning_rate": 0.0002784168854483381,
      "loss": 4.421763916015625,
      "step": 20800
    },
    {
      "epoch": 0.21687922214035926,
      "grad_norm": 2.7393276691436768,
      "learning_rate": 0.0002783131154855915,
      "loss": 4.268560791015625,
      "step": 20900
    },
    {
      "epoch": 0.2179169217678251,
      "grad_norm": 1.8933848142623901,
      "learning_rate": 0.00027820934552284495,
      "loss": 4.316322937011718,
      "step": 21000
    },
    {
      "epoch": 0.21895462139529093,
      "grad_norm": 1.2294155359268188,
      "learning_rate": 0.00027810557556009834,
      "loss": 4.247787780761719,
      "step": 21100
    },
    {
      "epoch": 0.21999232102275676,
      "grad_norm": 1.5950024127960205,
      "learning_rate": 0.0002780018055973518,
      "loss": 4.292718811035156,
      "step": 21200
    },
    {
      "epoch": 0.2210300206502226,
      "grad_norm": 0.9710947275161743,
      "learning_rate": 0.0002778980356346052,
      "loss": 4.238976135253906,
      "step": 21300
    },
    {
      "epoch": 0.22206772027768842,
      "grad_norm": 1.3599995374679565,
      "learning_rate": 0.0002777942656718586,
      "loss": 4.441769409179687,
      "step": 21400
    },
    {
      "epoch": 0.22310541990515426,
      "grad_norm": 1.2248610258102417,
      "learning_rate": 0.00027769049570911203,
      "loss": 4.34153564453125,
      "step": 21500
    },
    {
      "epoch": 0.2241431195326201,
      "grad_norm": 1.07679283618927,
      "learning_rate": 0.0002775867257463654,
      "loss": 4.307798767089844,
      "step": 21600
    },
    {
      "epoch": 0.22518081916008592,
      "grad_norm": 2.6134791374206543,
      "learning_rate": 0.0002774829557836189,
      "loss": 4.170127868652344,
      "step": 21700
    },
    {
      "epoch": 0.22621851878755175,
      "grad_norm": 3.8844735622406006,
      "learning_rate": 0.00027737918582087227,
      "loss": 4.2596041870117185,
      "step": 21800
    },
    {
      "epoch": 0.22725621841501759,
      "grad_norm": 3.4798216819763184,
      "learning_rate": 0.00027727541585812566,
      "loss": 4.257220153808594,
      "step": 21900
    },
    {
      "epoch": 0.22829391804248342,
      "grad_norm": 1.0172936916351318,
      "learning_rate": 0.0002771716458953791,
      "loss": 4.342347717285156,
      "step": 22000
    },
    {
      "epoch": 0.22933161766994925,
      "grad_norm": 2.0007245540618896,
      "learning_rate": 0.0002770678759326325,
      "loss": 4.21951171875,
      "step": 22100
    },
    {
      "epoch": 0.23036931729741508,
      "grad_norm": 1.0652577877044678,
      "learning_rate": 0.00027696410596988596,
      "loss": 4.309334411621093,
      "step": 22200
    },
    {
      "epoch": 0.23140701692488092,
      "grad_norm": 1.0696879625320435,
      "learning_rate": 0.00027686033600713935,
      "loss": 4.333943481445313,
      "step": 22300
    },
    {
      "epoch": 0.23244471655234675,
      "grad_norm": 1.0693758726119995,
      "learning_rate": 0.00027675656604439275,
      "loss": 4.325413513183594,
      "step": 22400
    },
    {
      "epoch": 0.23348241617981258,
      "grad_norm": 1.3958321809768677,
      "learning_rate": 0.00027665279608164614,
      "loss": 4.1349484252929685,
      "step": 22500
    },
    {
      "epoch": 0.2345201158072784,
      "grad_norm": 1.732444167137146,
      "learning_rate": 0.0002765490261188996,
      "loss": 4.191957397460937,
      "step": 22600
    },
    {
      "epoch": 0.23555781543474427,
      "grad_norm": 1.329959750175476,
      "learning_rate": 0.00027644525615615304,
      "loss": 4.440416870117187,
      "step": 22700
    },
    {
      "epoch": 0.2365955150622101,
      "grad_norm": 1.4088762998580933,
      "learning_rate": 0.00027634148619340643,
      "loss": 4.128535461425781,
      "step": 22800
    },
    {
      "epoch": 0.23763321468967594,
      "grad_norm": 1.167936086654663,
      "learning_rate": 0.0002762377162306599,
      "loss": 4.3338143920898435,
      "step": 22900
    },
    {
      "epoch": 0.23867091431714177,
      "grad_norm": 1.1570918560028076,
      "learning_rate": 0.0002761339462679133,
      "loss": 4.180432739257813,
      "step": 23000
    },
    {
      "epoch": 0.2397086139446076,
      "grad_norm": 1.2544199228286743,
      "learning_rate": 0.00027603017630516667,
      "loss": 4.1538671875,
      "step": 23100
    },
    {
      "epoch": 0.24074631357207343,
      "grad_norm": 1.844802975654602,
      "learning_rate": 0.0002759264063424201,
      "loss": 4.238400268554687,
      "step": 23200
    },
    {
      "epoch": 0.24178401319953927,
      "grad_norm": 2.407107353210449,
      "learning_rate": 0.0002758226363796735,
      "loss": 4.1402197265625,
      "step": 23300
    },
    {
      "epoch": 0.2428217128270051,
      "grad_norm": 1.7526997327804565,
      "learning_rate": 0.00027571886641692696,
      "loss": 4.253873901367188,
      "step": 23400
    },
    {
      "epoch": 0.24385941245447093,
      "grad_norm": 2.1768147945404053,
      "learning_rate": 0.00027561509645418036,
      "loss": 4.146066589355469,
      "step": 23500
    },
    {
      "epoch": 0.24489711208193676,
      "grad_norm": 1.0545059442520142,
      "learning_rate": 0.00027551132649143375,
      "loss": 4.199613037109375,
      "step": 23600
    },
    {
      "epoch": 0.2459348117094026,
      "grad_norm": 1.2132643461227417,
      "learning_rate": 0.00027540755652868715,
      "loss": 4.202657775878906,
      "step": 23700
    },
    {
      "epoch": 0.24697251133686843,
      "grad_norm": 2.1652746200561523,
      "learning_rate": 0.0002753037865659406,
      "loss": 4.301669311523438,
      "step": 23800
    },
    {
      "epoch": 0.24801021096433426,
      "grad_norm": 1.0687705278396606,
      "learning_rate": 0.00027520001660319405,
      "loss": 4.310574340820312,
      "step": 23900
    },
    {
      "epoch": 0.2490479105918001,
      "grad_norm": 2.6030638217926025,
      "learning_rate": 0.00027509624664044744,
      "loss": 4.220720825195312,
      "step": 24000
    },
    {
      "epoch": 0.25008561021926595,
      "grad_norm": 0.9720291495323181,
      "learning_rate": 0.00027499247667770084,
      "loss": 4.376803283691406,
      "step": 24100
    },
    {
      "epoch": 0.25112330984673176,
      "grad_norm": 1.398289680480957,
      "learning_rate": 0.00027488870671495423,
      "loss": 4.39901123046875,
      "step": 24200
    },
    {
      "epoch": 0.2521610094741976,
      "grad_norm": 2.2055957317352295,
      "learning_rate": 0.0002747849367522077,
      "loss": 4.196527709960938,
      "step": 24300
    },
    {
      "epoch": 0.2531987091016634,
      "grad_norm": 2.036271810531616,
      "learning_rate": 0.0002746811667894611,
      "loss": 4.274451599121094,
      "step": 24400
    },
    {
      "epoch": 0.2542364087291293,
      "grad_norm": 2.6011345386505127,
      "learning_rate": 0.0002745773968267145,
      "loss": 4.2699462890625,
      "step": 24500
    },
    {
      "epoch": 0.2552741083565951,
      "grad_norm": 1.9660414457321167,
      "learning_rate": 0.0002744736268639679,
      "loss": 4.2452325439453125,
      "step": 24600
    },
    {
      "epoch": 0.25631180798406095,
      "grad_norm": 1.2747102975845337,
      "learning_rate": 0.0002743698569012213,
      "loss": 4.348042907714844,
      "step": 24700
    },
    {
      "epoch": 0.25734950761152675,
      "grad_norm": 1.4823510646820068,
      "learning_rate": 0.00027426608693847476,
      "loss": 4.154461669921875,
      "step": 24800
    },
    {
      "epoch": 0.2583872072389926,
      "grad_norm": 1.6665210723876953,
      "learning_rate": 0.00027416231697572816,
      "loss": 4.136954956054687,
      "step": 24900
    },
    {
      "epoch": 0.2594249068664584,
      "grad_norm": 1.8465914726257324,
      "learning_rate": 0.0002740585470129816,
      "loss": 4.296747741699218,
      "step": 25000
    },
    {
      "epoch": 0.2604626064939243,
      "grad_norm": 1.0613303184509277,
      "learning_rate": 0.00027395477705023506,
      "loss": 4.209448547363281,
      "step": 25100
    },
    {
      "epoch": 0.2615003061213901,
      "grad_norm": 2.3083701133728027,
      "learning_rate": 0.00027385100708748845,
      "loss": 4.412258911132812,
      "step": 25200
    },
    {
      "epoch": 0.26253800574885594,
      "grad_norm": 1.8509588241577148,
      "learning_rate": 0.00027374723712474185,
      "loss": 4.171485595703125,
      "step": 25300
    },
    {
      "epoch": 0.26357570537632175,
      "grad_norm": 1.091736078262329,
      "learning_rate": 0.00027364346716199524,
      "loss": 4.24049560546875,
      "step": 25400
    },
    {
      "epoch": 0.2646134050037876,
      "grad_norm": 1.201401710510254,
      "learning_rate": 0.0002735396971992487,
      "loss": 4.135834350585937,
      "step": 25500
    },
    {
      "epoch": 0.2656511046312534,
      "grad_norm": 1.5545823574066162,
      "learning_rate": 0.0002734359272365021,
      "loss": 4.291419677734375,
      "step": 25600
    },
    {
      "epoch": 0.26668880425871927,
      "grad_norm": 1.3560378551483154,
      "learning_rate": 0.00027333215727375553,
      "loss": 4.236996459960937,
      "step": 25700
    },
    {
      "epoch": 0.26772650388618513,
      "grad_norm": 1.0210782289505005,
      "learning_rate": 0.00027322838731100893,
      "loss": 4.249810791015625,
      "step": 25800
    },
    {
      "epoch": 0.26876420351365093,
      "grad_norm": 1.3093341588974,
      "learning_rate": 0.0002731246173482623,
      "loss": 4.195414428710937,
      "step": 25900
    },
    {
      "epoch": 0.2698019031411168,
      "grad_norm": 1.7895358800888062,
      "learning_rate": 0.00027302084738551577,
      "loss": 4.180751037597656,
      "step": 26000
    },
    {
      "epoch": 0.2708396027685826,
      "grad_norm": 11.451671600341797,
      "learning_rate": 0.00027291707742276917,
      "loss": 4.157826538085938,
      "step": 26100
    },
    {
      "epoch": 0.27187730239604846,
      "grad_norm": 1.9708665609359741,
      "learning_rate": 0.0002728133074600226,
      "loss": 4.128204956054687,
      "step": 26200
    },
    {
      "epoch": 0.27291500202351426,
      "grad_norm": 1.2628132104873657,
      "learning_rate": 0.000272709537497276,
      "loss": 4.281667785644531,
      "step": 26300
    },
    {
      "epoch": 0.2739527016509801,
      "grad_norm": 2.2199666500091553,
      "learning_rate": 0.0002726057675345294,
      "loss": 4.237691650390625,
      "step": 26400
    },
    {
      "epoch": 0.27499040127844593,
      "grad_norm": 2.815150022506714,
      "learning_rate": 0.00027250199757178285,
      "loss": 4.080834045410156,
      "step": 26500
    },
    {
      "epoch": 0.2760281009059118,
      "grad_norm": 1.7167062759399414,
      "learning_rate": 0.00027239822760903625,
      "loss": 4.224625549316406,
      "step": 26600
    },
    {
      "epoch": 0.2770658005333776,
      "grad_norm": 2.769949436187744,
      "learning_rate": 0.0002722944576462897,
      "loss": 4.3115145874023435,
      "step": 26700
    },
    {
      "epoch": 0.27810350016084345,
      "grad_norm": 1.3523616790771484,
      "learning_rate": 0.0002721906876835431,
      "loss": 4.356557006835938,
      "step": 26800
    },
    {
      "epoch": 0.27914119978830926,
      "grad_norm": 4.089077949523926,
      "learning_rate": 0.00027208691772079654,
      "loss": 4.286115112304688,
      "step": 26900
    },
    {
      "epoch": 0.2801788994157751,
      "grad_norm": 1.1650248765945435,
      "learning_rate": 0.00027198314775804994,
      "loss": 4.335249328613282,
      "step": 27000
    },
    {
      "epoch": 0.2812165990432409,
      "grad_norm": 1.8776350021362305,
      "learning_rate": 0.00027187937779530333,
      "loss": 4.274792175292969,
      "step": 27100
    },
    {
      "epoch": 0.2822542986707068,
      "grad_norm": 3.665797710418701,
      "learning_rate": 0.0002717756078325568,
      "loss": 4.347820739746094,
      "step": 27200
    },
    {
      "epoch": 0.2832919982981726,
      "grad_norm": 1.1905182600021362,
      "learning_rate": 0.0002716718378698102,
      "loss": 4.234444274902343,
      "step": 27300
    },
    {
      "epoch": 0.28432969792563845,
      "grad_norm": 1.2664549350738525,
      "learning_rate": 0.0002715680679070636,
      "loss": 4.19026123046875,
      "step": 27400
    },
    {
      "epoch": 0.28536739755310425,
      "grad_norm": 1.5952035188674927,
      "learning_rate": 0.000271464297944317,
      "loss": 4.284921569824219,
      "step": 27500
    },
    {
      "epoch": 0.2864050971805701,
      "grad_norm": 1.5898215770721436,
      "learning_rate": 0.0002713605279815704,
      "loss": 4.128340759277344,
      "step": 27600
    },
    {
      "epoch": 0.28744279680803597,
      "grad_norm": 1.701250433921814,
      "learning_rate": 0.00027125675801882386,
      "loss": 4.1064456176757815,
      "step": 27700
    },
    {
      "epoch": 0.2884804964355018,
      "grad_norm": 2.2521140575408936,
      "learning_rate": 0.00027115298805607726,
      "loss": 4.188478698730469,
      "step": 27800
    },
    {
      "epoch": 0.28951819606296764,
      "grad_norm": 1.428589105606079,
      "learning_rate": 0.0002710492180933307,
      "loss": 4.172950134277344,
      "step": 27900
    },
    {
      "epoch": 0.29055589569043344,
      "grad_norm": 1.5243910551071167,
      "learning_rate": 0.0002709454481305841,
      "loss": 4.251683044433594,
      "step": 28000
    },
    {
      "epoch": 0.2915935953178993,
      "grad_norm": 1.285276174545288,
      "learning_rate": 0.0002708416781678375,
      "loss": 4.291034851074219,
      "step": 28100
    },
    {
      "epoch": 0.2926312949453651,
      "grad_norm": 1.2959215641021729,
      "learning_rate": 0.0002707379082050909,
      "loss": 4.223204040527344,
      "step": 28200
    },
    {
      "epoch": 0.29366899457283097,
      "grad_norm": 1.9572069644927979,
      "learning_rate": 0.00027063413824234434,
      "loss": 4.1140069580078125,
      "step": 28300
    },
    {
      "epoch": 0.29470669420029677,
      "grad_norm": 2.5625929832458496,
      "learning_rate": 0.0002705303682795978,
      "loss": 4.2418734741210935,
      "step": 28400
    },
    {
      "epoch": 0.29574439382776263,
      "grad_norm": 1.657065510749817,
      "learning_rate": 0.0002704265983168512,
      "loss": 4.2059628295898435,
      "step": 28500
    },
    {
      "epoch": 0.29678209345522844,
      "grad_norm": 1.4735133647918701,
      "learning_rate": 0.0002703228283541046,
      "loss": 4.232904663085938,
      "step": 28600
    },
    {
      "epoch": 0.2978197930826943,
      "grad_norm": 2.643979549407959,
      "learning_rate": 0.000270219058391358,
      "loss": 4.151640930175781,
      "step": 28700
    },
    {
      "epoch": 0.2988574927101601,
      "grad_norm": 1.5147004127502441,
      "learning_rate": 0.0002701152884286114,
      "loss": 4.171849060058594,
      "step": 28800
    },
    {
      "epoch": 0.29989519233762596,
      "grad_norm": 1.4815659523010254,
      "learning_rate": 0.00027001151846586487,
      "loss": 4.120007019042969,
      "step": 28900
    },
    {
      "epoch": 0.30093289196509176,
      "grad_norm": 3.8772029876708984,
      "learning_rate": 0.00026990774850311827,
      "loss": 4.113840637207031,
      "step": 29000
    },
    {
      "epoch": 0.3019705915925576,
      "grad_norm": 1.8152740001678467,
      "learning_rate": 0.0002698039785403717,
      "loss": 4.143219604492187,
      "step": 29100
    },
    {
      "epoch": 0.30300829122002343,
      "grad_norm": 1.3441669940948486,
      "learning_rate": 0.0002697002085776251,
      "loss": 4.151035461425781,
      "step": 29200
    },
    {
      "epoch": 0.3040459908474893,
      "grad_norm": 2.0656609535217285,
      "learning_rate": 0.0002695964386148785,
      "loss": 4.229763793945312,
      "step": 29300
    },
    {
      "epoch": 0.3050836904749551,
      "grad_norm": 2.8376095294952393,
      "learning_rate": 0.0002694926686521319,
      "loss": 4.2303158569335935,
      "step": 29400
    },
    {
      "epoch": 0.30612139010242095,
      "grad_norm": 1.9161107540130615,
      "learning_rate": 0.00026938889868938535,
      "loss": 4.252763061523438,
      "step": 29500
    },
    {
      "epoch": 0.3071590897298868,
      "grad_norm": 2.1317851543426514,
      "learning_rate": 0.0002692851287266388,
      "loss": 4.12993408203125,
      "step": 29600
    },
    {
      "epoch": 0.3081967893573526,
      "grad_norm": 2.9762330055236816,
      "learning_rate": 0.0002691813587638922,
      "loss": 4.347277221679687,
      "step": 29700
    },
    {
      "epoch": 0.3092344889848185,
      "grad_norm": 2.135929584503174,
      "learning_rate": 0.0002690775888011456,
      "loss": 4.052276611328125,
      "step": 29800
    },
    {
      "epoch": 0.3102721886122843,
      "grad_norm": 1.3577543497085571,
      "learning_rate": 0.000268973818838399,
      "loss": 4.199589233398438,
      "step": 29900
    },
    {
      "epoch": 0.31130988823975014,
      "grad_norm": 1.2834597826004028,
      "learning_rate": 0.00026887004887565243,
      "loss": 4.134565734863282,
      "step": 30000
    },
    {
      "epoch": 0.31234758786721595,
      "grad_norm": 2.093669891357422,
      "learning_rate": 0.00026876627891290583,
      "loss": 4.183307495117187,
      "step": 30100
    },
    {
      "epoch": 0.3133852874946818,
      "grad_norm": 1.1888537406921387,
      "learning_rate": 0.0002686625089501593,
      "loss": 4.022268371582031,
      "step": 30200
    },
    {
      "epoch": 0.3144229871221476,
      "grad_norm": 1.4640058279037476,
      "learning_rate": 0.00026855873898741267,
      "loss": 4.191292724609375,
      "step": 30300
    },
    {
      "epoch": 0.3154606867496135,
      "grad_norm": 0.9469636678695679,
      "learning_rate": 0.00026845496902466607,
      "loss": 4.2131259155273435,
      "step": 30400
    },
    {
      "epoch": 0.3164983863770793,
      "grad_norm": 1.5227535963058472,
      "learning_rate": 0.0002683511990619195,
      "loss": 4.24783935546875,
      "step": 30500
    },
    {
      "epoch": 0.31753608600454514,
      "grad_norm": 2.524731159210205,
      "learning_rate": 0.0002682474290991729,
      "loss": 4.206085205078125,
      "step": 30600
    },
    {
      "epoch": 0.31857378563201094,
      "grad_norm": 2.7074637413024902,
      "learning_rate": 0.00026814365913642636,
      "loss": 3.964501953125,
      "step": 30700
    },
    {
      "epoch": 0.3196114852594768,
      "grad_norm": 2.1479899883270264,
      "learning_rate": 0.00026803988917367975,
      "loss": 4.121002197265625,
      "step": 30800
    },
    {
      "epoch": 0.3206491848869426,
      "grad_norm": 3.6871800422668457,
      "learning_rate": 0.00026793611921093315,
      "loss": 4.290604858398438,
      "step": 30900
    },
    {
      "epoch": 0.32168688451440847,
      "grad_norm": 2.0092685222625732,
      "learning_rate": 0.0002678323492481866,
      "loss": 4.169475708007813,
      "step": 31000
    },
    {
      "epoch": 0.32272458414187427,
      "grad_norm": 1.3000237941741943,
      "learning_rate": 0.00026772857928544,
      "loss": 4.096010131835937,
      "step": 31100
    },
    {
      "epoch": 0.32376228376934013,
      "grad_norm": 2.161574125289917,
      "learning_rate": 0.00026762480932269344,
      "loss": 4.249325561523437,
      "step": 31200
    },
    {
      "epoch": 0.32479998339680594,
      "grad_norm": 1.0579701662063599,
      "learning_rate": 0.00026752103935994684,
      "loss": 4.270779724121094,
      "step": 31300
    },
    {
      "epoch": 0.3258376830242718,
      "grad_norm": 1.2264137268066406,
      "learning_rate": 0.0002674172693972003,
      "loss": 4.2367620849609375,
      "step": 31400
    },
    {
      "epoch": 0.3268753826517376,
      "grad_norm": 3.2623612880706787,
      "learning_rate": 0.0002673134994344537,
      "loss": 4.160564575195313,
      "step": 31500
    },
    {
      "epoch": 0.32791308227920346,
      "grad_norm": 2.1803345680236816,
      "learning_rate": 0.0002672097294717071,
| "loss": 4.203829040527344, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.3289507819066693, | |
| "grad_norm": 1.9515228271484375, | |
| "learning_rate": 0.0002671059595089605, | |
| "loss": 4.315436706542969, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.3299884815341351, | |
| "grad_norm": 3.0683810710906982, | |
| "learning_rate": 0.0002670021895462139, | |
| "loss": 4.155743103027344, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.331026181161601, | |
| "grad_norm": 2.6642050743103027, | |
| "learning_rate": 0.00026689841958346737, | |
| "loss": 4.222473754882812, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.3320638807890668, | |
| "grad_norm": 1.8333579301834106, | |
| "learning_rate": 0.00026679464962072076, | |
| "loss": 4.210680541992187, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.33310158041653265, | |
| "grad_norm": 2.136242151260376, | |
| "learning_rate": 0.00026669087965797416, | |
| "loss": 4.144779357910156, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.33413928004399845, | |
| "grad_norm": 0.9694802165031433, | |
| "learning_rate": 0.0002665871096952276, | |
| "loss": 4.1770632934570315, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.3351769796714643, | |
| "grad_norm": 2.070678949356079, | |
| "learning_rate": 0.000266483339732481, | |
| "loss": 4.140425415039062, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.3362146792989301, | |
| "grad_norm": 1.3420311212539673, | |
| "learning_rate": 0.00026637956976973445, | |
| "loss": 4.117745361328125, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.337252378926396, | |
| "grad_norm": 1.7498325109481812, | |
| "learning_rate": 0.00026627579980698785, | |
| "loss": 4.090622253417969, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.3382900785538618, | |
| "grad_norm": 5.7661848068237305, | |
| "learning_rate": 0.00026617202984424124, | |
| "loss": 4.212898864746093, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.33932777818132764, | |
| "grad_norm": 1.856246829032898, | |
| "learning_rate": 0.0002660682598814947, | |
| "loss": 4.20351806640625, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.34036547780879345, | |
| "grad_norm": 5.002403259277344, | |
| "learning_rate": 0.0002659644899187481, | |
| "loss": 4.095009765625, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.3414031774362593, | |
| "grad_norm": 1.0896239280700684, | |
| "learning_rate": 0.00026586071995600153, | |
| "loss": 4.061651000976562, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.3424408770637251, | |
| "grad_norm": 1.4536166191101074, | |
| "learning_rate": 0.00026575694999325493, | |
| "loss": 4.05192626953125, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.343478576691191, | |
| "grad_norm": 3.966247081756592, | |
| "learning_rate": 0.0002656531800305084, | |
| "loss": 4.122060241699219, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.3445162763186568, | |
| "grad_norm": 2.3092470169067383, | |
| "learning_rate": 0.00026554941006776177, | |
| "loss": 4.171341247558594, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.34555397594612264, | |
| "grad_norm": 1.6187312602996826, | |
| "learning_rate": 0.00026544564010501517, | |
| "loss": 4.147681579589844, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.34659167557358844, | |
| "grad_norm": 1.4459052085876465, | |
| "learning_rate": 0.0002653418701422686, | |
| "loss": 4.12395751953125, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.3476293752010543, | |
| "grad_norm": 1.6370753049850464, | |
| "learning_rate": 0.000265238100179522, | |
| "loss": 4.043997192382813, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.34866707482852016, | |
| "grad_norm": 2.5965089797973633, | |
| "learning_rate": 0.00026513433021677546, | |
| "loss": 4.149281311035156, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.34970477445598597, | |
| "grad_norm": 1.4466602802276611, | |
| "learning_rate": 0.00026503056025402885, | |
| "loss": 4.153418884277344, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.3507424740834518, | |
| "grad_norm": 1.1217280626296997, | |
| "learning_rate": 0.00026492679029128225, | |
| "loss": 4.173803405761719, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.35178017371091763, | |
| "grad_norm": 2.853686809539795, | |
| "learning_rate": 0.00026482302032853564, | |
| "loss": 4.1212451171875, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.3528178733383835, | |
| "grad_norm": 1.1508560180664062, | |
| "learning_rate": 0.0002647192503657891, | |
| "loss": 4.179091186523437, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.3538555729658493, | |
| "grad_norm": 1.8668493032455444, | |
| "learning_rate": 0.00026461548040304254, | |
| "loss": 4.142960205078125, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.35489327259331516, | |
| "grad_norm": 1.7272940874099731, | |
| "learning_rate": 0.00026451171044029594, | |
| "loss": 4.127975769042969, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.35593097222078096, | |
| "grad_norm": 1.5529290437698364, | |
| "learning_rate": 0.00026440794047754933, | |
| "loss": 4.2190853881835935, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.3569686718482468, | |
| "grad_norm": 1.506499171257019, | |
| "learning_rate": 0.0002643041705148027, | |
| "loss": 4.168932800292969, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.3580063714757126, | |
| "grad_norm": 1.2258543968200684, | |
| "learning_rate": 0.0002642004005520562, | |
| "loss": 4.065081176757812, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.3590440711031785, | |
| "grad_norm": 1.4408226013183594, | |
| "learning_rate": 0.0002640966305893096, | |
| "loss": 4.102992858886719, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.3600817707306443, | |
| "grad_norm": 2.467862844467163, | |
| "learning_rate": 0.000263992860626563, | |
| "loss": 4.061658935546875, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.36111947035811015, | |
| "grad_norm": 1.3214993476867676, | |
| "learning_rate": 0.0002638890906638164, | |
| "loss": 4.133033752441406, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.36215716998557596, | |
| "grad_norm": 1.2223659753799438, | |
| "learning_rate": 0.0002637853207010698, | |
| "loss": 4.0944091796875, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.3631948696130418, | |
| "grad_norm": 1.5864417552947998, | |
| "learning_rate": 0.00026368155073832326, | |
| "loss": 4.031897277832031, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.3642325692405076, | |
| "grad_norm": 3.021804094314575, | |
| "learning_rate": 0.00026357778077557665, | |
| "loss": 4.253480224609375, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.3652702688679735, | |
| "grad_norm": 2.419196844100952, | |
| "learning_rate": 0.0002634740108128301, | |
| "loss": 4.060654602050781, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.3663079684954393, | |
| "grad_norm": 3.106058359146118, | |
| "learning_rate": 0.00026337024085008355, | |
| "loss": 4.167652282714844, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.36734566812290514, | |
| "grad_norm": 2.6082842350006104, | |
| "learning_rate": 0.00026326647088733695, | |
| "loss": 4.126443481445312, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.368383367750371, | |
| "grad_norm": 3.2292778491973877, | |
| "learning_rate": 0.00026316270092459034, | |
| "loss": 4.16947509765625, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.3694210673778368, | |
| "grad_norm": 3.438127279281616, | |
| "learning_rate": 0.00026305893096184374, | |
| "loss": 4.18126220703125, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.37045876700530267, | |
| "grad_norm": 1.1258721351623535, | |
| "learning_rate": 0.0002629551609990972, | |
| "loss": 4.133269348144531, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.3714964666327685, | |
| "grad_norm": 2.0176923274993896, | |
| "learning_rate": 0.00026285139103635063, | |
| "loss": 4.000823059082031, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.37253416626023433, | |
| "grad_norm": 2.162721872329712, | |
| "learning_rate": 0.00026274762107360403, | |
| "loss": 4.158842163085938, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.37357186588770014, | |
| "grad_norm": 1.3159765005111694, | |
| "learning_rate": 0.0002626438511108574, | |
| "loss": 4.156724853515625, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.374609565515166, | |
| "grad_norm": 1.8504067659378052, | |
| "learning_rate": 0.0002625400811481108, | |
| "loss": 4.074109191894531, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.3756472651426318, | |
| "grad_norm": 1.3491618633270264, | |
| "learning_rate": 0.00026243631118536427, | |
| "loss": 4.117833557128907, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.37668496477009766, | |
| "grad_norm": 1.1090528964996338, | |
| "learning_rate": 0.00026233254122261766, | |
| "loss": 4.0473480224609375, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.37772266439756347, | |
| "grad_norm": 4.539895057678223, | |
| "learning_rate": 0.0002622287712598711, | |
| "loss": 4.0527517700195315, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.37876036402502933, | |
| "grad_norm": 1.792636513710022, | |
| "learning_rate": 0.0002621250012971245, | |
| "loss": 3.9459353637695314, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.37979806365249513, | |
| "grad_norm": 2.4098236560821533, | |
| "learning_rate": 0.0002620212313343779, | |
| "loss": 4.144781494140625, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.380835763279961, | |
| "grad_norm": 1.8648608922958374, | |
| "learning_rate": 0.00026191746137163135, | |
| "loss": 4.104746704101562, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.3818734629074268, | |
| "grad_norm": 2.071338653564453, | |
| "learning_rate": 0.00026181369140888474, | |
| "loss": 4.074734191894532, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.38291116253489266, | |
| "grad_norm": 1.3856308460235596, | |
| "learning_rate": 0.0002617099214461382, | |
| "loss": 4.158983154296875, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.38394886216235846, | |
| "grad_norm": 2.072495698928833, | |
| "learning_rate": 0.0002616061514833916, | |
| "loss": 4.08581787109375, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.3849865617898243, | |
| "grad_norm": 1.3703645467758179, | |
| "learning_rate": 0.00026150238152064504, | |
| "loss": 4.006895446777344, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.3860242614172901, | |
| "grad_norm": 2.7975013256073, | |
| "learning_rate": 0.00026139861155789843, | |
| "loss": 4.147843627929688, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.387061961044756, | |
| "grad_norm": 3.56386661529541, | |
| "learning_rate": 0.0002612948415951518, | |
| "loss": 4.2793121337890625, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.3880996606722218, | |
| "grad_norm": 2.8237593173980713, | |
| "learning_rate": 0.0002611910716324053, | |
| "loss": 4.13215087890625, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.38913736029968765, | |
| "grad_norm": 1.2382421493530273, | |
| "learning_rate": 0.00026108730166965867, | |
| "loss": 4.071390991210937, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.3901750599271535, | |
| "grad_norm": 1.620809555053711, | |
| "learning_rate": 0.0002609835317069121, | |
| "loss": 4.081386108398437, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.3912127595546193, | |
| "grad_norm": 1.5530173778533936, | |
| "learning_rate": 0.0002608797617441655, | |
| "loss": 4.095469970703125, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.3922504591820852, | |
| "grad_norm": 2.7742369174957275, | |
| "learning_rate": 0.0002607759917814189, | |
| "loss": 4.096160888671875, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.393288158809551, | |
| "grad_norm": 1.0493942499160767, | |
| "learning_rate": 0.00026067222181867236, | |
| "loss": 4.000921936035156, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.39432585843701684, | |
| "grad_norm": 4.1348958015441895, | |
| "learning_rate": 0.00026056845185592575, | |
| "loss": 3.991048583984375, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.39536355806448265, | |
| "grad_norm": 4.481339454650879, | |
| "learning_rate": 0.0002604646818931792, | |
| "loss": 4.004680786132813, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.3964012576919485, | |
| "grad_norm": 1.5849348306655884, | |
| "learning_rate": 0.0002603609119304326, | |
| "loss": 4.127825317382812, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.3974389573194143, | |
| "grad_norm": 1.5340007543563843, | |
| "learning_rate": 0.000260257141967686, | |
| "loss": 4.126565551757812, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.39847665694688017, | |
| "grad_norm": 1.9388331174850464, | |
| "learning_rate": 0.00026015337200493944, | |
| "loss": 4.147232666015625, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.399514356574346, | |
| "grad_norm": 1.4936273097991943, | |
| "learning_rate": 0.00026004960204219284, | |
| "loss": 4.046693115234375, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.40055205620181183, | |
| "grad_norm": 1.4128496646881104, | |
| "learning_rate": 0.0002599458320794463, | |
| "loss": 4.027592468261719, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.40158975582927764, | |
| "grad_norm": 1.2070266008377075, | |
| "learning_rate": 0.0002598420621166997, | |
| "loss": 3.9974462890625, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.4026274554567435, | |
| "grad_norm": 1.0721571445465088, | |
| "learning_rate": 0.0002597382921539531, | |
| "loss": 4.048193054199219, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.4036651550842093, | |
| "grad_norm": 4.593639373779297, | |
| "learning_rate": 0.00025963452219120647, | |
| "loss": 3.9815548706054686, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.40470285471167516, | |
| "grad_norm": 2.84889817237854, | |
| "learning_rate": 0.0002595307522284599, | |
| "loss": 4.118370666503906, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.40574055433914097, | |
| "grad_norm": 1.6757389307022095, | |
| "learning_rate": 0.00025942698226571337, | |
| "loss": 4.095942077636718, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.40677825396660683, | |
| "grad_norm": 3.5596885681152344, | |
| "learning_rate": 0.00025932321230296676, | |
| "loss": 4.0965576171875, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.40781595359407263, | |
| "grad_norm": 1.0558372735977173, | |
| "learning_rate": 0.0002592194423402202, | |
| "loss": 4.239440307617188, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.4088536532215385, | |
| "grad_norm": 5.334078311920166, | |
| "learning_rate": 0.0002591156723774736, | |
| "loss": 4.089285888671875, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.40989135284900435, | |
| "grad_norm": 2.4086287021636963, | |
| "learning_rate": 0.000259011902414727, | |
| "loss": 4.103414611816406, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.41092905247647016, | |
| "grad_norm": 4.432836055755615, | |
| "learning_rate": 0.00025890813245198045, | |
| "loss": 4.0577630615234375, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.411966752103936, | |
| "grad_norm": 1.3129891157150269, | |
| "learning_rate": 0.00025880436248923384, | |
| "loss": 4.128912353515625, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.4130044517314018, | |
| "grad_norm": 2.148174524307251, | |
| "learning_rate": 0.0002587005925264873, | |
| "loss": 4.197516174316406, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.4140421513588677, | |
| "grad_norm": 6.447707176208496, | |
| "learning_rate": 0.0002585968225637407, | |
| "loss": 4.087812805175782, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.4150798509863335, | |
| "grad_norm": 2.721989393234253, | |
| "learning_rate": 0.0002584930526009941, | |
| "loss": 3.9460833740234373, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.41611755061379935, | |
| "grad_norm": 1.543135166168213, | |
| "learning_rate": 0.0002583892826382475, | |
| "loss": 4.02151611328125, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.41715525024126515, | |
| "grad_norm": 1.4670268297195435, | |
| "learning_rate": 0.0002582855126755009, | |
| "loss": 4.18778564453125, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.418192949868731, | |
| "grad_norm": 3.8556268215179443, | |
| "learning_rate": 0.0002581817427127544, | |
| "loss": 3.996910400390625, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.4192306494961968, | |
| "grad_norm": 1.702594518661499, | |
| "learning_rate": 0.00025807797275000777, | |
| "loss": 4.031709594726562, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.4202683491236627, | |
| "grad_norm": 1.2531317472457886, | |
| "learning_rate": 0.00025797420278726117, | |
| "loss": 4.188993835449219, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.4213060487511285, | |
| "grad_norm": 2.5484142303466797, | |
| "learning_rate": 0.00025787043282451456, | |
| "loss": 4.031621398925782, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.42234374837859434, | |
| "grad_norm": 1.823457956314087, | |
| "learning_rate": 0.000257766662861768, | |
| "loss": 4.001983032226563, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.42338144800606015, | |
| "grad_norm": 1.9530704021453857, | |
| "learning_rate": 0.0002576628928990214, | |
| "loss": 4.030978088378906, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.424419147633526, | |
| "grad_norm": 4.55501127243042, | |
| "learning_rate": 0.00025755912293627485, | |
| "loss": 4.062133178710938, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.4254568472609918, | |
| "grad_norm": 1.9799492359161377, | |
| "learning_rate": 0.00025745535297352825, | |
| "loss": 3.9875259399414062, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.42649454688845767, | |
| "grad_norm": 2.4329614639282227, | |
| "learning_rate": 0.00025735158301078164, | |
| "loss": 3.9634893798828124, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.4275322465159235, | |
| "grad_norm": 1.3791182041168213, | |
| "learning_rate": 0.0002572478130480351, | |
| "loss": 4.171094055175781, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.42856994614338934, | |
| "grad_norm": 1.4852691888809204, | |
| "learning_rate": 0.0002571440430852885, | |
| "loss": 4.059336547851562, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.4296076457708552, | |
| "grad_norm": 2.191392183303833, | |
| "learning_rate": 0.00025704027312254194, | |
| "loss": 3.9574560546875, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.430645345398321, | |
| "grad_norm": 3.4423017501831055, | |
| "learning_rate": 0.0002569365031597954, | |
| "loss": 3.990745849609375, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.43168304502578686, | |
| "grad_norm": 2.979930877685547, | |
| "learning_rate": 0.0002568327331970488, | |
| "loss": 4.166605529785156, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.43272074465325266, | |
| "grad_norm": 3.131230354309082, | |
| "learning_rate": 0.0002567289632343022, | |
| "loss": 4.026178894042968, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.4337584442807185, | |
| "grad_norm": 1.578643798828125, | |
| "learning_rate": 0.00025662519327155557, | |
| "loss": 4.10739990234375, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.43479614390818433, | |
| "grad_norm": 3.628096580505371, | |
| "learning_rate": 0.000256521423308809, | |
| "loss": 4.021985473632813, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.4358338435356502, | |
| "grad_norm": 2.235994815826416, | |
| "learning_rate": 0.0002564176533460624, | |
| "loss": 4.138570251464844, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.436871543163116, | |
| "grad_norm": 3.0459887981414795, | |
| "learning_rate": 0.00025631388338331586, | |
| "loss": 4.139791564941406, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.43790924279058185, | |
| "grad_norm": 1.0590101480484009, | |
| "learning_rate": 0.00025621011342056926, | |
| "loss": 4.018776550292968, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.43894694241804766, | |
| "grad_norm": 3.5735878944396973, | |
| "learning_rate": 0.00025610634345782265, | |
| "loss": 4.182121887207031, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.4399846420455135, | |
| "grad_norm": 1.1051421165466309, | |
| "learning_rate": 0.0002560025734950761, | |
| "loss": 4.086949157714844, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.4410223416729793, | |
| "grad_norm": 2.8680758476257324, | |
| "learning_rate": 0.0002558988035323295, | |
| "loss": 4.053037414550781, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.4420600413004452, | |
| "grad_norm": 1.6805782318115234, | |
| "learning_rate": 0.00025579503356958294, | |
| "loss": 4.041470947265625, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.443097740927911, | |
| "grad_norm": 1.7229841947555542, | |
| "learning_rate": 0.00025569126360683634, | |
| "loss": 4.1356103515625, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.44413544055537685, | |
| "grad_norm": 1.4601655006408691, | |
| "learning_rate": 0.00025558749364408973, | |
| "loss": 4.052696533203125, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.44517314018284265, | |
| "grad_norm": 1.552959680557251, | |
| "learning_rate": 0.0002554837236813432, | |
| "loss": 4.020947875976563, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.4462108398103085, | |
| "grad_norm": 1.3446309566497803, | |
| "learning_rate": 0.0002553799537185966, | |
| "loss": 4.150856018066406, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.4472485394377743, | |
| "grad_norm": 3.128110408782959, | |
| "learning_rate": 0.00025527618375585003, | |
| "loss": 4.118401794433594, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.4482862390652402, | |
| "grad_norm": 1.328148603439331, | |
| "learning_rate": 0.0002551724137931034, | |
| "loss": 4.073428649902343, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.449323938692706, | |
| "grad_norm": 1.5910078287124634, | |
| "learning_rate": 0.00025506864383035687, | |
| "loss": 4.110806579589844, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.45036163832017184, | |
| "grad_norm": 1.2686039209365845, | |
| "learning_rate": 0.00025496487386761027, | |
| "loss": 4.007551574707032, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.4513993379476377, | |
| "grad_norm": 4.290769577026367, | |
| "learning_rate": 0.00025486110390486366, | |
| "loss": 4.068913269042969, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.4524370375751035, | |
| "grad_norm": 1.6915346384048462, | |
| "learning_rate": 0.0002547573339421171, | |
| "loss": 4.066489562988282, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.45347473720256937, | |
| "grad_norm": 1.3425647020339966, | |
| "learning_rate": 0.0002546535639793705, | |
| "loss": 4.024351806640625, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.45451243683003517, | |
| "grad_norm": 4.726262092590332, | |
| "learning_rate": 0.00025454979401662395, | |
| "loss": 4.055924987792968, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.45555013645750103, | |
| "grad_norm": 1.3767929077148438, | |
| "learning_rate": 0.00025444602405387735, | |
| "loss": 4.1108706665039065, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.45658783608496684, | |
| "grad_norm": 2.199096918106079, | |
| "learning_rate": 0.00025434225409113074, | |
| "loss": 4.032781982421875, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.4576255357124327, | |
| "grad_norm": 1.529963731765747, | |
| "learning_rate": 0.0002542384841283842, | |
| "loss": 3.9078250122070313, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.4586632353398985, | |
| "grad_norm": 2.381452798843384, | |
| "learning_rate": 0.0002541347141656376, | |
| "loss": 4.1637747192382815, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.45970093496736436, | |
| "grad_norm": 1.3512217998504639, | |
| "learning_rate": 0.00025403094420289104, | |
| "loss": 4.1603765869140625, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.46073863459483017, | |
| "grad_norm": 1.6877330541610718, | |
| "learning_rate": 0.00025392717424014443, | |
| "loss": 3.9833114624023436, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.461776334222296, | |
| "grad_norm": 10.19050121307373, | |
| "learning_rate": 0.0002538234042773978, | |
| "loss": 4.087564086914062, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.46281403384976183, | |
| "grad_norm": 2.2430684566497803, | |
| "learning_rate": 0.0002537196343146512, | |
| "loss": 3.943908386230469, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.4638517334772277, | |
| "grad_norm": 1.8005903959274292, | |
| "learning_rate": 0.00025361586435190467, | |
| "loss": 4.026759948730469, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.4648894331046935, | |
| "grad_norm": 1.3022342920303345, | |
| "learning_rate": 0.0002535120943891581, | |
| "loss": 4.106507263183594, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.46592713273215935, | |
| "grad_norm": 1.1729425191879272, | |
| "learning_rate": 0.0002534083244264115, | |
| "loss": 4.0660693359375, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.46696483235962516, | |
| "grad_norm": 1.7224327325820923, | |
| "learning_rate": 0.0002533045544636649, | |
| "loss": 3.9855413818359375, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.468002531987091, | |
| "grad_norm": 1.6977527141571045, | |
| "learning_rate": 0.0002532007845009183, | |
| "loss": 3.813612976074219, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.4690402316145568, | |
| "grad_norm": 2.9529614448547363, | |
| "learning_rate": 0.00025309701453817175, | |
| "loss": 3.995145263671875, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.4700779312420227, | |
| "grad_norm": 3.1997270584106445, | |
| "learning_rate": 0.0002529932445754252, | |
| "loss": 4.031595153808594, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.47111563086948854, | |
| "grad_norm": 5.878026008605957, | |
| "learning_rate": 0.0002528894746126786, | |
| "loss": 4.028975524902344, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.47215333049695435, | |
| "grad_norm": 1.7146035432815552, | |
| "learning_rate": 0.00025278570464993205, | |
| "loss": 4.085393676757812, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.4731910301244202, | |
| "grad_norm": 2.954148292541504, | |
| "learning_rate": 0.00025268193468718544, | |
| "loss": 4.039700622558594, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.474228729751886, | |
| "grad_norm": 1.9127237796783447, | |
| "learning_rate": 0.00025257816472443883, | |
| "loss": 4.100406494140625, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.4752664293793519, | |
| "grad_norm": 1.8794509172439575, | |
| "learning_rate": 0.00025247439476169223, | |
| "loss": 3.9390939331054686, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.4763041290068177, | |
| "grad_norm": 2.165816307067871, | |
| "learning_rate": 0.0002523706247989457, | |
| "loss": 4.155856628417968, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.47734182863428354, | |
| "grad_norm": 6.686591148376465, | |
| "learning_rate": 0.00025226685483619913, | |
| "loss": 4.097453918457031, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.47837952826174934, | |
| "grad_norm": 2.4973371028900146, | |
| "learning_rate": 0.0002521630848734525, | |
| "loss": 4.200291137695313, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.4794172278892152, | |
| "grad_norm": 2.1478147506713867, | |
| "learning_rate": 0.0002520593149107059, | |
| "loss": 3.899898681640625, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.480454927516681, | |
| "grad_norm": 1.6290667057037354, | |
| "learning_rate": 0.0002519555449479593, | |
| "loss": 4.157419128417969, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.48149262714414687, | |
| "grad_norm": 2.3697171211242676, | |
| "learning_rate": 0.00025185177498521276, | |
| "loss": 4.0068753051757815, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.48253032677161267, | |
| "grad_norm": 3.123157501220703, | |
| "learning_rate": 0.00025174800502246616, | |
| "loss": 3.9923574829101565, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.48356802639907853, | |
| "grad_norm": 3.4272193908691406, | |
| "learning_rate": 0.0002516442350597196, | |
| "loss": 4.144463195800781, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.48460572602654434, | |
| "grad_norm": 2.8348467350006104, | |
| "learning_rate": 0.000251540465096973, | |
| "loss": 4.055748291015625, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.4856434256540102, | |
| "grad_norm": 3.0261967182159424, | |
| "learning_rate": 0.0002514366951342264, | |
| "loss": 4.177880554199219, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.486681125281476, | |
| "grad_norm": 10.726264953613281, | |
| "learning_rate": 0.00025133292517147984, | |
| "loss": 3.9125796508789064, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.48771882490894186, | |
| "grad_norm": 8.811136245727539, | |
| "learning_rate": 0.00025122915520873324, | |
| "loss": 3.9216848754882814, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.48875652453640767, | |
| "grad_norm": 6.8598151206970215, | |
| "learning_rate": 0.0002511253852459867, | |
| "loss": 3.9738433837890623, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.4897942241638735, | |
| "grad_norm": 5.096536636352539, | |
| "learning_rate": 0.0002510216152832401, | |
| "loss": 3.998507080078125, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.4908319237913394, | |
| "grad_norm": 1.4742202758789062, | |
| "learning_rate": 0.00025091784532049353, | |
| "loss": 4.171350402832031, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.4918696234188052, | |
| "grad_norm": 1.88887357711792, | |
| "learning_rate": 0.0002508140753577469, | |
| "loss": 4.106647644042969, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.49290732304627105, | |
| "grad_norm": 1.6502625942230225, | |
| "learning_rate": 0.0002507103053950003, | |
| "loss": 3.877885437011719, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.49394502267373686, | |
| "grad_norm": 1.728053331375122, | |
| "learning_rate": 0.00025060653543225377, | |
| "loss": 4.064427795410157, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.4949827223012027, | |
| "grad_norm": 4.632587432861328, | |
| "learning_rate": 0.00025050276546950716, | |
| "loss": 4.113824157714844, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.4960204219286685, | |
| "grad_norm": 1.5823708772659302, | |
| "learning_rate": 0.0002503989955067606, | |
| "loss": 4.080696411132813, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.4970581215561344, | |
| "grad_norm": 1.9801136255264282, | |
| "learning_rate": 0.000250295225544014, | |
| "loss": 3.945875549316406, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.4980958211836002, | |
| "grad_norm": 1.3339368104934692, | |
| "learning_rate": 0.0002501914555812674, | |
| "loss": 3.951331787109375, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.49913352081106604, | |
| "grad_norm": 2.1013355255126953, | |
| "learning_rate": 0.00025008768561852085, | |
| "loss": 4.022156372070312, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.5001712204385319, | |
| "grad_norm": 2.7022488117218018, | |
| "learning_rate": 0.00024998391565577425, | |
| "loss": 3.9780624389648436, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.5012089200659977, | |
| "grad_norm": 10.230494499206543, | |
| "learning_rate": 0.0002498801456930277, | |
| "loss": 4.024637145996094, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.5022466196934635, | |
| "grad_norm": 7.242427349090576, | |
| "learning_rate": 0.0002497763757302811, | |
| "loss": 3.9954248046875, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.5032843193209293, | |
| "grad_norm": 2.742445945739746, | |
| "learning_rate": 0.0002496726057675345, | |
| "loss": 3.9637130737304687, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.5043220189483952, | |
| "grad_norm": 1.6320149898529053, | |
| "learning_rate": 0.00024956883580478794, | |
| "loss": 4.035350952148438, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.505359718575861, | |
| "grad_norm": 2.239950180053711, | |
| "learning_rate": 0.00024946506584204133, | |
| "loss": 3.961440124511719, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.5063974182033268, | |
| "grad_norm": 6.686822891235352, | |
| "learning_rate": 0.0002493612958792948, | |
| "loss": 4.003260498046875, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.5074351178307926, | |
| "grad_norm": 1.9818964004516602, | |
| "learning_rate": 0.0002492575259165482, | |
| "loss": 4.018614501953125, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.5084728174582586, | |
| "grad_norm": 1.5698004961013794, | |
| "learning_rate": 0.00024915375595380157, | |
| "loss": 4.045997314453125, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.5095105170857244, | |
| "grad_norm": 2.3865158557891846, | |
| "learning_rate": 0.000249049985991055, | |
| "loss": 4.050853576660156, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.5105482167131902, | |
| "grad_norm": 14.248946189880371, | |
| "learning_rate": 0.0002489462160283084, | |
| "loss": 3.991949462890625, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.5115859163406561, | |
| "grad_norm": 1.279118537902832, | |
| "learning_rate": 0.00024884244606556186, | |
| "loss": 3.92796875, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.5126236159681219, | |
| "grad_norm": 2.575704574584961, | |
| "learning_rate": 0.00024873867610281526, | |
| "loss": 4.12865478515625, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.5136613155955877, | |
| "grad_norm": 2.0912930965423584, | |
| "learning_rate": 0.0002486349061400687, | |
| "loss": 4.042799682617187, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.5146990152230535, | |
| "grad_norm": 2.6358580589294434, | |
| "learning_rate": 0.0002485311361773221, | |
| "loss": 4.069761047363281, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.5157367148505194, | |
| "grad_norm": 2.6711385250091553, | |
| "learning_rate": 0.0002484273662145755, | |
| "loss": 3.9823483276367186, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.5167744144779852, | |
| "grad_norm": 3.348376989364624, | |
| "learning_rate": 0.00024832359625182894, | |
| "loss": 4.119874572753906, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.517812114105451, | |
| "grad_norm": 1.7040736675262451, | |
| "learning_rate": 0.00024821982628908234, | |
| "loss": 4.038002319335938, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.5188498137329168, | |
| "grad_norm": 11.144097328186035, | |
| "learning_rate": 0.0002481160563263358, | |
| "loss": 3.933763122558594, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.5198875133603827, | |
| "grad_norm": 3.1529595851898193, | |
| "learning_rate": 0.0002480122863635892, | |
| "loss": 3.990421142578125, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.5209252129878486, | |
| "grad_norm": 2.3761773109436035, | |
| "learning_rate": 0.0002479085164008426, | |
| "loss": 3.9385421752929686, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.5219629126153144, | |
| "grad_norm": 14.909253120422363, | |
| "learning_rate": 0.00024780474643809597, | |
| "loss": 3.924638671875, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.5230006122427802, | |
| "grad_norm": 1.4870705604553223, | |
| "learning_rate": 0.0002477009764753494, | |
| "loss": 4.003363037109375, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.5240383118702461, | |
| "grad_norm": 2.5456697940826416, | |
| "learning_rate": 0.00024759720651260287, | |
| "loss": 4.063373413085937, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.5250760114977119, | |
| "grad_norm": 4.392611980438232, | |
| "learning_rate": 0.00024749343654985627, | |
| "loss": 4.108450927734375, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.5261137111251777, | |
| "grad_norm": 2.8420300483703613, | |
| "learning_rate": 0.00024738966658710966, | |
| "loss": 3.9724908447265626, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.5271514107526435, | |
| "grad_norm": 2.3819692134857178, | |
| "learning_rate": 0.00024728589662436306, | |
| "loss": 4.040487060546875, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.5281891103801094, | |
| "grad_norm": 2.1021909713745117, | |
| "learning_rate": 0.0002471821266616165, | |
| "loss": 4.101463623046875, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.5292268100075752, | |
| "grad_norm": 2.8605117797851562, | |
| "learning_rate": 0.00024707835669886995, | |
| "loss": 3.9426974487304687, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.530264509635041, | |
| "grad_norm": 1.331457257270813, | |
| "learning_rate": 0.00024697458673612335, | |
| "loss": 4.005464172363281, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.5313022092625068, | |
| "grad_norm": 2.4866714477539062, | |
| "learning_rate": 0.00024687081677337674, | |
| "loss": 4.089916687011719, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.5323399088899727, | |
| "grad_norm": 6.342608451843262, | |
| "learning_rate": 0.00024676704681063014, | |
| "loss": 3.979620361328125, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.5333776085174385, | |
| "grad_norm": 1.3954708576202393, | |
| "learning_rate": 0.0002466632768478836, | |
| "loss": 3.9805123901367185, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.5344153081449043, | |
| "grad_norm": 24.8520450592041, | |
| "learning_rate": 0.000246559506885137, | |
| "loss": 4.0105502319335935, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.5354530077723703, | |
| "grad_norm": 2.0366039276123047, | |
| "learning_rate": 0.00024645573692239043, | |
| "loss": 3.919516296386719, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.5364907073998361, | |
| "grad_norm": 1.3017858266830444, | |
| "learning_rate": 0.0002463519669596439, | |
| "loss": 3.951867980957031, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.5375284070273019, | |
| "grad_norm": 2.579885244369507, | |
| "learning_rate": 0.0002462481969968973, | |
| "loss": 3.960545959472656, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.5385661066547677, | |
| "grad_norm": 1.5787100791931152, | |
| "learning_rate": 0.00024614442703415067, | |
| "loss": 4.013999938964844, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.5396038062822336, | |
| "grad_norm": 3.9871633052825928, | |
| "learning_rate": 0.00024604065707140406, | |
| "loss": 3.950070495605469, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.5406415059096994, | |
| "grad_norm": 1.572277545928955, | |
| "learning_rate": 0.0002459368871086575, | |
| "loss": 4.086417846679687, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.5416792055371652, | |
| "grad_norm": 7.029146671295166, | |
| "learning_rate": 0.0002458331171459109, | |
| "loss": 3.8767724609375, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.542716905164631, | |
| "grad_norm": 1.2442755699157715, | |
| "learning_rate": 0.00024572934718316436, | |
| "loss": 3.875315856933594, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.5437546047920969, | |
| "grad_norm": 3.5381152629852295, | |
| "learning_rate": 0.00024562557722041775, | |
| "loss": 4.013727416992188, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.5447923044195627, | |
| "grad_norm": 16.472898483276367, | |
| "learning_rate": 0.00024552180725767115, | |
| "loss": 4.058722839355469, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.5458300040470285, | |
| "grad_norm": 1.4836983680725098, | |
| "learning_rate": 0.0002454180372949246, | |
| "loss": 4.106039123535156, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.5468677036744943, | |
| "grad_norm": 4.735908031463623, | |
| "learning_rate": 0.000245314267332178, | |
| "loss": 4.109900817871094, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.5479054033019602, | |
| "grad_norm": 1.7438913583755493, | |
| "learning_rate": 0.00024521049736943144, | |
| "loss": 4.098789978027344, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.548943102929426, | |
| "grad_norm": 3.592564105987549, | |
| "learning_rate": 0.00024510672740668483, | |
| "loss": 3.9866278076171877, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.5499808025568919, | |
| "grad_norm": 1.9763888120651245, | |
| "learning_rate": 0.00024500295744393823, | |
| "loss": 3.9620831298828123, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.5510185021843577, | |
| "grad_norm": 1.0539793968200684, | |
| "learning_rate": 0.0002448991874811917, | |
| "loss": 4.006460266113281, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.5520562018118236, | |
| "grad_norm": 2.2474358081817627, | |
| "learning_rate": 0.00024479541751844507, | |
| "loss": 4.067258605957031, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.5530939014392894, | |
| "grad_norm": 1.5785913467407227, | |
| "learning_rate": 0.0002446916475556985, | |
| "loss": 4.057683715820312, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.5541316010667552, | |
| "grad_norm": 2.2754416465759277, | |
| "learning_rate": 0.0002445878775929519, | |
| "loss": 3.9662628173828125, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.5551693006942211, | |
| "grad_norm": 2.0118043422698975, | |
| "learning_rate": 0.00024448410763020537, | |
| "loss": 3.9848583984375, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.5562070003216869, | |
| "grad_norm": 2.3987770080566406, | |
| "learning_rate": 0.00024438033766745876, | |
| "loss": 4.00030029296875, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.5572446999491527, | |
| "grad_norm": 2.9198148250579834, | |
| "learning_rate": 0.00024427656770471216, | |
| "loss": 3.8882846069335937, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.5582823995766185, | |
| "grad_norm": 2.0234696865081787, | |
| "learning_rate": 0.0002441727977419656, | |
| "loss": 3.9845794677734374, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.5593200992040844, | |
| "grad_norm": 1.701568841934204, | |
| "learning_rate": 0.000244069027779219, | |
| "loss": 4.01090087890625, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.5603577988315502, | |
| "grad_norm": 2.3093771934509277, | |
| "learning_rate": 0.00024396525781647242, | |
| "loss": 3.9678195190429686, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.561395498459016, | |
| "grad_norm": 2.0182909965515137, | |
| "learning_rate": 0.00024386148785372582, | |
| "loss": 4.025320434570313, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.5624331980864818, | |
| "grad_norm": 3.1341028213500977, | |
| "learning_rate": 0.00024375771789097927, | |
| "loss": 3.9826446533203126, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.5634708977139478, | |
| "grad_norm": 2.025581121444702, | |
| "learning_rate": 0.0002436539479282327, | |
| "loss": 3.906527404785156, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.5645085973414136, | |
| "grad_norm": 2.913895845413208, | |
| "learning_rate": 0.00024355017796548608, | |
| "loss": 3.970755920410156, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.5655462969688794, | |
| "grad_norm": 1.9220850467681885, | |
| "learning_rate": 0.0002434464080027395, | |
| "loss": 3.943621826171875, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.5665839965963452, | |
| "grad_norm": 1.2168983221054077, | |
| "learning_rate": 0.0002433426380399929, | |
| "loss": 3.9780545043945312, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.5676216962238111, | |
| "grad_norm": 1.5367380380630493, | |
| "learning_rate": 0.00024323886807724635, | |
| "loss": 3.8468157958984377, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.5686593958512769, | |
| "grad_norm": 2.7281689643859863, | |
| "learning_rate": 0.00024313509811449977, | |
| "loss": 3.9043319702148436, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.5696970954787427, | |
| "grad_norm": 1.1875724792480469, | |
| "learning_rate": 0.00024303132815175316, | |
| "loss": 4.029020385742188, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.5707347951062085, | |
| "grad_norm": 9.087173461914062, | |
| "learning_rate": 0.00024292755818900659, | |
| "loss": 3.977708740234375, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.5717724947336744, | |
| "grad_norm": 1.94620943069458, | |
| "learning_rate": 0.00024282378822626, | |
| "loss": 3.9465988159179686, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.5728101943611402, | |
| "grad_norm": 3.0396885871887207, | |
| "learning_rate": 0.00024272001826351343, | |
| "loss": 4.030888366699219, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.573847893988606, | |
| "grad_norm": 1.557199239730835, | |
| "learning_rate": 0.00024261624830076682, | |
| "loss": 3.9756591796875, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.5748855936160719, | |
| "grad_norm": 3.0625579357147217, | |
| "learning_rate": 0.00024251247833802025, | |
| "loss": 4.076784362792969, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.5759232932435377, | |
| "grad_norm": 1.9166301488876343, | |
| "learning_rate": 0.0002424087083752737, | |
| "loss": 3.9604058837890626, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.5769609928710036, | |
| "grad_norm": 1.2829216718673706, | |
| "learning_rate": 0.0002423049384125271, | |
| "loss": 3.841531066894531, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.5779986924984694, | |
| "grad_norm": 2.9800634384155273, | |
| "learning_rate": 0.0002422011684497805, | |
| "loss": 3.915208435058594, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.5790363921259353, | |
| "grad_norm": 4.931972026824951, | |
| "learning_rate": 0.0002420973984870339, | |
| "loss": 3.7610516357421875, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.5800740917534011, | |
| "grad_norm": 3.796473264694214, | |
| "learning_rate": 0.00024199362852428733, | |
| "loss": 4.009695129394531, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.5811117913808669, | |
| "grad_norm": 2.3635172843933105, | |
| "learning_rate": 0.00024188985856154075, | |
| "loss": 4.164959716796875, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.5821494910083327, | |
| "grad_norm": 2.3295187950134277, | |
| "learning_rate": 0.00024178608859879417, | |
| "loss": 4.012393493652343, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.5831871906357986, | |
| "grad_norm": 3.1501762866973877, | |
| "learning_rate": 0.0002416823186360476, | |
| "loss": 3.9226104736328127, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.5842248902632644, | |
| "grad_norm": 2.8185627460479736, | |
| "learning_rate": 0.000241578548673301, | |
| "loss": 3.9830364990234375, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.5852625898907302, | |
| "grad_norm": 2.39125657081604, | |
| "learning_rate": 0.00024147477871055444, | |
| "loss": 4.058615112304688, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.586300289518196, | |
| "grad_norm": 2.658254623413086, | |
| "learning_rate": 0.00024137100874780783, | |
| "loss": 3.9012820434570314, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.5873379891456619, | |
| "grad_norm": 2.873662233352661, | |
| "learning_rate": 0.00024126723878506126, | |
| "loss": 4.018562622070313, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.5883756887731277, | |
| "grad_norm": 2.0522000789642334, | |
| "learning_rate": 0.00024116346882231468, | |
| "loss": 4.0417938232421875, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.5894133884005935, | |
| "grad_norm": 2.688117742538452, | |
| "learning_rate": 0.00024105969885956807, | |
| "loss": 3.910294494628906, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.5904510880280593, | |
| "grad_norm": 3.5324251651763916, | |
| "learning_rate": 0.00024095592889682152, | |
| "loss": 4.042366027832031, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.5914887876555253, | |
| "grad_norm": 3.254483461380005, | |
| "learning_rate": 0.00024085215893407492, | |
| "loss": 3.875579833984375, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.5925264872829911, | |
| "grad_norm": 1.4469491243362427, | |
| "learning_rate": 0.00024074838897132834, | |
| "loss": 3.8468057250976564, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.5935641869104569, | |
| "grad_norm": 7.142496585845947, | |
| "learning_rate": 0.00024064461900858173, | |
| "loss": 3.9028366088867186, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.5946018865379228, | |
| "grad_norm": 2.8328020572662354, | |
| "learning_rate": 0.00024054084904583518, | |
| "loss": 4.013849182128906, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.5956395861653886, | |
| "grad_norm": 1.999799370765686, | |
| "learning_rate": 0.0002404370790830886, | |
| "loss": 3.9890103149414062, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.5966772857928544, | |
| "grad_norm": 5.142120361328125, | |
| "learning_rate": 0.000240333309120342, | |
| "loss": 3.8782421875, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.5977149854203202, | |
| "grad_norm": 2.6170506477355957, | |
| "learning_rate": 0.00024022953915759542, | |
| "loss": 3.9341799926757814, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.5987526850477861, | |
| "grad_norm": 4.847115993499756, | |
| "learning_rate": 0.00024012576919484882, | |
| "loss": 4.028234252929687, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.5997903846752519, | |
| "grad_norm": 3.093014717102051, | |
| "learning_rate": 0.00024002199923210226, | |
| "loss": 4.02893310546875, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.6008280843027177, | |
| "grad_norm": 2.6559977531433105, | |
| "learning_rate": 0.00023991822926935566, | |
| "loss": 3.9997882080078124, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.6018657839301835, | |
| "grad_norm": 1.5972485542297363, | |
| "learning_rate": 0.00023981445930660908, | |
| "loss": 3.9749560546875, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.6029034835576494, | |
| "grad_norm": 3.777557134628296, | |
| "learning_rate": 0.0002397106893438625, | |
| "loss": 3.9969076538085937, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.6039411831851152, | |
| "grad_norm": 1.8903939723968506, | |
| "learning_rate": 0.00023960691938111593, | |
| "loss": 4.007763977050781, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.604978882812581, | |
| "grad_norm": 3.150963068008423, | |
| "learning_rate": 0.00023950314941836935, | |
| "loss": 4.019749145507813, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.6060165824400469, | |
| "grad_norm": 1.934287190437317, | |
| "learning_rate": 0.00023939937945562274, | |
| "loss": 4.014994812011719, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.6070542820675128, | |
| "grad_norm": 7.10530948638916, | |
| "learning_rate": 0.00023929560949287616, | |
| "loss": 4.050195617675781, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.6080919816949786, | |
| "grad_norm": 2.367403030395508, | |
| "learning_rate": 0.0002391918395301296, | |
| "loss": 3.8701296997070314, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.6091296813224444, | |
| "grad_norm": 1.9392305612564087, | |
| "learning_rate": 0.000239088069567383, | |
| "loss": 4.08440185546875, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.6101673809499102, | |
| "grad_norm": 2.5947983264923096, | |
| "learning_rate": 0.00023898429960463643, | |
| "loss": 4.050205078125, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.6112050805773761, | |
| "grad_norm": 2.1583032608032227, | |
| "learning_rate": 0.00023888052964188982, | |
| "loss": 3.958690490722656, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.6122427802048419, | |
| "grad_norm": 1.6529427766799927, | |
| "learning_rate": 0.00023877675967914325, | |
| "loss": 3.9609234619140623, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.6132804798323077, | |
| "grad_norm": 2.0239171981811523, | |
| "learning_rate": 0.00023867298971639667, | |
| "loss": 4.128135986328125, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.6143181794597736, | |
| "grad_norm": 3.8679206371307373, | |
| "learning_rate": 0.0002385692197536501, | |
| "loss": 4.005528869628907, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.6153558790872394, | |
| "grad_norm": 3.305494785308838, | |
| "learning_rate": 0.0002384654497909035, | |
| "loss": 3.9134161376953127, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.6163935787147052, | |
| "grad_norm": 1.640649676322937, | |
| "learning_rate": 0.0002383616798281569, | |
| "loss": 3.92852783203125, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.617431278342171, | |
| "grad_norm": 1.7184723615646362, | |
| "learning_rate": 0.00023825790986541036, | |
| "loss": 3.8771322631835936, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.618468977969637, | |
| "grad_norm": 2.6886117458343506, | |
| "learning_rate": 0.00023815413990266375, | |
| "loss": 4.047822875976562, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.6195066775971028, | |
| "grad_norm": 2.9485394954681396, | |
| "learning_rate": 0.00023805036993991717, | |
| "loss": 4.04974853515625, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.6205443772245686, | |
| "grad_norm": 18.998411178588867, | |
| "learning_rate": 0.00023794659997717057, | |
| "loss": 3.978843994140625, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.6215820768520344, | |
| "grad_norm": 1.6347628831863403, | |
| "learning_rate": 0.000237842830014424, | |
| "loss": 3.94311279296875, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.6226197764795003, | |
| "grad_norm": 4.1301798820495605, | |
| "learning_rate": 0.00023773906005167744, | |
| "loss": 4.044434814453125, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.6236574761069661, | |
| "grad_norm": 2.7278170585632324, | |
| "learning_rate": 0.00023763529008893083, | |
| "loss": 3.9771295166015626, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.6246951757344319, | |
| "grad_norm": 3.4196488857269287, | |
| "learning_rate": 0.00023753152012618426, | |
| "loss": 3.9663619995117188, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.6257328753618977, | |
| "grad_norm": 1.3134477138519287, | |
| "learning_rate": 0.00023742775016343765, | |
| "loss": 4.089789733886719, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.6267705749893636, | |
| "grad_norm": 4.490455627441406, | |
| "learning_rate": 0.0002373239802006911, | |
| "loss": 3.87512939453125, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.6278082746168294, | |
| "grad_norm": 3.0652222633361816, | |
| "learning_rate": 0.00023722021023794452, | |
| "loss": 3.893270263671875, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.6288459742442952, | |
| "grad_norm": 8.751646995544434, | |
| "learning_rate": 0.00023711644027519792, | |
| "loss": 3.862340393066406, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.629883673871761, | |
| "grad_norm": 2.9108734130859375, | |
| "learning_rate": 0.00023701267031245134, | |
| "loss": 3.9849557495117187, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.630921373499227, | |
| "grad_norm": 2.250643253326416, | |
| "learning_rate": 0.00023690890034970473, | |
| "loss": 3.955241394042969, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.6319590731266927, | |
| "grad_norm": 1.4363751411437988, | |
| "learning_rate": 0.00023680513038695818, | |
| "loss": 4.0179071044921875, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.6329967727541586, | |
| "grad_norm": 1.6399027109146118, | |
| "learning_rate": 0.00023670136042421158, | |
| "loss": 3.911060485839844, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.6340344723816245, | |
| "grad_norm": 2.371727228164673, | |
| "learning_rate": 0.000236597590461465, | |
| "loss": 3.9237380981445313, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.6350721720090903, | |
| "grad_norm": 1.6354718208312988, | |
| "learning_rate": 0.00023649382049871842, | |
| "loss": 4.036581420898438, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.6361098716365561, | |
| "grad_norm": 3.147254705429077, | |
| "learning_rate": 0.00023639005053597184, | |
| "loss": 4.009747619628906, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.6371475712640219, | |
| "grad_norm": 2.9439003467559814, | |
| "learning_rate": 0.00023628628057322526, | |
| "loss": 3.965068664550781, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.6381852708914878, | |
| "grad_norm": 2.8980836868286133, | |
| "learning_rate": 0.00023618251061047866, | |
| "loss": 3.99951171875, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.6392229705189536, | |
| "grad_norm": 2.862438201904297, | |
| "learning_rate": 0.00023607874064773208, | |
| "loss": 3.8896145629882812, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.6402606701464194, | |
| "grad_norm": 1.7125756740570068, | |
| "learning_rate": 0.00023597497068498548, | |
| "loss": 3.9900253295898436, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.6412983697738852, | |
| "grad_norm": 13.891119956970215, | |
| "learning_rate": 0.00023587120072223892, | |
| "loss": 3.8787249755859374, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.6423360694013511, | |
| "grad_norm": 3.5258827209472656, | |
| "learning_rate": 0.00023576743075949235, | |
| "loss": 3.940326843261719, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.6433737690288169, | |
| "grad_norm": 4.297271251678467, | |
| "learning_rate": 0.00023566366079674574, | |
| "loss": 3.8732571411132812, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.6444114686562827, | |
| "grad_norm": 3.574477195739746, | |
| "learning_rate": 0.00023555989083399916, | |
| "loss": 4.078603515625, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.6454491682837485, | |
| "grad_norm": 3.2514758110046387, | |
| "learning_rate": 0.00023545612087125259, | |
| "loss": 3.956298522949219, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.6464868679112145, | |
| "grad_norm": 2.582719326019287, | |
| "learning_rate": 0.000235352350908506, | |
| "loss": 3.8729116821289065, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.6475245675386803, | |
| "grad_norm": 2.445774793624878, | |
| "learning_rate": 0.00023524858094575943, | |
| "loss": 4.064724426269532, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.6485622671661461, | |
| "grad_norm": 4.912772178649902, | |
| "learning_rate": 0.00023514481098301282, | |
| "loss": 4.02049560546875, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.6495999667936119, | |
| "grad_norm": 3.490936040878296, | |
| "learning_rate": 0.00023504104102026627, | |
| "loss": 3.912366943359375, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.6506376664210778, | |
| "grad_norm": 2.109618902206421, | |
| "learning_rate": 0.00023493727105751967, | |
| "loss": 3.963838806152344, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.6516753660485436, | |
| "grad_norm": 12.706518173217773, | |
| "learning_rate": 0.0002348335010947731, | |
| "loss": 3.901888732910156, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.6527130656760094, | |
| "grad_norm": 4.266041278839111, | |
| "learning_rate": 0.00023472973113202648, | |
| "loss": 3.902781982421875, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.6537507653034752, | |
| "grad_norm": 3.4900457859039307, | |
| "learning_rate": 0.0002346259611692799, | |
| "loss": 3.8866873168945313, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.6547884649309411, | |
| "grad_norm": 2.4276134967803955, | |
| "learning_rate": 0.00023452219120653336, | |
| "loss": 3.8234634399414062, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.6558261645584069, | |
| "grad_norm": 2.8377914428710938, | |
| "learning_rate": 0.00023441842124378675, | |
| "loss": 3.836332092285156, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.6568638641858727, | |
| "grad_norm": 6.935495853424072, | |
| "learning_rate": 0.00023431465128104017, | |
| "loss": 4.100373229980469, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.6579015638133386, | |
| "grad_norm": 2.90283465385437, | |
| "learning_rate": 0.00023421088131829357, | |
| "loss": 3.9408758544921874, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.6589392634408044, | |
| "grad_norm": 2.8002378940582275, | |
| "learning_rate": 0.00023410711135554702, | |
| "loss": 3.9959124755859374, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.6599769630682703, | |
| "grad_norm": 6.091791152954102, | |
| "learning_rate": 0.0002340033413928004, | |
| "loss": 3.9287460327148436, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.661014662695736, | |
| "grad_norm": 1.2786389589309692, | |
| "learning_rate": 0.00023389957143005383, | |
| "loss": 4.015799560546875, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.662052362323202, | |
| "grad_norm": 1.4586912393569946, | |
| "learning_rate": 0.00023379580146730726, | |
| "loss": 3.89241455078125, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.6630900619506678, | |
| "grad_norm": 2.502657890319824, | |
| "learning_rate": 0.00023369203150456065, | |
| "loss": 3.9217596435546875, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.6641277615781336, | |
| "grad_norm": 3.8019394874572754, | |
| "learning_rate": 0.0002335882615418141, | |
| "loss": 3.91360595703125, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.6651654612055994, | |
| "grad_norm": 1.5058764219284058, | |
| "learning_rate": 0.0002334844915790675, | |
| "loss": 4.059972839355469, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.6662031608330653, | |
| "grad_norm": 2.416229248046875, | |
| "learning_rate": 0.00023338072161632092, | |
| "loss": 3.9887905883789063, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.6672408604605311, | |
| "grad_norm": 1.8767884969711304, | |
| "learning_rate": 0.00023327695165357434, | |
| "loss": 3.8748153686523437, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.6682785600879969, | |
| "grad_norm": 1.7000967264175415, | |
| "learning_rate": 0.00023317318169082776, | |
| "loss": 3.9118023681640626, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.6693162597154627, | |
| "grad_norm": 4.796393394470215, | |
| "learning_rate": 0.00023306941172808118, | |
| "loss": 3.9076058959960935, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.6703539593429286, | |
| "grad_norm": 3.117870807647705, | |
| "learning_rate": 0.00023296564176533458, | |
| "loss": 3.95484375, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.6713916589703944, | |
| "grad_norm": 1.6787638664245605, | |
| "learning_rate": 0.000232861871802588, | |
| "loss": 3.858246154785156, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.6724293585978602, | |
| "grad_norm": 5.671106815338135, | |
| "learning_rate": 0.0002327581018398414, | |
| "loss": 3.9156753540039064, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.673467058225326, | |
| "grad_norm": 7.058924674987793, | |
| "learning_rate": 0.00023265433187709484, | |
| "loss": 3.8724734497070314, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.674504757852792, | |
| "grad_norm": 4.8587422370910645, | |
| "learning_rate": 0.00023255056191434826, | |
| "loss": 3.958966064453125, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.6755424574802578, | |
| "grad_norm": 2.546802520751953, | |
| "learning_rate": 0.00023244679195160166, | |
| "loss": 3.9913558959960938, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.6765801571077236, | |
| "grad_norm": 1.8444024324417114, | |
| "learning_rate": 0.00023234302198885508, | |
| "loss": 4.089451293945313, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.6776178567351895, | |
| "grad_norm": 1.5202494859695435, | |
| "learning_rate": 0.0002322392520261085, | |
| "loss": 3.83590576171875, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.6786555563626553, | |
| "grad_norm": 2.554324150085449, | |
| "learning_rate": 0.00023213548206336192, | |
| "loss": 3.9957940673828123, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.6796932559901211, | |
| "grad_norm": 1.6007890701293945, | |
| "learning_rate": 0.00023203171210061532, | |
| "loss": 3.9022012329101563, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.6807309556175869, | |
| "grad_norm": 2.593081474304199, | |
| "learning_rate": 0.00023192794213786874, | |
| "loss": 3.944790954589844, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.6817686552450528, | |
| "grad_norm": 2.1474156379699707, | |
| "learning_rate": 0.0002318241721751222, | |
| "loss": 3.78737060546875, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.6828063548725186, | |
| "grad_norm": 3.1960246562957764, | |
| "learning_rate": 0.00023172040221237559, | |
| "loss": 3.9783554077148438, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.6838440544999844, | |
| "grad_norm": 3.8228328227996826, | |
| "learning_rate": 0.000231616632249629, | |
| "loss": 3.856565246582031, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.6848817541274502, | |
| "grad_norm": 11.939492225646973, | |
| "learning_rate": 0.0002315128622868824, | |
| "loss": 3.8156298828125, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.6859194537549161, | |
| "grad_norm": 1.8741025924682617, | |
| "learning_rate": 0.00023140909232413582, | |
| "loss": 3.9566534423828124, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.686957153382382, | |
| "grad_norm": 1.682139277458191, | |
| "learning_rate": 0.00023130532236138927, | |
| "loss": 3.9164004516601563, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.6879948530098478, | |
| "grad_norm": 1.1901954412460327, | |
| "learning_rate": 0.00023120155239864267, | |
| "loss": 4.0331982421875, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.6890325526373136, | |
| "grad_norm": 2.2226786613464355, | |
| "learning_rate": 0.0002310977824358961, | |
| "loss": 3.901326904296875, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.6900702522647795, | |
| "grad_norm": 2.28139328956604, | |
| "learning_rate": 0.00023099401247314948, | |
| "loss": 3.734437255859375, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.6911079518922453, | |
| "grad_norm": 3.9518322944641113, | |
| "learning_rate": 0.00023089024251040293, | |
| "loss": 3.890718994140625, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.6921456515197111, | |
| "grad_norm": 4.689309120178223, | |
| "learning_rate": 0.00023078647254765633, | |
| "loss": 3.83462646484375, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.6931833511471769, | |
| "grad_norm": 2.5103607177734375, | |
| "learning_rate": 0.00023068270258490975, | |
| "loss": 3.8714788818359374, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.6942210507746428, | |
| "grad_norm": 2.060398578643799, | |
| "learning_rate": 0.00023057893262216317, | |
| "loss": 3.8463949584960937, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.6952587504021086, | |
| "grad_norm": 3.9058265686035156, | |
| "learning_rate": 0.00023047516265941657, | |
| "loss": 3.955802001953125, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.6962964500295744, | |
| "grad_norm": 2.7018091678619385, | |
| "learning_rate": 0.00023037139269667002, | |
| "loss": 4.010853271484375, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.6973341496570403, | |
| "grad_norm": 1.759364366531372, | |
| "learning_rate": 0.0002302676227339234, | |
| "loss": 3.8436270141601563, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.6983718492845061, | |
| "grad_norm": 4.264219284057617, | |
| "learning_rate": 0.00023016385277117683, | |
| "loss": 3.906452941894531, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.6994095489119719, | |
| "grad_norm": 2.064502000808716, | |
| "learning_rate": 0.00023006008280843023, | |
| "loss": 3.9249755859375, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.7004472485394377, | |
| "grad_norm": 4.326413154602051, | |
| "learning_rate": 0.00022995631284568368, | |
| "loss": 3.9763421630859375, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.7014849481669037, | |
| "grad_norm": 1.5424126386642456, | |
| "learning_rate": 0.0002298525428829371, | |
| "loss": 3.9105490112304686, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.7025226477943695, | |
| "grad_norm": 3.1067123413085938, | |
| "learning_rate": 0.0002297487729201905, | |
| "loss": 4.066288146972656, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.7035603474218353, | |
| "grad_norm": 1.3455185890197754, | |
| "learning_rate": 0.00022964500295744392, | |
| "loss": 3.906605224609375, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.7045980470493011, | |
| "grad_norm": 4.567904472351074, | |
| "learning_rate": 0.0002295412329946973, | |
| "loss": 3.8274655151367187, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.705635746676767, | |
| "grad_norm": 1.4911061525344849, | |
| "learning_rate": 0.00022943746303195076, | |
| "loss": 3.8712289428710935, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.7066734463042328, | |
| "grad_norm": 1.8636422157287598, | |
| "learning_rate": 0.00022933369306920418, | |
| "loss": 3.9435845947265626, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.7077111459316986, | |
| "grad_norm": 4.616937637329102, | |
| "learning_rate": 0.00022922992310645758, | |
| "loss": 4.073515319824219, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.7087488455591644, | |
| "grad_norm": 2.339660167694092, | |
| "learning_rate": 0.000229126153143711, | |
| "loss": 3.752909851074219, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.7097865451866303, | |
| "grad_norm": 2.2960572242736816, | |
| "learning_rate": 0.00022902238318096442, | |
| "loss": 3.841389465332031, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.7108242448140961, | |
| "grad_norm": 1.9303183555603027, | |
| "learning_rate": 0.00022891861321821784, | |
| "loss": 4.007230529785156, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.7118619444415619, | |
| "grad_norm": 3.3750216960906982, | |
| "learning_rate": 0.00022881484325547124, | |
| "loss": 4.0530221557617185, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.7128996440690277, | |
| "grad_norm": 3.9443397521972656, | |
| "learning_rate": 0.00022871107329272466, | |
| "loss": 3.92802734375, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.7139373436964936, | |
| "grad_norm": 2.2526562213897705, | |
| "learning_rate": 0.0002286073033299781, | |
| "loss": 4.117896728515625, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.7149750433239594, | |
| "grad_norm": 3.631329298019409, | |
| "learning_rate": 0.0002285035333672315, | |
| "loss": 3.876401062011719, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.7160127429514253, | |
| "grad_norm": 2.0594444274902344, | |
| "learning_rate": 0.00022839976340448492, | |
| "loss": 3.9595294189453125, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.7170504425788912, | |
| "grad_norm": 6.801323413848877, | |
| "learning_rate": 0.00022829599344173832, | |
| "loss": 3.966697998046875, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.718088142206357, | |
| "grad_norm": 3.579699754714966, | |
| "learning_rate": 0.00022819222347899174, | |
| "loss": 3.9083868408203126, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.7191258418338228, | |
| "grad_norm": 3.9111030101776123, | |
| "learning_rate": 0.0002280884535162452, | |
| "loss": 4.020595092773437, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.7201635414612886, | |
| "grad_norm": 1.5465009212493896, | |
| "learning_rate": 0.00022798468355349858, | |
| "loss": 4.002583618164063, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.7212012410887545, | |
| "grad_norm": 2.5977070331573486, | |
| "learning_rate": 0.000227880913590752, | |
| "loss": 3.82881591796875, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.7222389407162203, | |
| "grad_norm": 3.807143211364746, | |
| "learning_rate": 0.0002277771436280054, | |
| "loss": 3.8127020263671874, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.7232766403436861, | |
| "grad_norm": 3.562692165374756, | |
| "learning_rate": 0.00022767337366525885, | |
| "loss": 3.861103820800781, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.7243143399711519, | |
| "grad_norm": 4.136765003204346, | |
| "learning_rate": 0.00022756960370251225, | |
| "loss": 3.817465515136719, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.7253520395986178, | |
| "grad_norm": 1.9534144401550293, | |
| "learning_rate": 0.00022746583373976567, | |
| "loss": 3.784884338378906, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.7263897392260836, | |
| "grad_norm": 2.2738490104675293, | |
| "learning_rate": 0.0002273620637770191, | |
| "loss": 3.9553741455078124, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.7274274388535494, | |
| "grad_norm": 8.41178035736084, | |
| "learning_rate": 0.00022725829381427248, | |
| "loss": 3.9581622314453124, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.7284651384810152, | |
| "grad_norm": 2.574738025665283, | |
| "learning_rate": 0.00022715452385152593, | |
| "loss": 3.865647888183594, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.7295028381084812, | |
| "grad_norm": 4.12198543548584, | |
| "learning_rate": 0.00022705075388877933, | |
| "loss": 3.8447744750976565, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.730540537735947, | |
| "grad_norm": 3.4615478515625, | |
| "learning_rate": 0.00022694698392603275, | |
| "loss": 3.8417919921875, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.7315782373634128, | |
| "grad_norm": 1.9662399291992188, | |
| "learning_rate": 0.00022684321396328614, | |
| "loss": 3.943636779785156, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.7326159369908786, | |
| "grad_norm": 6.054515361785889, | |
| "learning_rate": 0.0002267394440005396, | |
| "loss": 3.9477130126953126, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.7336536366183445, | |
| "grad_norm": 2.6368846893310547, | |
| "learning_rate": 0.00022663567403779302, | |
| "loss": 3.9134860229492188, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.7346913362458103, | |
| "grad_norm": 18.437114715576172, | |
| "learning_rate": 0.0002265319040750464, | |
| "loss": 3.9025979614257813, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.7357290358732761, | |
| "grad_norm": 3.9227664470672607, | |
| "learning_rate": 0.00022642813411229983, | |
| "loss": 3.9925546264648437, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.736766735500742, | |
| "grad_norm": 2.9096601009368896, | |
| "learning_rate": 0.00022632436414955323, | |
| "loss": 3.7520477294921877, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.7378044351282078, | |
| "grad_norm": 2.756199598312378, | |
| "learning_rate": 0.00022622059418680668, | |
| "loss": 3.7744400024414064, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.7388421347556736, | |
| "grad_norm": 4.398651123046875, | |
| "learning_rate": 0.0002261168242240601, | |
| "loss": 3.8754537963867186, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.7398798343831394, | |
| "grad_norm": 3.0455260276794434, | |
| "learning_rate": 0.0002260130542613135, | |
| "loss": 3.8303518676757813, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.7409175340106053, | |
| "grad_norm": 1.6435341835021973, | |
| "learning_rate": 0.00022590928429856692, | |
| "loss": 3.868741149902344, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.7419552336380711, | |
| "grad_norm": 2.460381507873535, | |
| "learning_rate": 0.00022580551433582034, | |
| "loss": 3.971143798828125, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.742992933265537, | |
| "grad_norm": 3.793260335922241, | |
| "learning_rate": 0.00022570174437307376, | |
| "loss": 3.9564599609375, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.7440306328930028, | |
| "grad_norm": 2.2400221824645996, | |
| "learning_rate": 0.00022559797441032715, | |
| "loss": 3.868074951171875, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.7450683325204687, | |
| "grad_norm": 4.521097660064697, | |
| "learning_rate": 0.00022549420444758058, | |
| "loss": 3.9104345703125, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.7461060321479345, | |
| "grad_norm": 2.454610824584961, | |
| "learning_rate": 0.00022539043448483402, | |
| "loss": 3.8415142822265627, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.7471437317754003, | |
| "grad_norm": 1.7384246587753296, | |
| "learning_rate": 0.00022528666452208742, | |
| "loss": 3.9767572021484376, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.7481814314028661, | |
| "grad_norm": 2.3506603240966797, | |
| "learning_rate": 0.00022518289455934084, | |
| "loss": 3.804529724121094, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.749219131030332, | |
| "grad_norm": 8.719681739807129, | |
| "learning_rate": 0.00022507912459659424, | |
| "loss": 3.6692437744140625, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.7502568306577978, | |
| "grad_norm": 2.188565254211426, | |
| "learning_rate": 0.00022497535463384766, | |
| "loss": 3.9966400146484373, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.7512945302852636, | |
| "grad_norm": 2.7061383724212646, | |
| "learning_rate": 0.00022487158467110108, | |
| "loss": 3.7955560302734375, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.7523322299127294, | |
| "grad_norm": 1.820816993713379, | |
| "learning_rate": 0.0002247678147083545, | |
| "loss": 3.800717468261719, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.7533699295401953, | |
| "grad_norm": 2.3510568141937256, | |
| "learning_rate": 0.00022466404474560792, | |
| "loss": 3.8987237548828126, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.7544076291676611, | |
| "grad_norm": 3.0852279663085938, | |
| "learning_rate": 0.00022456027478286132, | |
| "loss": 3.9560122680664063, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.7554453287951269, | |
| "grad_norm": 2.3377742767333984, | |
| "learning_rate": 0.00022445650482011477, | |
| "loss": 3.9077328491210936, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.7564830284225929, | |
| "grad_norm": 4.257030010223389, | |
| "learning_rate": 0.00022435273485736816, | |
| "loss": 3.915125732421875, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.7575207280500587, | |
| "grad_norm": 1.8238855600357056, | |
| "learning_rate": 0.00022424896489462158, | |
| "loss": 3.8456768798828125, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.7585584276775245, | |
| "grad_norm": 2.2102901935577393, | |
| "learning_rate": 0.000224145194931875, | |
| "loss": 3.9905462646484375, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.7595961273049903, | |
| "grad_norm": 6.003772735595703, | |
| "learning_rate": 0.0002240414249691284, | |
| "loss": 3.831954040527344, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.7606338269324562, | |
| "grad_norm": 2.209681272506714, | |
| "learning_rate": 0.00022393765500638185, | |
| "loss": 3.96739990234375, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.761671526559922, | |
| "grad_norm": 5.8811235427856445, | |
| "learning_rate": 0.00022383388504363525, | |
| "loss": 3.8418869018554687, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.7627092261873878, | |
| "grad_norm": 1.9358527660369873, | |
| "learning_rate": 0.00022373011508088867, | |
| "loss": 3.9846435546875, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.7637469258148536, | |
| "grad_norm": 4.668230056762695, | |
| "learning_rate": 0.00022362634511814206, | |
| "loss": 3.87702880859375, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.7647846254423195, | |
| "grad_norm": 2.1674551963806152, | |
| "learning_rate": 0.0002235225751553955, | |
| "loss": 3.9948715209960937, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.7658223250697853, | |
| "grad_norm": 3.276775360107422, | |
| "learning_rate": 0.00022341880519264893, | |
| "loss": 3.876432189941406, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.7668600246972511, | |
| "grad_norm": 2.382432222366333, | |
| "learning_rate": 0.00022331503522990233, | |
| "loss": 3.9535626220703124, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.7678977243247169, | |
| "grad_norm": 2.288184404373169, | |
| "learning_rate": 0.00022321126526715575, | |
| "loss": 3.962213134765625, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.7689354239521828, | |
| "grad_norm": 11.535764694213867, | |
| "learning_rate": 0.00022310749530440914, | |
| "loss": 3.839007568359375, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.7699731235796486, | |
| "grad_norm": 2.520615816116333, | |
| "learning_rate": 0.0002230037253416626, | |
| "loss": 3.942041015625, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.7710108232071144, | |
| "grad_norm": 5.035190582275391, | |
| "learning_rate": 0.000222899955378916, | |
| "loss": 3.827362365722656, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.7720485228345803, | |
| "grad_norm": 2.1133370399475098, | |
| "learning_rate": 0.0002227961854161694, | |
| "loss": 3.8085946655273437, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.7730862224620462, | |
| "grad_norm": 3.3813223838806152, | |
| "learning_rate": 0.00022269241545342283, | |
| "loss": 3.8528924560546876, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.774123922089512, | |
| "grad_norm": 2.5912599563598633, | |
| "learning_rate": 0.00022258864549067625, | |
| "loss": 4.025367126464844, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.7751616217169778, | |
| "grad_norm": 8.560553550720215, | |
| "learning_rate": 0.00022248487552792968, | |
| "loss": 3.8942611694335936, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.7761993213444436, | |
| "grad_norm": 2.7210657596588135, | |
| "learning_rate": 0.00022238110556518307, | |
| "loss": 3.7450421142578123, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.7772370209719095, | |
| "grad_norm": 3.06449031829834, | |
| "learning_rate": 0.0002222773356024365, | |
| "loss": 4.058497619628906, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.7782747205993753, | |
| "grad_norm": 2.6780056953430176, | |
| "learning_rate": 0.00022217356563968994, | |
| "loss": 3.908025207519531, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.7793124202268411, | |
| "grad_norm": 2.579087257385254, | |
| "learning_rate": 0.00022206979567694334, | |
| "loss": 3.914963684082031, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.780350119854307, | |
| "grad_norm": 6.844696998596191, | |
| "learning_rate": 0.00022196602571419676, | |
| "loss": 3.8832046508789064, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.7813878194817728, | |
| "grad_norm": 7.694204330444336, | |
| "learning_rate": 0.00022186225575145015, | |
| "loss": 3.9718392944335936, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.7824255191092386, | |
| "grad_norm": 9.200462341308594, | |
| "learning_rate": 0.00022175848578870358, | |
| "loss": 3.859333801269531, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.7834632187367044, | |
| "grad_norm": 4.622501850128174, | |
| "learning_rate": 0.000221654715825957, | |
| "loss": 3.9099847412109376, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.7845009183641704, | |
| "grad_norm": 1.9592938423156738, | |
| "learning_rate": 0.00022155094586321042, | |
| "loss": 3.8727886962890623, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.7855386179916362, | |
| "grad_norm": 4.431970119476318, | |
| "learning_rate": 0.00022144717590046384, | |
| "loss": 3.9126931762695314, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.786576317619102, | |
| "grad_norm": 4.069213390350342, | |
| "learning_rate": 0.00022134340593771724, | |
| "loss": 3.8846563720703124, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.7876140172465678, | |
| "grad_norm": 2.009706497192383, | |
| "learning_rate": 0.00022123963597497068, | |
| "loss": 3.951784362792969, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.7886517168740337, | |
| "grad_norm": 3.475999116897583, | |
| "learning_rate": 0.00022113586601222408, | |
| "loss": 3.8493191528320314, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.7896894165014995, | |
| "grad_norm": 2.45090913772583, | |
| "learning_rate": 0.0002210320960494775, | |
| "loss": 3.938821105957031, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.7907271161289653, | |
| "grad_norm": 3.2572762966156006, | |
| "learning_rate": 0.0002209283260867309, | |
| "loss": 3.8848175048828124, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.7917648157564311, | |
| "grad_norm": 2.2695441246032715, | |
| "learning_rate": 0.00022082455612398432, | |
| "loss": 3.8166204833984376, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.792802515383897, | |
| "grad_norm": 6.520568370819092, | |
| "learning_rate": 0.00022072078616123777, | |
| "loss": 3.8947482299804688, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.7938402150113628, | |
| "grad_norm": 9.233070373535156, | |
| "learning_rate": 0.00022061701619849116, | |
| "loss": 3.8395782470703126, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.7948779146388286, | |
| "grad_norm": 1.5229090452194214, | |
| "learning_rate": 0.00022051324623574458, | |
| "loss": 3.979128723144531, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.7959156142662944, | |
| "grad_norm": 3.9737226963043213, | |
| "learning_rate": 0.00022040947627299798, | |
| "loss": 3.890586242675781, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.7969533138937603, | |
| "grad_norm": 1.9717073440551758, | |
| "learning_rate": 0.00022030570631025143, | |
| "loss": 3.971199951171875, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.7979910135212261, | |
| "grad_norm": 3.3416688442230225, | |
| "learning_rate": 0.00022020193634750485, | |
| "loss": 3.961914367675781, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.799028713148692, | |
| "grad_norm": 2.037693738937378, | |
| "learning_rate": 0.00022009816638475824, | |
| "loss": 3.8637881469726563, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.8000664127761579, | |
| "grad_norm": 5.026768207550049, | |
| "learning_rate": 0.00021999439642201167, | |
| "loss": 3.9692828369140627, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.8011041124036237, | |
| "grad_norm": 2.230590581893921, | |
| "learning_rate": 0.00021989062645926506, | |
| "loss": 3.852244873046875, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.8021418120310895, | |
| "grad_norm": 2.0119717121124268, | |
| "learning_rate": 0.0002197868564965185, | |
| "loss": 3.9774188232421874, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.8031795116585553, | |
| "grad_norm": 5.08432674407959, | |
| "learning_rate": 0.0002196830865337719, | |
| "loss": 3.8257907104492186, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.8042172112860212, | |
| "grad_norm": 3.0086820125579834, | |
| "learning_rate": 0.00021957931657102533, | |
| "loss": 3.865489501953125, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.805254910913487, | |
| "grad_norm": 4.534199237823486, | |
| "learning_rate": 0.00021947554660827875, | |
| "loss": 3.875529479980469, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.8062926105409528, | |
| "grad_norm": 2.68324613571167, | |
| "learning_rate": 0.00021937177664553217, | |
| "loss": 3.928450927734375, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.8073303101684186, | |
| "grad_norm": 3.7302651405334473, | |
| "learning_rate": 0.0002192680066827856, | |
| "loss": 3.9593939208984374, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.8083680097958845, | |
| "grad_norm": 2.8160176277160645, | |
| "learning_rate": 0.000219164236720039, | |
| "loss": 4.003828735351562, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.8094057094233503, | |
| "grad_norm": 2.314183473587036, | |
| "learning_rate": 0.0002190604667572924, | |
| "loss": 3.988243408203125, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.8104434090508161, | |
| "grad_norm": 2.661289691925049, | |
| "learning_rate": 0.0002189566967945458, | |
| "loss": 3.9358248901367188, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.8114811086782819, | |
| "grad_norm": 5.065707206726074, | |
| "learning_rate": 0.00021885292683179925, | |
| "loss": 3.7886788940429685, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.8125188083057479, | |
| "grad_norm": 5.173181056976318, | |
| "learning_rate": 0.00021874915686905268, | |
| "loss": 3.790332946777344, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.8135565079332137, | |
| "grad_norm": 2.573274850845337, | |
| "learning_rate": 0.00021864538690630607, | |
| "loss": 3.975767822265625, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.8145942075606795, | |
| "grad_norm": 3.010472536087036, | |
| "learning_rate": 0.0002185416169435595, | |
| "loss": 3.861507568359375, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.8156319071881453, | |
| "grad_norm": 2.632009983062744, | |
| "learning_rate": 0.00021843784698081291, | |
| "loss": 3.9550189208984374, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.8166696068156112, | |
| "grad_norm": 5.590510368347168, | |
| "learning_rate": 0.00021833407701806634, | |
| "loss": 3.924696044921875, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.817707306443077, | |
| "grad_norm": 4.052700042724609, | |
| "learning_rate": 0.00021823030705531976, | |
| "loss": 3.831592712402344, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.8187450060705428, | |
| "grad_norm": 2.7363622188568115, | |
| "learning_rate": 0.00021812653709257315, | |
| "loss": 4.028314208984375, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.8197827056980087, | |
| "grad_norm": 4.773056507110596, | |
| "learning_rate": 0.0002180227671298266, | |
| "loss": 3.7855130004882813, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.8208204053254745, | |
| "grad_norm": 2.6858768463134766, | |
| "learning_rate": 0.00021791899716708, | |
| "loss": 3.9081768798828125, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.8218581049529403, | |
| "grad_norm": 4.861189842224121, | |
| "learning_rate": 0.00021781522720433342, | |
| "loss": 3.99755126953125, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.8228958045804061, | |
| "grad_norm": 2.1088833808898926, | |
| "learning_rate": 0.00021771145724158681, | |
| "loss": 3.8839871215820314, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.823933504207872, | |
| "grad_norm": 2.911973237991333, | |
| "learning_rate": 0.00021760768727884024, | |
| "loss": 3.864557189941406, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.8249712038353378, | |
| "grad_norm": 6.847414016723633, | |
| "learning_rate": 0.00021750391731609368, | |
| "loss": 3.868388366699219, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.8260089034628036, | |
| "grad_norm": 2.0376992225646973, | |
| "learning_rate": 0.00021740014735334708, | |
| "loss": 3.9390859985351563, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.8270466030902694, | |
| "grad_norm": 4.972707271575928, | |
| "learning_rate": 0.0002172963773906005, | |
| "loss": 3.8582077026367188, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.8280843027177354, | |
| "grad_norm": 7.205460071563721, | |
| "learning_rate": 0.0002171926074278539, | |
| "loss": 3.839405212402344, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.8291220023452012, | |
| "grad_norm": 12.633910179138184, | |
| "learning_rate": 0.00021708883746510735, | |
| "loss": 3.831856384277344, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.830159701972667, | |
| "grad_norm": 4.479480743408203, | |
| "learning_rate": 0.00021698506750236074, | |
| "loss": 3.795959167480469, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.8311974016001328, | |
| "grad_norm": 4.281702995300293, | |
| "learning_rate": 0.00021688129753961416, | |
| "loss": 4.039653625488281, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.8322351012275987, | |
| "grad_norm": 3.5497429370880127, | |
| "learning_rate": 0.00021677752757686758, | |
| "loss": 4.000389709472656, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.8332728008550645, | |
| "grad_norm": 2.431144952774048, | |
| "learning_rate": 0.00021667375761412098, | |
| "loss": 3.9792193603515624, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.8343105004825303, | |
| "grad_norm": 13.734992980957031, | |
| "learning_rate": 0.00021656998765137443, | |
| "loss": 3.8038821411132813, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.8353482001099961, | |
| "grad_norm": 1.6895164251327515, | |
| "learning_rate": 0.00021646621768862782, | |
| "loss": 3.7827383422851564, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.836385899737462, | |
| "grad_norm": 3.4907968044281006, | |
| "learning_rate": 0.00021636244772588124, | |
| "loss": 3.882090759277344, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.8374235993649278, | |
| "grad_norm": 2.345144510269165, | |
| "learning_rate": 0.0002162586777631347, | |
| "loss": 3.8400167846679687, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.8384612989923936, | |
| "grad_norm": 3.4369494915008545, | |
| "learning_rate": 0.0002161549078003881, | |
| "loss": 3.7776190185546876, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.8394989986198595, | |
| "grad_norm": 5.47845983505249, | |
| "learning_rate": 0.0002160511378376415, | |
| "loss": 4.044588623046875, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.8405366982473254, | |
| "grad_norm": 1.5931683778762817, | |
| "learning_rate": 0.0002159473678748949, | |
| "loss": 3.9998703002929688, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.8415743978747912, | |
| "grad_norm": 3.1940066814422607, | |
| "learning_rate": 0.00021584359791214833, | |
| "loss": 3.8839016723632813, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.842612097502257, | |
| "grad_norm": 9.511052131652832, | |
| "learning_rate": 0.00021573982794940172, | |
| "loss": 3.9398565673828125, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.8436497971297229, | |
| "grad_norm": 1.9886616468429565, | |
| "learning_rate": 0.00021563605798665517, | |
| "loss": 3.82979736328125, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.8446874967571887, | |
| "grad_norm": 2.362103223800659, | |
| "learning_rate": 0.0002155322880239086, | |
| "loss": 3.8248995971679687, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.8457251963846545, | |
| "grad_norm": 1.7605165243148804, | |
| "learning_rate": 0.000215428518061162, | |
| "loss": 3.7230010986328126, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.8467628960121203, | |
| "grad_norm": 1.8303929567337036, | |
| "learning_rate": 0.0002153247480984154, | |
| "loss": 3.861679992675781, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.8478005956395862, | |
| "grad_norm": 4.539703845977783, | |
| "learning_rate": 0.00021522097813566883, | |
| "loss": 3.8151321411132812, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.848838295267052, | |
| "grad_norm": 1.8927255868911743, | |
| "learning_rate": 0.00021511720817292225, | |
| "loss": 3.999220886230469, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.8498759948945178, | |
| "grad_norm": 3.66632080078125, | |
| "learning_rate": 0.00021501343821017565, | |
| "loss": 3.9149603271484374, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.8509136945219836, | |
| "grad_norm": 6.1261887550354, | |
| "learning_rate": 0.00021490966824742907, | |
| "loss": 3.808494873046875, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.8519513941494495, | |
| "grad_norm": 2.9073901176452637, | |
| "learning_rate": 0.00021480589828468252, | |
| "loss": 3.8501129150390625, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.8529890937769153, | |
| "grad_norm": 1.9176596403121948, | |
| "learning_rate": 0.00021470212832193591, | |
| "loss": 3.9358505249023437, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.8540267934043811, | |
| "grad_norm": 2.3072047233581543, | |
| "learning_rate": 0.00021459835835918934, | |
| "loss": 3.8934945678710937, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.855064493031847, | |
| "grad_norm": 2.7599945068359375, | |
| "learning_rate": 0.00021449458839644273, | |
| "loss": 3.929814453125, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.8561021926593129, | |
| "grad_norm": 2.0721237659454346, | |
| "learning_rate": 0.00021439081843369615, | |
| "loss": 3.86040283203125, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.8571398922867787, | |
| "grad_norm": 5.156016826629639, | |
| "learning_rate": 0.0002142870484709496, | |
| "loss": 3.8864166259765627, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.8581775919142445, | |
| "grad_norm": 4.168294906616211, | |
| "learning_rate": 0.000214183278508203, | |
| "loss": 4.001069030761719, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.8592152915417104, | |
| "grad_norm": 1.7126719951629639, | |
| "learning_rate": 0.00021407950854545642, | |
| "loss": 3.946321716308594, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.8602529911691762, | |
| "grad_norm": 5.809075355529785, | |
| "learning_rate": 0.0002139757385827098, | |
| "loss": 3.82521240234375, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.861290690796642, | |
| "grad_norm": 5.8849921226501465, | |
| "learning_rate": 0.00021387196861996326, | |
| "loss": 3.7766848754882814, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.8623283904241078, | |
| "grad_norm": 2.317793607711792, | |
| "learning_rate": 0.00021376819865721666, | |
| "loss": 4.01570068359375, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.8633660900515737, | |
| "grad_norm": 19.14999008178711, | |
| "learning_rate": 0.00021366442869447008, | |
| "loss": 3.760934143066406, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.8644037896790395, | |
| "grad_norm": 2.025818109512329, | |
| "learning_rate": 0.0002135606587317235, | |
| "loss": 3.9255300903320314, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.8654414893065053, | |
| "grad_norm": 3.068112373352051, | |
| "learning_rate": 0.0002134568887689769, | |
| "loss": 3.821394348144531, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.8664791889339711, | |
| "grad_norm": 8.730904579162598, | |
| "learning_rate": 0.00021335311880623034, | |
| "loss": 3.8662478637695314, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.867516888561437, | |
| "grad_norm": 2.9956910610198975, | |
| "learning_rate": 0.00021324934884348374, | |
| "loss": 3.8266961669921873, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.8685545881889029, | |
| "grad_norm": 2.774705410003662, | |
| "learning_rate": 0.00021314557888073716, | |
| "loss": 3.8334832763671876, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.8695922878163687, | |
| "grad_norm": 1.9926444292068481, | |
| "learning_rate": 0.00021304180891799056, | |
| "loss": 3.973898620605469, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.8706299874438345, | |
| "grad_norm": 1.8433290719985962, | |
| "learning_rate": 0.000212938038955244, | |
| "loss": 3.8273077392578125, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.8716676870713004, | |
| "grad_norm": 5.3389410972595215, | |
| "learning_rate": 0.00021283426899249743, | |
| "loss": 3.8604061889648436, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.8727053866987662, | |
| "grad_norm": 7.391428470611572, | |
| "learning_rate": 0.00021273049902975082, | |
| "loss": 3.8056671142578127, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.873743086326232, | |
| "grad_norm": 5.367404937744141, | |
| "learning_rate": 0.00021262672906700424, | |
| "loss": 3.8744406127929687, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.8747807859536978, | |
| "grad_norm": 3.1199004650115967, | |
| "learning_rate": 0.00021252295910425764, | |
| "loss": 3.8992080688476562, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.8758184855811637, | |
| "grad_norm": 1.8603098392486572, | |
| "learning_rate": 0.0002124191891415111, | |
| "loss": 3.8311639404296876, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.8768561852086295, | |
| "grad_norm": 2.5739691257476807, | |
| "learning_rate": 0.0002123154191787645, | |
| "loss": 3.754921569824219, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.8778938848360953, | |
| "grad_norm": 3.090057134628296, | |
| "learning_rate": 0.0002122116492160179, | |
| "loss": 3.74908935546875, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.8789315844635612, | |
| "grad_norm": 9.258840560913086, | |
| "learning_rate": 0.00021210787925327133, | |
| "loss": 3.985562744140625, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.879969284091027, | |
| "grad_norm": 3.738255262374878, | |
| "learning_rate": 0.00021200410929052475, | |
| "loss": 3.9656732177734373, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.8810069837184928, | |
| "grad_norm": 3.415017604827881, | |
| "learning_rate": 0.00021190033932777817, | |
| "loss": 3.958587341308594, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.8820446833459586, | |
| "grad_norm": 6.633699893951416, | |
| "learning_rate": 0.00021179656936503157, | |
| "loss": 3.866285705566406, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.8830823829734246, | |
| "grad_norm": 1.7935473918914795, | |
| "learning_rate": 0.000211692799402285, | |
| "loss": 3.9740695190429687, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.8841200826008904, | |
| "grad_norm": 2.706197500228882, | |
| "learning_rate": 0.00021158902943953844, | |
| "loss": 3.8669891357421875, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.8851577822283562, | |
| "grad_norm": 4.353029727935791, | |
| "learning_rate": 0.00021148525947679183, | |
| "loss": 3.881668701171875, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.886195481855822, | |
| "grad_norm": 3.0080366134643555, | |
| "learning_rate": 0.00021138148951404525, | |
| "loss": 3.8229278564453124, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.8872331814832879, | |
| "grad_norm": 7.4073028564453125, | |
| "learning_rate": 0.00021127771955129865, | |
| "loss": 4.015174560546875, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.8882708811107537, | |
| "grad_norm": 4.174534320831299, | |
| "learning_rate": 0.00021117394958855207, | |
| "loss": 3.8184585571289062, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.8893085807382195, | |
| "grad_norm": 5.683806896209717, | |
| "learning_rate": 0.0002110701796258055, | |
| "loss": 3.8243557739257814, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.8903462803656853, | |
| "grad_norm": 2.076599597930908, | |
| "learning_rate": 0.00021096640966305891, | |
| "loss": 3.71376220703125, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.8913839799931512, | |
| "grad_norm": 2.4622974395751953, | |
| "learning_rate": 0.00021086263970031234, | |
| "loss": 3.85018310546875, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.892421679620617, | |
| "grad_norm": 2.3247082233428955, | |
| "learning_rate": 0.00021075886973756573, | |
| "loss": 3.9427032470703125, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.8934593792480828, | |
| "grad_norm": 5.115243911743164, | |
| "learning_rate": 0.00021065509977481918, | |
| "loss": 3.6884475708007813, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.8944970788755486, | |
| "grad_norm": 5.306711196899414, | |
| "learning_rate": 0.00021055132981207257, | |
| "loss": 3.8416738891601563, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.8955347785030146, | |
| "grad_norm": 1.5796631574630737, | |
| "learning_rate": 0.000210447559849326, | |
| "loss": 3.874592590332031, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.8965724781304804, | |
| "grad_norm": 1.6183887720108032, | |
| "learning_rate": 0.00021034378988657942, | |
| "loss": 3.840068054199219, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.8976101777579462, | |
| "grad_norm": 3.1412158012390137, | |
| "learning_rate": 0.0002102400199238328, | |
| "loss": 4.0432958984375, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.898647877385412, | |
| "grad_norm": 1.6547956466674805, | |
| "learning_rate": 0.00021013624996108626, | |
| "loss": 3.829620361328125, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.8996855770128779, | |
| "grad_norm": 9.84925365447998, | |
| "learning_rate": 0.00021003247999833966, | |
| "loss": 3.74409912109375, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.9007232766403437, | |
| "grad_norm": 4.718574523925781, | |
| "learning_rate": 0.00020992871003559308, | |
| "loss": 3.8265109252929688, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.9017609762678095, | |
| "grad_norm": 4.692354679107666, | |
| "learning_rate": 0.00020982494007284647, | |
| "loss": 3.9203875732421873, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.9027986758952754, | |
| "grad_norm": 3.620683431625366, | |
| "learning_rate": 0.00020972117011009992, | |
| "loss": 3.9122955322265627, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.9038363755227412, | |
| "grad_norm": 4.431119918823242, | |
| "learning_rate": 0.00020961740014735334, | |
| "loss": 3.9402545166015623, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.904874075150207, | |
| "grad_norm": 3.734344005584717, | |
| "learning_rate": 0.00020951363018460674, | |
| "loss": 3.8481884765625, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.9059117747776728, | |
| "grad_norm": 3.735985279083252, | |
| "learning_rate": 0.00020940986022186016, | |
| "loss": 3.8412353515625, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.9069494744051387, | |
| "grad_norm": 2.774721145629883, | |
| "learning_rate": 0.00020930609025911356, | |
| "loss": 3.76121337890625, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.9079871740326045, | |
| "grad_norm": 13.096595764160156, | |
| "learning_rate": 0.000209202320296367, | |
| "loss": 3.9009844970703127, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.9090248736600703, | |
| "grad_norm": 5.561835765838623, | |
| "learning_rate": 0.0002090985503336204, | |
| "loss": 3.7489013671875, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.9100625732875361, | |
| "grad_norm": 5.21470832824707, | |
| "learning_rate": 0.00020899478037087382, | |
| "loss": 3.9491476440429687, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.9111002729150021, | |
| "grad_norm": 3.611980438232422, | |
| "learning_rate": 0.00020889101040812724, | |
| "loss": 3.8744741821289064, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.9121379725424679, | |
| "grad_norm": 3.670480489730835, | |
| "learning_rate": 0.00020878724044538067, | |
| "loss": 3.8484326171875, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.9131756721699337, | |
| "grad_norm": 2.46195387840271, | |
| "learning_rate": 0.0002086834704826341, | |
| "loss": 3.8545870971679688, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.9142133717973995, | |
| "grad_norm": 2.256782054901123, | |
| "learning_rate": 0.00020857970051988748, | |
| "loss": 3.788062744140625, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.9152510714248654, | |
| "grad_norm": 1.5597251653671265, | |
| "learning_rate": 0.0002084759305571409, | |
| "loss": 3.8967153930664065, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.9162887710523312, | |
| "grad_norm": 4.607747554779053, | |
| "learning_rate": 0.00020837216059439435, | |
| "loss": 3.84433837890625, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.917326470679797, | |
| "grad_norm": 2.7213637828826904, | |
| "learning_rate": 0.00020826839063164775, | |
| "loss": 3.6432476806640626, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.9183641703072628, | |
| "grad_norm": 1.6943309307098389, | |
| "learning_rate": 0.00020816462066890117, | |
| "loss": 3.942064208984375, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.9194018699347287, | |
| "grad_norm": 1.9761497974395752, | |
| "learning_rate": 0.00020806085070615457, | |
| "loss": 3.757283020019531, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.9204395695621945, | |
| "grad_norm": 2.720459461212158, | |
| "learning_rate": 0.000207957080743408, | |
| "loss": 3.723210754394531, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.9214772691896603, | |
| "grad_norm": 2.986565589904785, | |
| "learning_rate": 0.0002078533107806614, | |
| "loss": 3.9739913940429688, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.9225149688171262, | |
| "grad_norm": 2.682279348373413, | |
| "learning_rate": 0.00020774954081791483, | |
| "loss": 3.706415100097656, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.923552668444592, | |
| "grad_norm": 14.281532287597656, | |
| "learning_rate": 0.00020764577085516825, | |
| "loss": 3.799072570800781, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.9245903680720579, | |
| "grad_norm": 3.1239538192749023, | |
| "learning_rate": 0.00020754200089242165, | |
| "loss": 3.8822201538085936, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.9256280676995237, | |
| "grad_norm": 7.4986252784729, | |
| "learning_rate": 0.0002074382309296751, | |
| "loss": 3.852564392089844, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.9266657673269896, | |
| "grad_norm": 4.3345441818237305, | |
| "learning_rate": 0.0002073344609669285, | |
| "loss": 3.890749206542969, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.9277034669544554, | |
| "grad_norm": 2.6886496543884277, | |
| "learning_rate": 0.0002072306910041819, | |
| "loss": 3.8261907958984374, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.9287411665819212, | |
| "grad_norm": 2.2986016273498535, | |
| "learning_rate": 0.0002071269210414353, | |
| "loss": 3.8075076293945314, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.929778866209387, | |
| "grad_norm": 11.309110641479492, | |
| "learning_rate": 0.00020702315107868873, | |
| "loss": 3.829825744628906, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.9308165658368529, | |
| "grad_norm": 2.784146308898926, | |
| "learning_rate": 0.00020691938111594218, | |
| "loss": 3.7934060668945313, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.9318542654643187, | |
| "grad_norm": 2.3935048580169678, | |
| "learning_rate": 0.00020681561115319557, | |
| "loss": 3.882371826171875, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.9328919650917845, | |
| "grad_norm": 3.6735377311706543, | |
| "learning_rate": 0.000206711841190449, | |
| "loss": 3.842451171875, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.9339296647192503, | |
| "grad_norm": 3.037416696548462, | |
| "learning_rate": 0.0002066080712277024, | |
| "loss": 3.9087152099609375, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.9349673643467162, | |
| "grad_norm": 9.315804481506348, | |
| "learning_rate": 0.00020650430126495584, | |
| "loss": 3.773963623046875, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.936005063974182, | |
| "grad_norm": 5.039952278137207, | |
| "learning_rate": 0.00020640053130220926, | |
| "loss": 3.7935626220703127, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.9370427636016478, | |
| "grad_norm": 5.707028388977051, | |
| "learning_rate": 0.00020629676133946266, | |
| "loss": 3.775277404785156, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.9380804632291136, | |
| "grad_norm": 3.8109843730926514, | |
| "learning_rate": 0.00020619299137671608, | |
| "loss": 3.779449462890625, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.9391181628565796, | |
| "grad_norm": 2.9235146045684814, | |
| "learning_rate": 0.00020608922141396947, | |
| "loss": 3.8383111572265625, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.9401558624840454, | |
| "grad_norm": 1.6856282949447632, | |
| "learning_rate": 0.00020598545145122292, | |
| "loss": 3.8841232299804687, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.9411935621115112, | |
| "grad_norm": 7.263090133666992, | |
| "learning_rate": 0.00020588168148847632, | |
| "loss": 3.9575741577148436, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.9422312617389771, | |
| "grad_norm": 3.6679883003234863, | |
| "learning_rate": 0.00020577791152572974, | |
| "loss": 3.81220947265625, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.9432689613664429, | |
| "grad_norm": 5.708615303039551, | |
| "learning_rate": 0.0002056741415629832, | |
| "loss": 3.807239685058594, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.9443066609939087, | |
| "grad_norm": 4.463714122772217, | |
| "learning_rate": 0.00020557037160023658, | |
| "loss": 3.841280517578125, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.9453443606213745, | |
| "grad_norm": 10.150075912475586, | |
| "learning_rate": 0.00020546660163749, | |
| "loss": 3.75313232421875, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.9463820602488404, | |
| "grad_norm": 11.987652778625488, | |
| "learning_rate": 0.0002053628316747434, | |
| "loss": 3.903273620605469, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.9474197598763062, | |
| "grad_norm": 4.522410869598389, | |
| "learning_rate": 0.00020525906171199682, | |
| "loss": 3.760314636230469, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.948457459503772, | |
| "grad_norm": 4.449744701385498, | |
| "learning_rate": 0.00020515529174925022, | |
| "loss": 3.685667724609375, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.9494951591312378, | |
| "grad_norm": 1.8593145608901978, | |
| "learning_rate": 0.00020505152178650367, | |
| "loss": 3.7343402099609375, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.9505328587587037, | |
| "grad_norm": 2.4731132984161377, | |
| "learning_rate": 0.0002049477518237571, | |
| "loss": 3.783785705566406, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.9515705583861696, | |
| "grad_norm": 1.820862889289856, | |
| "learning_rate": 0.00020484398186101048, | |
| "loss": 3.719476318359375, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.9526082580136354, | |
| "grad_norm": 2.214238166809082, | |
| "learning_rate": 0.0002047402118982639, | |
| "loss": 3.7817031860351564, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.9536459576411012, | |
| "grad_norm": 3.6466450691223145, | |
| "learning_rate": 0.00020463644193551733, | |
| "loss": 3.7672024536132813, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.9546836572685671, | |
| "grad_norm": 5.454410076141357, | |
| "learning_rate": 0.00020453267197277075, | |
| "loss": 3.77567626953125, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.9557213568960329, | |
| "grad_norm": 20.138710021972656, | |
| "learning_rate": 0.00020442890201002417, | |
| "loss": 3.7506854248046877, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.9567590565234987, | |
| "grad_norm": 2.0090079307556152, | |
| "learning_rate": 0.00020432513204727756, | |
| "loss": 3.8082257080078126, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.9577967561509645, | |
| "grad_norm": 2.6881604194641113, | |
| "learning_rate": 0.00020422136208453101, | |
| "loss": 4.051754150390625, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.9588344557784304, | |
| "grad_norm": 3.293210029602051, | |
| "learning_rate": 0.0002041175921217844, | |
| "loss": 3.702369384765625, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.9598721554058962, | |
| "grad_norm": 5.354658126831055, | |
| "learning_rate": 0.00020401382215903783, | |
| "loss": 3.8296829223632813, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.960909855033362, | |
| "grad_norm": 2.285318374633789, | |
| "learning_rate": 0.00020391005219629123, | |
| "loss": 3.8205487060546877, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.9619475546608279, | |
| "grad_norm": 3.3139116764068604, | |
| "learning_rate": 0.00020380628223354465, | |
| "loss": 3.9517453002929686, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.9629852542882937, | |
| "grad_norm": 4.242766380310059, | |
| "learning_rate": 0.0002037025122707981, | |
| "loss": 3.819052429199219, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.9640229539157595, | |
| "grad_norm": 11.361218452453613, | |
| "learning_rate": 0.0002035987423080515, | |
| "loss": 3.8673443603515625, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.9650606535432253, | |
| "grad_norm": 1.6263092756271362, | |
| "learning_rate": 0.0002034949723453049, | |
| "loss": 3.6743267822265624, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.9660983531706913, | |
| "grad_norm": 3.191160202026367, | |
| "learning_rate": 0.0002033912023825583, | |
| "loss": 3.85127685546875, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.9671360527981571, | |
| "grad_norm": 14.219719886779785, | |
| "learning_rate": 0.00020328743241981176, | |
| "loss": 3.8775042724609374, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.9681737524256229, | |
| "grad_norm": 2.592212200164795, | |
| "learning_rate": 0.00020318366245706515, | |
| "loss": 3.784809265136719, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.9692114520530887, | |
| "grad_norm": 2.058199644088745, | |
| "learning_rate": 0.00020307989249431857, | |
| "loss": 3.7654934692382813, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.9702491516805546, | |
| "grad_norm": 3.3060290813446045, | |
| "learning_rate": 0.000202976122531572, | |
| "loss": 3.78427734375, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.9712868513080204, | |
| "grad_norm": 5.642673492431641, | |
| "learning_rate": 0.0002028723525688254, | |
| "loss": 3.768431396484375, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.9723245509354862, | |
| "grad_norm": 2.416527271270752, | |
| "learning_rate": 0.00020276858260607884, | |
| "loss": 3.9477734375, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.973362250562952, | |
| "grad_norm": 6.023645877838135, | |
| "learning_rate": 0.00020266481264333223, | |
| "loss": 3.8290167236328125, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.9743999501904179, | |
| "grad_norm": 3.252999782562256, | |
| "learning_rate": 0.00020256104268058566, | |
| "loss": 3.959106750488281, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.9754376498178837, | |
| "grad_norm": 2.065927743911743, | |
| "learning_rate": 0.0002024572727178391, | |
| "loss": 3.868408508300781, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.9764753494453495, | |
| "grad_norm": 3.3688645362854004, | |
| "learning_rate": 0.0002023535027550925, | |
| "loss": 3.91245361328125, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.9775130490728153, | |
| "grad_norm": 3.004783868789673, | |
| "learning_rate": 0.00020224973279234592, | |
| "loss": 3.7105670166015625, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.9785507487002812, | |
| "grad_norm": 2.6519381999969482, | |
| "learning_rate": 0.00020214596282959932, | |
| "loss": 3.8060031127929688, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.979588448327747, | |
| "grad_norm": 2.3849129676818848, | |
| "learning_rate": 0.00020204219286685274, | |
| "loss": 3.7225299072265625, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.9806261479552129, | |
| "grad_norm": 2.5238912105560303, | |
| "learning_rate": 0.00020193842290410613, | |
| "loss": 3.6197088623046874, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.9816638475826788, | |
| "grad_norm": 7.388523101806641, | |
| "learning_rate": 0.00020183465294135958, | |
| "loss": 3.6996939086914065, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.9827015472101446, | |
| "grad_norm": 10.3375883102417, | |
| "learning_rate": 0.000201730882978613, | |
| "loss": 3.7547808837890626, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.9837392468376104, | |
| "grad_norm": 2.251610040664673, | |
| "learning_rate": 0.0002016271130158664, | |
| "loss": 3.794500732421875, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.9847769464650762, | |
| "grad_norm": 3.8766162395477295, | |
| "learning_rate": 0.00020152334305311982, | |
| "loss": 3.7538128662109376, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.9858146460925421, | |
| "grad_norm": 2.7171695232391357, | |
| "learning_rate": 0.00020141957309037324, | |
| "loss": 3.7826458740234377, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.9868523457200079, | |
| "grad_norm": 3.8345425128936768, | |
| "learning_rate": 0.00020131580312762667, | |
| "loss": 3.8197344970703124, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.9878900453474737, | |
| "grad_norm": 5.732568740844727, | |
| "learning_rate": 0.00020121203316488006, | |
| "loss": 3.84238525390625, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.9889277449749395, | |
| "grad_norm": 2.933835744857788, | |
| "learning_rate": 0.00020110826320213348, | |
| "loss": 3.8682632446289062, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.9899654446024054, | |
| "grad_norm": 6.234426021575928, | |
| "learning_rate": 0.00020100449323938693, | |
| "loss": 3.7140426635742188, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.9910031442298712, | |
| "grad_norm": 3.3652026653289795, | |
| "learning_rate": 0.00020090072327664033, | |
| "loss": 3.7597830200195315, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.992040843857337, | |
| "grad_norm": 3.030595541000366, | |
| "learning_rate": 0.00020079695331389375, | |
| "loss": 3.824953308105469, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.9930785434848028, | |
| "grad_norm": 2.6781022548675537, | |
| "learning_rate": 0.00020069318335114714, | |
| "loss": 3.71589599609375, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.9941162431122688, | |
| "grad_norm": 6.144374370574951, | |
| "learning_rate": 0.00020058941338840056, | |
| "loss": 3.856881408691406, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.9951539427397346, | |
| "grad_norm": 11.093416213989258, | |
| "learning_rate": 0.000200485643425654, | |
| "loss": 3.8529815673828125, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.9961916423672004, | |
| "grad_norm": 3.1640384197235107, | |
| "learning_rate": 0.0002003818734629074, | |
| "loss": 3.966211853027344, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.9972293419946662, | |
| "grad_norm": 4.370779037475586, | |
| "learning_rate": 0.00020027810350016083, | |
| "loss": 3.7798886108398437, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.9982670416221321, | |
| "grad_norm": 3.453723669052124, | |
| "learning_rate": 0.00020017433353741422, | |
| "loss": 3.8633013916015626, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.9993047412495979, | |
| "grad_norm": 2.1785902976989746, | |
| "learning_rate": 0.00020007056357466767, | |
| "loss": 3.7897879028320314, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 1.0003424408770638, | |
| "grad_norm": 7.7243971824646, | |
| "learning_rate": 0.00019996679361192107, | |
| "loss": 3.999345397949219, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 1.0013801405045295, | |
| "grad_norm": 4.7181925773620605, | |
| "learning_rate": 0.0001998630236491745, | |
| "loss": 3.6450360107421873, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 1.0024178401319954, | |
| "grad_norm": 5.74350643157959, | |
| "learning_rate": 0.0001997592536864279, | |
| "loss": 3.742356872558594, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 1.0034555397594613, | |
| "grad_norm": 4.781228065490723, | |
| "learning_rate": 0.0001996554837236813, | |
| "loss": 3.88675048828125, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 1.004493239386927, | |
| "grad_norm": 3.398968458175659, | |
| "learning_rate": 0.00019955171376093476, | |
| "loss": 3.604486083984375, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 1.005530939014393, | |
| "grad_norm": 2.33478045463562, | |
| "learning_rate": 0.00019944794379818815, | |
| "loss": 3.6777334594726563, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 1.0065686386418586, | |
| "grad_norm": 5.443575382232666, | |
| "learning_rate": 0.00019934417383544157, | |
| "loss": 3.71547119140625, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 1.0076063382693246, | |
| "grad_norm": 9.512263298034668, | |
| "learning_rate": 0.00019924040387269497, | |
| "loss": 3.7301199340820315, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 1.0086440378967905, | |
| "grad_norm": 7.4802985191345215, | |
| "learning_rate": 0.00019913663390994842, | |
| "loss": 3.924736328125, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 1.0096817375242562, | |
| "grad_norm": 3.0878612995147705, | |
| "learning_rate": 0.00019903286394720184, | |
| "loss": 3.802860107421875, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 1.010719437151722, | |
| "grad_norm": 3.557770252227783, | |
| "learning_rate": 0.00019892909398445523, | |
| "loss": 3.782970275878906, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 1.011757136779188, | |
| "grad_norm": 4.309437274932861, | |
| "learning_rate": 0.00019882532402170866, | |
| "loss": 3.7818194580078126, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 1.0127948364066537, | |
| "grad_norm": 9.057745933532715, | |
| "learning_rate": 0.00019872155405896205, | |
| "loss": 3.807467041015625, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 1.0138325360341196, | |
| "grad_norm": 3.3481385707855225, | |
| "learning_rate": 0.0001986177840962155, | |
| "loss": 3.7055014038085936, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 1.0148702356615853, | |
| "grad_norm": 5.001105308532715, | |
| "learning_rate": 0.00019851401413346892, | |
| "loss": 3.803979797363281, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 1.0159079352890512, | |
| "grad_norm": 2.7995588779449463, | |
| "learning_rate": 0.00019841024417072232, | |
| "loss": 3.784454650878906, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 1.0169456349165171, | |
| "grad_norm": 2.4021806716918945, | |
| "learning_rate": 0.00019830647420797574, | |
| "loss": 3.8534210205078123, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 1.0179833345439828, | |
| "grad_norm": 2.6125597953796387, | |
| "learning_rate": 0.00019820270424522916, | |
| "loss": 3.6783572387695314, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 1.0190210341714487, | |
| "grad_norm": 12.870917320251465, | |
| "learning_rate": 0.00019809893428248258, | |
| "loss": 3.833390808105469, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 1.0200587337989147, | |
| "grad_norm": 5.185585021972656, | |
| "learning_rate": 0.00019799516431973598, | |
| "loss": 3.7223880004882814, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 1.0210964334263803, | |
| "grad_norm": 1.9634087085723877, | |
| "learning_rate": 0.0001978913943569894, | |
| "loss": 3.6614044189453123, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 1.0221341330538463, | |
| "grad_norm": 5.82041072845459, | |
| "learning_rate": 0.00019778762439424285, | |
| "loss": 3.729730224609375, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 1.0231718326813122, | |
| "grad_norm": 5.905141353607178, | |
| "learning_rate": 0.00019768385443149624, | |
| "loss": 3.8260488891601563, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 1.0242095323087779, | |
| "grad_norm": 3.5444912910461426, | |
| "learning_rate": 0.00019758008446874966, | |
| "loss": 3.687132568359375, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 1.0252472319362438, | |
| "grad_norm": 7.397883892059326, | |
| "learning_rate": 0.00019747631450600306, | |
| "loss": 3.815035400390625, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 1.0262849315637095, | |
| "grad_norm": 4.467862129211426, | |
| "learning_rate": 0.00019737254454325648, | |
| "loss": 3.645810241699219, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 1.0273226311911754, | |
| "grad_norm": 7.824927806854248, | |
| "learning_rate": 0.0001972687745805099, | |
| "loss": 3.7502801513671873, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 1.0283603308186413, | |
| "grad_norm": 9.055319786071777, | |
| "learning_rate": 0.00019716500461776333, | |
| "loss": 3.895949401855469, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 1.029398030446107, | |
| "grad_norm": 2.499072313308716, | |
| "learning_rate": 0.00019706123465501675, | |
| "loss": 3.729786071777344, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 1.030435730073573, | |
| "grad_norm": 2.091538667678833, | |
| "learning_rate": 0.00019695746469227014, | |
| "loss": 3.6661376953125, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 1.0314734297010388, | |
| "grad_norm": 2.9895308017730713, | |
| "learning_rate": 0.0001968536947295236, | |
| "loss": 3.7620065307617185, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 1.0325111293285045, | |
| "grad_norm": 3.8646888732910156, | |
| "learning_rate": 0.00019674992476677699, | |
| "loss": 3.8454522705078125, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 1.0335488289559704, | |
| "grad_norm": 4.3288044929504395, | |
| "learning_rate": 0.0001966461548040304, | |
| "loss": 3.682370300292969, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 1.0345865285834361, | |
| "grad_norm": 1.888063907623291, | |
| "learning_rate": 0.00019654238484128383, | |
| "loss": 3.7136306762695312, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 1.035624228210902, | |
| "grad_norm": 2.9146947860717773, | |
| "learning_rate": 0.00019643861487853722, | |
| "loss": 3.7029214477539063, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 1.036661927838368, | |
| "grad_norm": 3.3660199642181396, | |
| "learning_rate": 0.00019633484491579067, | |
| "loss": 3.669721984863281, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 1.0376996274658337, | |
| "grad_norm": 3.8642494678497314, | |
| "learning_rate": 0.00019623107495304407, | |
| "loss": 3.718172302246094, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.0387373270932996, | |
| "grad_norm": 19.524248123168945, | |
| "learning_rate": 0.0001961273049902975, | |
| "loss": 3.8097552490234374, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 1.0397750267207655, | |
| "grad_norm": 2.175708293914795, | |
| "learning_rate": 0.00019602353502755089, | |
| "loss": 3.7663388061523437, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 1.0408127263482312, | |
| "grad_norm": 2.0963635444641113, | |
| "learning_rate": 0.00019591976506480433, | |
| "loss": 3.7331805419921875, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 1.041850425975697, | |
| "grad_norm": 4.1156134605407715, | |
| "learning_rate": 0.00019581599510205776, | |
| "loss": 3.7513092041015623, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 1.042888125603163, | |
| "grad_norm": 1.9364126920700073, | |
| "learning_rate": 0.00019571222513931115, | |
| "loss": 3.7811895751953126, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 1.0439258252306287, | |
| "grad_norm": 3.9929726123809814, | |
| "learning_rate": 0.00019560845517656457, | |
| "loss": 3.6916510009765626, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 1.0449635248580946, | |
| "grad_norm": 6.161198139190674, | |
| "learning_rate": 0.00019550468521381797, | |
| "loss": 3.735494384765625, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 1.0460012244855603, | |
| "grad_norm": 5.300504207611084, | |
| "learning_rate": 0.00019540091525107142, | |
| "loss": 3.6318603515625, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 1.0470389241130262, | |
| "grad_norm": 6.671936988830566, | |
| "learning_rate": 0.0001952971452883248, | |
| "loss": 3.753620300292969, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 1.0480766237404922, | |
| "grad_norm": 4.034755229949951, | |
| "learning_rate": 0.00019519337532557823, | |
| "loss": 3.6916033935546877, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 1.0491143233679578, | |
| "grad_norm": 2.8349599838256836, | |
| "learning_rate": 0.00019508960536283168, | |
| "loss": 3.6846957397460938, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 1.0501520229954238, | |
| "grad_norm": 4.222849369049072, | |
| "learning_rate": 0.00019498583540008508, | |
| "loss": 3.785768737792969, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 1.0511897226228897, | |
| "grad_norm": 7.210328102111816, | |
| "learning_rate": 0.0001948820654373385, | |
| "loss": 3.674949035644531, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 1.0522274222503554, | |
| "grad_norm": 4.031270503997803, | |
| "learning_rate": 0.0001947782954745919, | |
| "loss": 3.7858917236328127, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 1.0532651218778213, | |
| "grad_norm": 28.53989601135254, | |
| "learning_rate": 0.00019467452551184532, | |
| "loss": 3.8007437133789064, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 1.054302821505287, | |
| "grad_norm": 5.528784275054932, | |
| "learning_rate": 0.00019457075554909877, | |
| "loss": 3.624027099609375, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 1.055340521132753, | |
| "grad_norm": 3.1289713382720947, | |
| "learning_rate": 0.00019446698558635216, | |
| "loss": 3.7536968994140625, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 1.0563782207602188, | |
| "grad_norm": 2.9442858695983887, | |
| "learning_rate": 0.00019436321562360558, | |
| "loss": 3.569986572265625, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 1.0574159203876845, | |
| "grad_norm": 4.8674726486206055, | |
| "learning_rate": 0.00019425944566085898, | |
| "loss": 3.8215240478515624, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 1.0584536200151504, | |
| "grad_norm": 13.513835906982422, | |
| "learning_rate": 0.0001941556756981124, | |
| "loss": 3.6686697387695313, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 1.0594913196426163, | |
| "grad_norm": 3.146784543991089, | |
| "learning_rate": 0.00019405190573536582, | |
| "loss": 3.643824462890625, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 1.060529019270082, | |
| "grad_norm": 4.964068412780762, | |
| "learning_rate": 0.00019394813577261924, | |
| "loss": 3.748782043457031, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 1.061566718897548, | |
| "grad_norm": 3.178044557571411, | |
| "learning_rate": 0.00019384436580987266, | |
| "loss": 3.7086587524414063, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 1.0626044185250136, | |
| "grad_norm": 2.6959052085876465, | |
| "learning_rate": 0.00019374059584712606, | |
| "loss": 3.8190512084960937, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 1.0636421181524796, | |
| "grad_norm": 4.595401763916016, | |
| "learning_rate": 0.0001936368258843795, | |
| "loss": 3.6920120239257814, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 1.0646798177799455, | |
| "grad_norm": 3.383439064025879, | |
| "learning_rate": 0.0001935330559216329, | |
| "loss": 3.7616091918945314, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 1.0657175174074112, | |
| "grad_norm": 6.921218395233154, | |
| "learning_rate": 0.00019342928595888633, | |
| "loss": 3.8070159912109376, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 1.066755217034877, | |
| "grad_norm": 3.7757728099823, | |
| "learning_rate": 0.00019332551599613975, | |
| "loss": 3.64797119140625, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 1.067792916662343, | |
| "grad_norm": 5.452692985534668, | |
| "learning_rate": 0.00019322174603339314, | |
| "loss": 3.7128118896484374, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 1.0688306162898087, | |
| "grad_norm": 2.324277639389038, | |
| "learning_rate": 0.0001931179760706466, | |
| "loss": 3.5481451416015624, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 1.0698683159172746, | |
| "grad_norm": 2.998181104660034, | |
| "learning_rate": 0.00019301420610789999, | |
| "loss": 3.6443612670898435, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 1.0709060155447405, | |
| "grad_norm": 5.453862190246582, | |
| "learning_rate": 0.0001929104361451534, | |
| "loss": 3.7542648315429688, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 1.0719437151722062, | |
| "grad_norm": 7.444779396057129, | |
| "learning_rate": 0.0001928066661824068, | |
| "loss": 3.696410827636719, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 1.0729814147996721, | |
| "grad_norm": 4.7863569259643555, | |
| "learning_rate": 0.00019270289621966025, | |
| "loss": 3.6802603149414064, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 1.0740191144271378, | |
| "grad_norm": 2.9291558265686035, | |
| "learning_rate": 0.00019259912625691367, | |
| "loss": 3.7929959106445312, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 1.0750568140546037, | |
| "grad_norm": 3.2032582759857178, | |
| "learning_rate": 0.00019249535629416707, | |
| "loss": 3.6861895751953124, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 1.0760945136820697, | |
| "grad_norm": 3.1435580253601074, | |
| "learning_rate": 0.0001923915863314205, | |
| "loss": 3.799478759765625, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 1.0771322133095353, | |
| "grad_norm": 2.8310792446136475, | |
| "learning_rate": 0.00019228781636867388, | |
| "loss": 3.73474365234375, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 1.0781699129370013, | |
| "grad_norm": 2.285276174545288, | |
| "learning_rate": 0.00019218404640592733, | |
| "loss": 3.6168304443359376, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 1.0792076125644672, | |
| "grad_norm": 5.524131774902344, | |
| "learning_rate": 0.00019208027644318073, | |
| "loss": 3.710784912109375, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.0802453121919329, | |
| "grad_norm": 3.545400619506836, | |
| "learning_rate": 0.00019197650648043415, | |
| "loss": 3.6640530395507813, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 1.0812830118193988, | |
| "grad_norm": 3.101451873779297, | |
| "learning_rate": 0.0001918727365176876, | |
| "loss": 3.7735882568359376, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 1.0823207114468647, | |
| "grad_norm": 2.4820311069488525, | |
| "learning_rate": 0.000191768966554941, | |
| "loss": 3.6366726684570314, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 1.0833584110743304, | |
| "grad_norm": 26.539804458618164, | |
| "learning_rate": 0.00019166519659219442, | |
| "loss": 3.7211334228515627, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 1.0843961107017963, | |
| "grad_norm": 3.41780161857605, | |
| "learning_rate": 0.0001915614266294478, | |
| "loss": 3.60020263671875, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 1.085433810329262, | |
| "grad_norm": 2.689753293991089, | |
| "learning_rate": 0.00019145765666670123, | |
| "loss": 3.7544232177734376, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 1.086471509956728, | |
| "grad_norm": 2.2958478927612305, | |
| "learning_rate": 0.00019135388670395468, | |
| "loss": 3.849725646972656, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 1.0875092095841938, | |
| "grad_norm": 3.697185754776001, | |
| "learning_rate": 0.00019125011674120808, | |
| "loss": 3.813602294921875, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 1.0885469092116595, | |
| "grad_norm": 2.1992783546447754, | |
| "learning_rate": 0.0001911463467784615, | |
| "loss": 3.6952606201171876, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 1.0895846088391254, | |
| "grad_norm": 2.1027495861053467, | |
| "learning_rate": 0.0001910425768157149, | |
| "loss": 3.6720751953125, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 1.0906223084665914, | |
| "grad_norm": 2.2862184047698975, | |
| "learning_rate": 0.00019093880685296832, | |
| "loss": 3.729759521484375, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 1.091660008094057, | |
| "grad_norm": 2.060633659362793, | |
| "learning_rate": 0.00019083503689022174, | |
| "loss": 3.7085842895507812, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 1.092697707721523, | |
| "grad_norm": 2.636503219604492, | |
| "learning_rate": 0.00019073126692747516, | |
| "loss": 3.6184716796875, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 1.0937354073489887, | |
| "grad_norm": 7.98659086227417, | |
| "learning_rate": 0.00019062749696472858, | |
| "loss": 3.875008544921875, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 1.0947731069764546, | |
| "grad_norm": 3.7854599952697754, | |
| "learning_rate": 0.00019052372700198198, | |
| "loss": 3.8590658569335936, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 1.0958108066039205, | |
| "grad_norm": 9.304828643798828, | |
| "learning_rate": 0.00019041995703923543, | |
| "loss": 3.7910305786132814, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 1.0968485062313862, | |
| "grad_norm": 6.323867321014404, | |
| "learning_rate": 0.00019031618707648882, | |
| "loss": 3.763433532714844, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 1.097886205858852, | |
| "grad_norm": 5.698137283325195, | |
| "learning_rate": 0.00019021241711374224, | |
| "loss": 3.6159381103515624, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 1.098923905486318, | |
| "grad_norm": 80.88331604003906, | |
| "learning_rate": 0.00019010864715099564, | |
| "loss": 3.738255920410156, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 1.0999616051137837, | |
| "grad_norm": 4.7448577880859375, | |
| "learning_rate": 0.00019000487718824906, | |
| "loss": 3.6675250244140627, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 1.1009993047412496, | |
| "grad_norm": 5.72471809387207, | |
| "learning_rate": 0.0001899011072255025, | |
| "loss": 3.7835205078125, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 1.1020370043687153, | |
| "grad_norm": 3.3427250385284424, | |
| "learning_rate": 0.0001897973372627559, | |
| "loss": 3.6577874755859376, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 1.1030747039961812, | |
| "grad_norm": 15.587642669677734, | |
| "learning_rate": 0.00018969356730000932, | |
| "loss": 3.716649169921875, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 1.1041124036236472, | |
| "grad_norm": 4.485306262969971, | |
| "learning_rate": 0.00018958979733726272, | |
| "loss": 3.8367926025390626, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 1.1051501032511128, | |
| "grad_norm": 2.82476806640625, | |
| "learning_rate": 0.00018948602737451617, | |
| "loss": 3.7493435668945314, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 1.1061878028785788, | |
| "grad_norm": 15.561006546020508, | |
| "learning_rate": 0.0001893822574117696, | |
| "loss": 3.826619873046875, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 1.1072255025060447, | |
| "grad_norm": 2.592461109161377, | |
| "learning_rate": 0.00018927848744902299, | |
| "loss": 3.7684344482421874, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 1.1082632021335104, | |
| "grad_norm": 7.259844779968262, | |
| "learning_rate": 0.0001891747174862764, | |
| "loss": 3.758468017578125, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 1.1093009017609763, | |
| "grad_norm": 5.973848342895508, | |
| "learning_rate": 0.0001890709475235298, | |
| "loss": 3.638338317871094, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 1.1103386013884422, | |
| "grad_norm": 4.451427459716797, | |
| "learning_rate": 0.00018896717756078325, | |
| "loss": 3.788179626464844, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 1.111376301015908, | |
| "grad_norm": 4.0467143058776855, | |
| "learning_rate": 0.00018886340759803665, | |
| "loss": 3.7329791259765623, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 1.1124140006433738, | |
| "grad_norm": 5.440663814544678, | |
| "learning_rate": 0.00018875963763529007, | |
| "loss": 3.9233663940429686, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 1.1134517002708395, | |
| "grad_norm": 2.327005386352539, | |
| "learning_rate": 0.00018865586767254352, | |
| "loss": 3.688836975097656, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 1.1144893998983054, | |
| "grad_norm": 2.948439598083496, | |
| "learning_rate": 0.0001885520977097969, | |
| "loss": 3.623143310546875, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 1.1155270995257713, | |
| "grad_norm": 8.996918678283691, | |
| "learning_rate": 0.00018844832774705033, | |
| "loss": 3.6873675537109376, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 1.116564799153237, | |
| "grad_norm": 13.88825798034668, | |
| "learning_rate": 0.00018834455778430373, | |
| "loss": 3.889109802246094, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 1.117602498780703, | |
| "grad_norm": 4.712568283081055, | |
| "learning_rate": 0.00018824078782155715, | |
| "loss": 3.8336361694335936, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 1.1186401984081689, | |
| "grad_norm": 9.021018028259277, | |
| "learning_rate": 0.00018813701785881055, | |
| "loss": 3.818023681640625, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 1.1196778980356346, | |
| "grad_norm": 4.5635294914245605, | |
| "learning_rate": 0.000188033247896064, | |
| "loss": 3.8210824584960936, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 1.1207155976631005, | |
| "grad_norm": 6.118738651275635, | |
| "learning_rate": 0.00018792947793331742, | |
| "loss": 3.762948303222656, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.1217532972905664, | |
| "grad_norm": 6.2977824211120605, | |
| "learning_rate": 0.0001878257079705708, | |
| "loss": 3.7840338134765625, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 1.122790996918032, | |
| "grad_norm": 5.161929607391357, | |
| "learning_rate": 0.00018772193800782423, | |
| "loss": 3.8385690307617186, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 1.123828696545498, | |
| "grad_norm": 19.5078067779541, | |
| "learning_rate": 0.00018761816804507765, | |
| "loss": 3.708250732421875, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 1.1248663961729637, | |
| "grad_norm": 6.583184242248535, | |
| "learning_rate": 0.00018751439808233108, | |
| "loss": 3.6731692504882814, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 1.1259040958004296, | |
| "grad_norm": 2.8113479614257812, | |
| "learning_rate": 0.0001874106281195845, | |
| "loss": 3.776397705078125, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 1.1269417954278955, | |
| "grad_norm": 3.526796340942383, | |
| "learning_rate": 0.0001873068581568379, | |
| "loss": 3.713113098144531, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 1.1279794950553612, | |
| "grad_norm": 4.96720027923584, | |
| "learning_rate": 0.00018720308819409134, | |
| "loss": 3.758629150390625, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 1.1290171946828271, | |
| "grad_norm": 2.3801918029785156, | |
| "learning_rate": 0.00018709931823134474, | |
| "loss": 3.931161193847656, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 1.1300548943102928, | |
| "grad_norm": 5.336031913757324, | |
| "learning_rate": 0.00018699554826859816, | |
| "loss": 3.7431265258789064, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 1.1310925939377587, | |
| "grad_norm": 3.3115835189819336, | |
| "learning_rate": 0.00018689177830585155, | |
| "loss": 3.6016845703125, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 1.1321302935652247, | |
| "grad_norm": 3.2625627517700195, | |
| "learning_rate": 0.00018678800834310498, | |
| "loss": 3.8173687744140623, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 1.1331679931926903, | |
| "grad_norm": 3.4688777923583984, | |
| "learning_rate": 0.00018668423838035843, | |
| "loss": 3.7339138793945312, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 1.1342056928201563, | |
| "grad_norm": 5.170476913452148, | |
| "learning_rate": 0.00018658046841761182, | |
| "loss": 3.8035733032226564, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 1.1352433924476222, | |
| "grad_norm": 6.003453731536865, | |
| "learning_rate": 0.00018647669845486524, | |
| "loss": 3.7767242431640624, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 1.1362810920750879, | |
| "grad_norm": 3.4862396717071533, | |
| "learning_rate": 0.00018637292849211864, | |
| "loss": 3.643880615234375, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 1.1373187917025538, | |
| "grad_norm": 5.885380268096924, | |
| "learning_rate": 0.00018626915852937209, | |
| "loss": 3.7285040283203124, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 1.1383564913300197, | |
| "grad_norm": 2.839015245437622, | |
| "learning_rate": 0.00018616538856662548, | |
| "loss": 3.7614910888671873, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 1.1393941909574854, | |
| "grad_norm": 10.154685020446777, | |
| "learning_rate": 0.0001860616186038789, | |
| "loss": 3.635873107910156, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 1.1404318905849513, | |
| "grad_norm": 11.110898971557617, | |
| "learning_rate": 0.00018595784864113232, | |
| "loss": 3.690367431640625, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 1.141469590212417, | |
| "grad_norm": 2.4880504608154297, | |
| "learning_rate": 0.00018585407867838572, | |
| "loss": 3.69529541015625, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.142507289839883, | |
| "grad_norm": 12.104265213012695, | |
| "learning_rate": 0.00018575030871563917, | |
| "loss": 3.81604736328125, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 1.1435449894673488, | |
| "grad_norm": 4.529385089874268, | |
| "learning_rate": 0.00018564653875289256, | |
| "loss": 3.847531433105469, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 1.1445826890948145, | |
| "grad_norm": 4.51477575302124, | |
| "learning_rate": 0.00018554276879014598, | |
| "loss": 3.6786367797851565, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 1.1456203887222804, | |
| "grad_norm": 3.946871757507324, | |
| "learning_rate": 0.00018543899882739943, | |
| "loss": 3.7411343383789064, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 1.1466580883497464, | |
| "grad_norm": 24.773929595947266, | |
| "learning_rate": 0.00018533522886465283, | |
| "loss": 3.6971206665039062, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 1.147695787977212, | |
| "grad_norm": 4.848511695861816, | |
| "learning_rate": 0.00018523145890190625, | |
| "loss": 3.6610791015625, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 1.148733487604678, | |
| "grad_norm": 3.155839681625366, | |
| "learning_rate": 0.00018512768893915965, | |
| "loss": 3.6824301147460936, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 1.1497711872321439, | |
| "grad_norm": 3.4173624515533447, | |
| "learning_rate": 0.00018502391897641307, | |
| "loss": 3.729654541015625, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 1.1508088868596096, | |
| "grad_norm": 3.1743650436401367, | |
| "learning_rate": 0.00018492014901366646, | |
| "loss": 3.752574157714844, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 1.1518465864870755, | |
| "grad_norm": 5.655935287475586, | |
| "learning_rate": 0.0001848163790509199, | |
| "loss": 3.6886166381835936, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 1.1528842861145412, | |
| "grad_norm": 2.8840067386627197, | |
| "learning_rate": 0.00018471260908817333, | |
| "loss": 3.8322817993164064, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 1.153921985742007, | |
| "grad_norm": 4.1215057373046875, | |
| "learning_rate": 0.00018460883912542673, | |
| "loss": 3.634107971191406, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 1.154959685369473, | |
| "grad_norm": 8.988388061523438, | |
| "learning_rate": 0.00018450506916268018, | |
| "loss": 3.813481750488281, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 1.1559973849969387, | |
| "grad_norm": 4.154327869415283, | |
| "learning_rate": 0.00018440129919993357, | |
| "loss": 3.792846374511719, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 1.1570350846244046, | |
| "grad_norm": 5.43167781829834, | |
| "learning_rate": 0.000184297529237187, | |
| "loss": 3.695276794433594, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 1.1580727842518705, | |
| "grad_norm": 2.1235880851745605, | |
| "learning_rate": 0.0001841937592744404, | |
| "loss": 3.7109506225585935, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 1.1591104838793362, | |
| "grad_norm": 3.2670278549194336, | |
| "learning_rate": 0.0001840899893116938, | |
| "loss": 3.779457702636719, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 1.1601481835068022, | |
| "grad_norm": 4.596736431121826, | |
| "learning_rate": 0.00018398621934894726, | |
| "loss": 3.690837097167969, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 1.161185883134268, | |
| "grad_norm": 5.063496112823486, | |
| "learning_rate": 0.00018388244938620065, | |
| "loss": 3.7899896240234376, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 1.1622235827617338, | |
| "grad_norm": 3.2700915336608887, | |
| "learning_rate": 0.00018377867942345408, | |
| "loss": 3.7538375854492188, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.1632612823891997, | |
| "grad_norm": 2.544558048248291, | |
| "learning_rate": 0.00018367490946070747, | |
| "loss": 3.7601394653320312, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 1.1642989820166654, | |
| "grad_norm": 6.950151443481445, | |
| "learning_rate": 0.0001835711394979609, | |
| "loss": 3.797687683105469, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 1.1653366816441313, | |
| "grad_norm": 2.161999464035034, | |
| "learning_rate": 0.00018346736953521434, | |
| "loss": 3.7408486938476564, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 1.1663743812715972, | |
| "grad_norm": 2.824725866317749, | |
| "learning_rate": 0.00018336359957246774, | |
| "loss": 3.708443298339844, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 1.167412080899063, | |
| "grad_norm": 11.807979583740234, | |
| "learning_rate": 0.00018325982960972116, | |
| "loss": 3.6650485229492187, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 1.1684497805265288, | |
| "grad_norm": 12.751113891601562, | |
| "learning_rate": 0.00018315605964697455, | |
| "loss": 3.5273590087890625, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 1.1694874801539945, | |
| "grad_norm": 3.0161349773406982, | |
| "learning_rate": 0.000183052289684228, | |
| "loss": 3.8431436157226564, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 1.1705251797814604, | |
| "grad_norm": 8.852095603942871, | |
| "learning_rate": 0.0001829485197214814, | |
| "loss": 3.6667901611328126, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 1.1715628794089263, | |
| "grad_norm": 16.80730438232422, | |
| "learning_rate": 0.00018284474975873482, | |
| "loss": 3.7361489868164064, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 1.172600579036392, | |
| "grad_norm": 4.340658187866211, | |
| "learning_rate": 0.00018274097979598824, | |
| "loss": 3.7126028442382815, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 1.173638278663858, | |
| "grad_norm": 2.2295515537261963, | |
| "learning_rate": 0.00018263720983324164, | |
| "loss": 3.6620779418945313, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 1.1746759782913239, | |
| "grad_norm": 3.5379912853240967, | |
| "learning_rate": 0.00018253343987049509, | |
| "loss": 3.60224609375, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 1.1757136779187896, | |
| "grad_norm": 3.174776315689087, | |
| "learning_rate": 0.00018242966990774848, | |
| "loss": 3.7180682373046876, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 1.1767513775462555, | |
| "grad_norm": 4.343127250671387, | |
| "learning_rate": 0.0001823258999450019, | |
| "loss": 3.7377755737304685, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 1.1777890771737214, | |
| "grad_norm": 21.170530319213867, | |
| "learning_rate": 0.0001822221299822553, | |
| "loss": 3.752294921875, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 1.178826776801187, | |
| "grad_norm": 4.612101078033447, | |
| "learning_rate": 0.00018211836001950875, | |
| "loss": 3.8363751220703124, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 1.179864476428653, | |
| "grad_norm": 6.276144981384277, | |
| "learning_rate": 0.00018201459005676217, | |
| "loss": 3.713616943359375, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 1.1809021760561187, | |
| "grad_norm": 10.716604232788086, | |
| "learning_rate": 0.00018191082009401556, | |
| "loss": 3.629880676269531, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 1.1819398756835846, | |
| "grad_norm": 2.2933573722839355, | |
| "learning_rate": 0.00018180705013126898, | |
| "loss": 3.8490249633789064, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 1.1829775753110505, | |
| "grad_norm": 4.147966384887695, | |
| "learning_rate": 0.00018170328016852238, | |
| "loss": 3.5557064819335937, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 1.1840152749385162, | |
| "grad_norm": 3.122669219970703, | |
| "learning_rate": 0.00018159951020577583, | |
| "loss": 3.73438232421875, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 1.1850529745659821, | |
| "grad_norm": 9.210347175598145, | |
| "learning_rate": 0.00018149574024302925, | |
| "loss": 3.6972500610351564, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 1.186090674193448, | |
| "grad_norm": 17.161890029907227, | |
| "learning_rate": 0.00018139197028028265, | |
| "loss": 3.819235534667969, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 1.1871283738209137, | |
| "grad_norm": 5.225100040435791, | |
| "learning_rate": 0.0001812882003175361, | |
| "loss": 3.7081121826171874, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 1.1881660734483797, | |
| "grad_norm": 8.891063690185547, | |
| "learning_rate": 0.0001811844303547895, | |
| "loss": 3.7459030151367188, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 1.1892037730758456, | |
| "grad_norm": 3.465555429458618, | |
| "learning_rate": 0.0001810806603920429, | |
| "loss": 3.7495687866210936, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 1.1902414727033113, | |
| "grad_norm": 2.962984561920166, | |
| "learning_rate": 0.0001809768904292963, | |
| "loss": 3.620650329589844, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 1.1912791723307772, | |
| "grad_norm": 66.27200317382812, | |
| "learning_rate": 0.00018087312046654973, | |
| "loss": 3.8266671752929686, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 1.1923168719582429, | |
| "grad_norm": 10.21193790435791, | |
| "learning_rate": 0.00018076935050380318, | |
| "loss": 3.7377734375, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 1.1933545715857088, | |
| "grad_norm": 4.959332466125488, | |
| "learning_rate": 0.00018066558054105657, | |
| "loss": 3.767408752441406, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 1.1943922712131747, | |
| "grad_norm": 4.304464817047119, | |
| "learning_rate": 0.00018056181057831, | |
| "loss": 3.793067626953125, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 1.1954299708406404, | |
| "grad_norm": 4.872037887573242, | |
| "learning_rate": 0.0001804580406155634, | |
| "loss": 3.754971923828125, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 1.1964676704681063, | |
| "grad_norm": 5.543403625488281, | |
| "learning_rate": 0.0001803542706528168, | |
| "loss": 3.6738140869140623, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 1.1975053700955722, | |
| "grad_norm": 4.535797595977783, | |
| "learning_rate": 0.00018025050069007023, | |
| "loss": 3.6706658935546876, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 1.198543069723038, | |
| "grad_norm": 3.987654209136963, | |
| "learning_rate": 0.00018014673072732365, | |
| "loss": 3.7104837036132814, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 1.1995807693505038, | |
| "grad_norm": 4.604912757873535, | |
| "learning_rate": 0.00018004296076457708, | |
| "loss": 3.7295111083984374, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 1.2006184689779698, | |
| "grad_norm": 7.51154088973999, | |
| "learning_rate": 0.00017993919080183047, | |
| "loss": 3.882249755859375, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 1.2016561686054354, | |
| "grad_norm": 7.570425987243652, | |
| "learning_rate": 0.00017983542083908392, | |
| "loss": 3.7709280395507814, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 1.2026938682329014, | |
| "grad_norm": 7.528663635253906, | |
| "learning_rate": 0.00017973165087633731, | |
| "loss": 3.744920654296875, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 1.203731567860367, | |
| "grad_norm": 4.613593578338623, | |
| "learning_rate": 0.00017962788091359074, | |
| "loss": 3.81932373046875, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.204769267487833, | |
| "grad_norm": 4.6101508140563965, | |
| "learning_rate": 0.00017952411095084416, | |
| "loss": 3.701668701171875, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 1.2058069671152989, | |
| "grad_norm": 3.3336641788482666, | |
| "learning_rate": 0.00017942034098809755, | |
| "loss": 3.5936102294921874, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 1.2068446667427646, | |
| "grad_norm": 8.796258926391602, | |
| "learning_rate": 0.000179316571025351, | |
| "loss": 3.6812298583984373, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 1.2078823663702305, | |
| "grad_norm": 2.9002747535705566, | |
| "learning_rate": 0.0001792128010626044, | |
| "loss": 3.79119873046875, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 1.2089200659976962, | |
| "grad_norm": 3.5677108764648438, | |
| "learning_rate": 0.00017910903109985782, | |
| "loss": 3.868831787109375, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 1.209957765625162, | |
| "grad_norm": 10.07345199584961, | |
| "learning_rate": 0.00017900526113711121, | |
| "loss": 3.8205535888671873, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 1.210995465252628, | |
| "grad_norm": 2.9789609909057617, | |
| "learning_rate": 0.00017890149117436466, | |
| "loss": 3.655535888671875, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 1.2120331648800937, | |
| "grad_norm": 7.362621784210205, | |
| "learning_rate": 0.00017879772121161808, | |
| "loss": 3.5663858032226563, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 1.2130708645075596, | |
| "grad_norm": 3.515774726867676, | |
| "learning_rate": 0.00017869395124887148, | |
| "loss": 3.64054443359375, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 1.2141085641350255, | |
| "grad_norm": 2.5356316566467285, | |
| "learning_rate": 0.0001785901812861249, | |
| "loss": 3.621481628417969, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 1.2151462637624912, | |
| "grad_norm": 4.910796642303467, | |
| "learning_rate": 0.0001784864113233783, | |
| "loss": 3.6991619873046875, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 1.2161839633899572, | |
| "grad_norm": 4.202451705932617, | |
| "learning_rate": 0.00017838264136063175, | |
| "loss": 3.8038519287109374, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 1.217221663017423, | |
| "grad_norm": 4.467262268066406, | |
| "learning_rate": 0.00017827887139788514, | |
| "loss": 3.771558837890625, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 1.2182593626448888, | |
| "grad_norm": 3.9160234928131104, | |
| "learning_rate": 0.00017817510143513856, | |
| "loss": 3.7639215087890623, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 1.2192970622723547, | |
| "grad_norm": 4.396745681762695, | |
| "learning_rate": 0.000178071331472392, | |
| "loss": 3.68260498046875, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 1.2203347618998204, | |
| "grad_norm": 3.5205559730529785, | |
| "learning_rate": 0.0001779675615096454, | |
| "loss": 3.6396359252929686, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 1.2213724615272863, | |
| "grad_norm": 3.1027088165283203, | |
| "learning_rate": 0.00017786379154689883, | |
| "loss": 3.5732858276367185, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 1.2224101611547522, | |
| "grad_norm": 2.6304574012756348, | |
| "learning_rate": 0.00017776002158415222, | |
| "loss": 3.508619384765625, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 1.223447860782218, | |
| "grad_norm": 2.9613137245178223, | |
| "learning_rate": 0.00017765625162140564, | |
| "loss": 3.65043212890625, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 1.2244855604096838, | |
| "grad_norm": 3.6579976081848145, | |
| "learning_rate": 0.0001775524816586591, | |
| "loss": 3.805189514160156, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 1.2255232600371497, | |
| "grad_norm": 2.3908674716949463, | |
| "learning_rate": 0.0001774487116959125, | |
| "loss": 3.608123474121094, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 1.2265609596646154, | |
| "grad_norm": 3.335692882537842, | |
| "learning_rate": 0.0001773449417331659, | |
| "loss": 3.707095947265625, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 1.2275986592920813, | |
| "grad_norm": 5.722865581512451, | |
| "learning_rate": 0.0001772411717704193, | |
| "loss": 3.7158029174804685, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 1.2286363589195473, | |
| "grad_norm": 9.1022310256958, | |
| "learning_rate": 0.00017713740180767273, | |
| "loss": 3.7301669311523438, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 1.229674058547013, | |
| "grad_norm": 5.698774814605713, | |
| "learning_rate": 0.00017703363184492615, | |
| "loss": 3.638455810546875, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 1.2307117581744789, | |
| "grad_norm": 2.373983144760132, | |
| "learning_rate": 0.00017692986188217957, | |
| "loss": 3.6596408081054688, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 1.2317494578019446, | |
| "grad_norm": 8.193933486938477, | |
| "learning_rate": 0.000176826091919433, | |
| "loss": 3.670250244140625, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 1.2327871574294105, | |
| "grad_norm": 4.394575119018555, | |
| "learning_rate": 0.0001767223219566864, | |
| "loss": 3.7637249755859377, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 1.2338248570568764, | |
| "grad_norm": 8.713273048400879, | |
| "learning_rate": 0.00017661855199393984, | |
| "loss": 3.7907025146484377, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 1.234862556684342, | |
| "grad_norm": 2.0170185565948486, | |
| "learning_rate": 0.00017651478203119323, | |
| "loss": 3.638475036621094, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.235900256311808, | |
| "grad_norm": 14.477542877197266, | |
| "learning_rate": 0.00017641101206844665, | |
| "loss": 3.6606521606445312, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 1.236937955939274, | |
| "grad_norm": 3.3395235538482666, | |
| "learning_rate": 0.00017630724210570005, | |
| "loss": 3.5342837524414064, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 1.2379756555667396, | |
| "grad_norm": 3.269758701324463, | |
| "learning_rate": 0.00017620347214295347, | |
| "loss": 3.5976416015625, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 1.2390133551942055, | |
| "grad_norm": 7.099674224853516, | |
| "learning_rate": 0.00017609970218020692, | |
| "loss": 3.599384460449219, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 1.2400510548216714, | |
| "grad_norm": 2.358044385910034, | |
| "learning_rate": 0.00017599593221746031, | |
| "loss": 3.4857781982421874, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 1.2410887544491371, | |
| "grad_norm": 5.485024929046631, | |
| "learning_rate": 0.00017589216225471374, | |
| "loss": 3.69429931640625, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 1.242126454076603, | |
| "grad_norm": 5.038040637969971, | |
| "learning_rate": 0.00017578839229196713, | |
| "loss": 3.599921875, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 1.2431641537040687, | |
| "grad_norm": 6.716040134429932, | |
| "learning_rate": 0.00017568462232922058, | |
| "loss": 3.555647888183594, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 1.2442018533315347, | |
| "grad_norm": 9.499709129333496, | |
| "learning_rate": 0.000175580852366474, | |
| "loss": 3.740644836425781, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 1.2452395529590006, | |
| "grad_norm": 2.5602540969848633, | |
| "learning_rate": 0.0001754770824037274, | |
| "loss": 3.7783831787109374, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.2462772525864663, | |
| "grad_norm": 5.06706428527832, | |
| "learning_rate": 0.00017537331244098082, | |
| "loss": 3.7457623291015625, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 1.2473149522139322, | |
| "grad_norm": 4.963079452514648, | |
| "learning_rate": 0.00017526954247823421, | |
| "loss": 3.726761474609375, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 1.2483526518413979, | |
| "grad_norm": 4.604287624359131, | |
| "learning_rate": 0.00017516577251548766, | |
| "loss": 3.8796881103515624, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 1.2493903514688638, | |
| "grad_norm": 7.884790897369385, | |
| "learning_rate": 0.00017506200255274106, | |
| "loss": 3.7173165893554687, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 1.2504280510963297, | |
| "grad_norm": 7.230984687805176, | |
| "learning_rate": 0.00017495823258999448, | |
| "loss": 3.7296737670898437, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 1.2514657507237956, | |
| "grad_norm": 4.4041032791137695, | |
| "learning_rate": 0.00017485446262724793, | |
| "loss": 3.695928039550781, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 1.2525034503512613, | |
| "grad_norm": 4.800326347351074, | |
| "learning_rate": 0.00017475069266450132, | |
| "loss": 3.692496032714844, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 1.2535411499787272, | |
| "grad_norm": 4.20355224609375, | |
| "learning_rate": 0.00017464692270175475, | |
| "loss": 3.724625549316406, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 1.254578849606193, | |
| "grad_norm": 8.89311408996582, | |
| "learning_rate": 0.00017454315273900814, | |
| "loss": 3.6060061645507813, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 1.2556165492336588, | |
| "grad_norm": 3.7018239498138428, | |
| "learning_rate": 0.00017443938277626156, | |
| "loss": 3.7614715576171873, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 1.2566542488611248, | |
| "grad_norm": 3.2457141876220703, | |
| "learning_rate": 0.00017433561281351496, | |
| "loss": 3.729616394042969, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 1.2576919484885904, | |
| "grad_norm": 9.342671394348145, | |
| "learning_rate": 0.0001742318428507684, | |
| "loss": 3.717445068359375, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 1.2587296481160564, | |
| "grad_norm": 3.293091058731079, | |
| "learning_rate": 0.00017412807288802183, | |
| "loss": 3.7832305908203123, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 1.259767347743522, | |
| "grad_norm": 4.222780704498291, | |
| "learning_rate": 0.00017402430292527522, | |
| "loss": 3.7384588623046877, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 1.260805047370988, | |
| "grad_norm": 3.0761492252349854, | |
| "learning_rate": 0.00017392053296252867, | |
| "loss": 3.7555526733398437, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 1.261842746998454, | |
| "grad_norm": 2.887803554534912, | |
| "learning_rate": 0.00017381676299978207, | |
| "loss": 3.695442810058594, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 1.2628804466259196, | |
| "grad_norm": 3.7166850566864014, | |
| "learning_rate": 0.0001737129930370355, | |
| "loss": 3.815606689453125, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 1.2639181462533855, | |
| "grad_norm": 12.183484077453613, | |
| "learning_rate": 0.0001736092230742889, | |
| "loss": 3.637664794921875, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 1.2649558458808512, | |
| "grad_norm": 3.1364870071411133, | |
| "learning_rate": 0.0001735054531115423, | |
| "loss": 3.6319699096679687, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 1.265993545508317, | |
| "grad_norm": 4.354419708251953, | |
| "learning_rate": 0.00017340168314879575, | |
| "loss": 3.786130065917969, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 1.267031245135783, | |
| "grad_norm": 4.645047664642334, | |
| "learning_rate": 0.00017329791318604915, | |
| "loss": 3.7552008056640624, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 1.268068944763249, | |
| "grad_norm": 4.269083499908447, | |
| "learning_rate": 0.00017319414322330257, | |
| "loss": 3.7506790161132812, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 1.2691066443907146, | |
| "grad_norm": 5.066195011138916, | |
| "learning_rate": 0.00017309037326055597, | |
| "loss": 3.788629455566406, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 1.2701443440181805, | |
| "grad_norm": 5.5616021156311035, | |
| "learning_rate": 0.0001729866032978094, | |
| "loss": 3.6688613891601562, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 1.2711820436456462, | |
| "grad_norm": 3.1797661781311035, | |
| "learning_rate": 0.00017288283333506284, | |
| "loss": 3.718145751953125, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 1.2722197432731122, | |
| "grad_norm": 3.063791275024414, | |
| "learning_rate": 0.00017277906337231623, | |
| "loss": 3.66003662109375, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 1.273257442900578, | |
| "grad_norm": 24.703685760498047, | |
| "learning_rate": 0.00017267529340956965, | |
| "loss": 3.697345886230469, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 1.2742951425280438, | |
| "grad_norm": 4.573358058929443, | |
| "learning_rate": 0.00017257152344682305, | |
| "loss": 3.770580139160156, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 1.2753328421555097, | |
| "grad_norm": 6.073929309844971, | |
| "learning_rate": 0.0001724677534840765, | |
| "loss": 3.570367736816406, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 1.2763705417829754, | |
| "grad_norm": 4.804381847381592, | |
| "learning_rate": 0.0001723639835213299, | |
| "loss": 3.7930453491210936, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 1.2774082414104413, | |
| "grad_norm": 7.542964935302734, | |
| "learning_rate": 0.00017226021355858331, | |
| "loss": 3.6680117797851564, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 1.2784459410379072, | |
| "grad_norm": 7.110779285430908, | |
| "learning_rate": 0.00017215644359583674, | |
| "loss": 3.645113830566406, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 1.2794836406653731, | |
| "grad_norm": 5.410161018371582, | |
| "learning_rate": 0.00017205267363309013, | |
| "loss": 3.7428775024414063, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 1.2805213402928388, | |
| "grad_norm": 4.089752197265625, | |
| "learning_rate": 0.00017194890367034358, | |
| "loss": 3.7075924682617187, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 1.2815590399203047, | |
| "grad_norm": 5.877744197845459, | |
| "learning_rate": 0.00017184513370759697, | |
| "loss": 3.546766662597656, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 1.2825967395477704, | |
| "grad_norm": 4.295921802520752, | |
| "learning_rate": 0.0001717413637448504, | |
| "loss": 3.5129269409179686, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 1.2836344391752363, | |
| "grad_norm": 7.998104572296143, | |
| "learning_rate": 0.00017163759378210385, | |
| "loss": 3.6661138916015625, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 1.2846721388027023, | |
| "grad_norm": 4.939531326293945, | |
| "learning_rate": 0.00017153382381935724, | |
| "loss": 3.665038757324219, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 1.285709838430168, | |
| "grad_norm": 6.5936384201049805, | |
| "learning_rate": 0.00017143005385661066, | |
| "loss": 3.6241445922851563, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 1.2867475380576339, | |
| "grad_norm": 4.765341281890869, | |
| "learning_rate": 0.00017132628389386406, | |
| "loss": 3.651435546875, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 1.2877852376850996, | |
| "grad_norm": 5.4220147132873535, | |
| "learning_rate": 0.00017122251393111748, | |
| "loss": 3.8530377197265624, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 1.2888229373125655, | |
| "grad_norm": 5.066165447235107, | |
| "learning_rate": 0.00017111874396837087, | |
| "loss": 3.6765261840820314, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 1.2898606369400314, | |
| "grad_norm": 2.871612787246704, | |
| "learning_rate": 0.00017101497400562432, | |
| "loss": 3.7530276489257814, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 1.2908983365674973, | |
| "grad_norm": 3.5445234775543213, | |
| "learning_rate": 0.00017091120404287774, | |
| "loss": 3.65380126953125, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 1.291936036194963, | |
| "grad_norm": 12.712068557739258, | |
| "learning_rate": 0.00017080743408013114, | |
| "loss": 3.651844787597656, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 1.292973735822429, | |
| "grad_norm": 5.535710334777832, | |
| "learning_rate": 0.0001707036641173846, | |
| "loss": 3.648440246582031, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 1.2940114354498946, | |
| "grad_norm": 6.527225017547607, | |
| "learning_rate": 0.00017059989415463798, | |
| "loss": 3.6168035888671874, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 1.2950491350773605, | |
| "grad_norm": 3.675743579864502, | |
| "learning_rate": 0.0001704961241918914, | |
| "loss": 3.689391784667969, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 1.2960868347048264, | |
| "grad_norm": 7.041729927062988, | |
| "learning_rate": 0.0001703923542291448, | |
| "loss": 3.6547369384765624, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 1.2971245343322921, | |
| "grad_norm": 2.5913071632385254, | |
| "learning_rate": 0.00017028858426639822, | |
| "loss": 3.803846740722656, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 1.298162233959758, | |
| "grad_norm": 5.099416732788086, | |
| "learning_rate": 0.00017018481430365167, | |
| "loss": 3.661207580566406, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 1.2991999335872237, | |
| "grad_norm": 3.8206946849823, | |
| "learning_rate": 0.00017008104434090507, | |
| "loss": 3.552643127441406, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 1.3002376332146897, | |
| "grad_norm": 3.769073247909546, | |
| "learning_rate": 0.0001699772743781585, | |
| "loss": 3.842325439453125, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 1.3012753328421556, | |
| "grad_norm": 2.529937744140625, | |
| "learning_rate": 0.00016987350441541188, | |
| "loss": 3.676832275390625, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 1.3023130324696213, | |
| "grad_norm": 7.345049858093262, | |
| "learning_rate": 0.0001697697344526653, | |
| "loss": 3.6286630249023437, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 1.3033507320970872, | |
| "grad_norm": 7.380908012390137, | |
| "learning_rate": 0.00016966596448991875, | |
| "loss": 3.6627023315429685, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 1.3043884317245529, | |
| "grad_norm": 2.8857064247131348, | |
| "learning_rate": 0.00016956219452717215, | |
| "loss": 3.641376953125, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 1.3054261313520188, | |
| "grad_norm": 6.945189476013184, | |
| "learning_rate": 0.00016945842456442557, | |
| "loss": 3.606731262207031, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 1.3064638309794847, | |
| "grad_norm": 6.422026634216309, | |
| "learning_rate": 0.00016935465460167897, | |
| "loss": 3.5785845947265624, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 1.3075015306069506, | |
| "grad_norm": 8.35920524597168, | |
| "learning_rate": 0.00016925088463893241, | |
| "loss": 3.6259381103515627, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 1.3085392302344163, | |
| "grad_norm": 8.193489074707031, | |
| "learning_rate": 0.0001691471146761858, | |
| "loss": 3.7568353271484374, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 1.3095769298618822, | |
| "grad_norm": 5.267637252807617, | |
| "learning_rate": 0.00016904334471343923, | |
| "loss": 3.757891845703125, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 1.310614629489348, | |
| "grad_norm": 3.3981618881225586, | |
| "learning_rate": 0.00016893957475069265, | |
| "loss": 3.6808877563476563, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 1.3116523291168138, | |
| "grad_norm": 11.042278289794922, | |
| "learning_rate": 0.00016883580478794605, | |
| "loss": 3.5690008544921876, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 1.3126900287442798, | |
| "grad_norm": 12.522445678710938, | |
| "learning_rate": 0.0001687320348251995, | |
| "loss": 3.675894775390625, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 1.3137277283717455, | |
| "grad_norm": 4.374575138092041, | |
| "learning_rate": 0.0001686282648624529, | |
| "loss": 3.8043743896484377, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 1.3147654279992114, | |
| "grad_norm": 2.7740325927734375, | |
| "learning_rate": 0.00016852449489970631, | |
| "loss": 3.7183938598632813, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 1.315803127626677, | |
| "grad_norm": 16.38130760192871, | |
| "learning_rate": 0.0001684207249369597, | |
| "loss": 3.7160101318359375, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 1.316840827254143, | |
| "grad_norm": 9.450004577636719, | |
| "learning_rate": 0.00016831695497421316, | |
| "loss": 3.6377835083007812, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 1.317878526881609, | |
| "grad_norm": 8.669651985168457, | |
| "learning_rate": 0.00016821318501146658, | |
| "loss": 3.5026895141601564, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 1.3189162265090748, | |
| "grad_norm": 4.877604007720947, | |
| "learning_rate": 0.00016810941504871997, | |
| "loss": 3.6808175659179687, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 1.3199539261365405, | |
| "grad_norm": 9.553235054016113, | |
| "learning_rate": 0.0001680056450859734, | |
| "loss": 3.706498718261719, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 1.3209916257640064, | |
| "grad_norm": 4.275841236114502, | |
| "learning_rate": 0.0001679018751232268, | |
| "loss": 3.752271728515625, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 1.322029325391472, | |
| "grad_norm": 7.115382671356201, | |
| "learning_rate": 0.00016779810516048024, | |
| "loss": 3.721490783691406, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 1.323067025018938, | |
| "grad_norm": 3.066580057144165, | |
| "learning_rate": 0.00016769433519773366, | |
| "loss": 3.67330322265625, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 1.324104724646404, | |
| "grad_norm": 3.145909547805786, | |
| "learning_rate": 0.00016759056523498706, | |
| "loss": 3.7071697998046873, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 1.3251424242738696, | |
| "grad_norm": 3.342615842819214, | |
| "learning_rate": 0.0001674867952722405, | |
| "loss": 3.68224853515625, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 1.3261801239013356, | |
| "grad_norm": 4.780127048492432, | |
| "learning_rate": 0.0001673830253094939, | |
| "loss": 3.914273986816406, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 1.3272178235288012, | |
| "grad_norm": 8.07118034362793, | |
| "learning_rate": 0.00016727925534674732, | |
| "loss": 3.6639437866210938, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 1.3282555231562672, | |
| "grad_norm": 6.763175964355469, | |
| "learning_rate": 0.00016717548538400072, | |
| "loss": 3.62579345703125, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 1.329293222783733, | |
| "grad_norm": 12.123154640197754, | |
| "learning_rate": 0.00016707171542125414, | |
| "loss": 3.721268615722656, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 1.330330922411199, | |
| "grad_norm": 3.787297010421753, | |
| "learning_rate": 0.0001669679454585076, | |
| "loss": 3.7412783813476564, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 1.3313686220386647, | |
| "grad_norm": 2.629784107208252, | |
| "learning_rate": 0.00016686417549576098, | |
| "loss": 3.7266500854492186, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 1.3324063216661306, | |
| "grad_norm": 2.8463058471679688, | |
| "learning_rate": 0.0001667604055330144, | |
| "loss": 3.56947021484375, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 1.3334440212935963, | |
| "grad_norm": 3.5442264080047607, | |
| "learning_rate": 0.0001666566355702678, | |
| "loss": 3.6988034057617187, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 1.3344817209210622, | |
| "grad_norm": 3.726022243499756, | |
| "learning_rate": 0.00016655286560752122, | |
| "loss": 3.6229156494140624, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 1.3355194205485281, | |
| "grad_norm": 5.090481758117676, | |
| "learning_rate": 0.00016644909564477464, | |
| "loss": 3.5555209350585937, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 1.3365571201759938, | |
| "grad_norm": 5.148849964141846, | |
| "learning_rate": 0.00016634532568202807, | |
| "loss": 3.723890380859375, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 1.3375948198034597, | |
| "grad_norm": 7.033978462219238, | |
| "learning_rate": 0.0001662415557192815, | |
| "loss": 3.6295504760742188, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 1.3386325194309254, | |
| "grad_norm": 5.022918701171875, | |
| "learning_rate": 0.00016613778575653488, | |
| "loss": 3.604397888183594, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 1.3396702190583913, | |
| "grad_norm": 3.9396724700927734, | |
| "learning_rate": 0.00016603401579378833, | |
| "loss": 3.740953369140625, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 1.3407079186858573, | |
| "grad_norm": 4.96920919418335, | |
| "learning_rate": 0.00016593024583104173, | |
| "loss": 3.6454959106445313, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 1.341745618313323, | |
| "grad_norm": 3.2997357845306396, | |
| "learning_rate": 0.00016582647586829515, | |
| "loss": 3.64101806640625, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 1.3427833179407889, | |
| "grad_norm": 12.793081283569336, | |
| "learning_rate": 0.00016572270590554857, | |
| "loss": 3.537852478027344, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 1.3438210175682546, | |
| "grad_norm": 7.696393013000488, | |
| "learning_rate": 0.00016561893594280197, | |
| "loss": 3.6636843872070313, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 1.3448587171957205, | |
| "grad_norm": 4.841111183166504, | |
| "learning_rate": 0.00016551516598005541, | |
| "loss": 3.6766192626953127, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 1.3458964168231864, | |
| "grad_norm": 2.822445869445801, | |
| "learning_rate": 0.0001654113960173088, | |
| "loss": 3.5910659790039063, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 1.3469341164506523, | |
| "grad_norm": 7.020183086395264, | |
| "learning_rate": 0.00016530762605456223, | |
| "loss": 3.6770706176757812, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 1.347971816078118, | |
| "grad_norm": 3.323997974395752, | |
| "learning_rate": 0.00016520385609181563, | |
| "loss": 3.673494567871094, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 1.349009515705584, | |
| "grad_norm": 12.734125137329102, | |
| "learning_rate": 0.00016510008612906907, | |
| "loss": 3.645369873046875, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 1.3500472153330496, | |
| "grad_norm": 6.959007740020752, | |
| "learning_rate": 0.0001649963161663225, | |
| "loss": 3.5545895385742186, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 1.3510849149605155, | |
| "grad_norm": 5.492075443267822, | |
| "learning_rate": 0.0001648925462035759, | |
| "loss": 3.73270263671875, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 1.3521226145879814, | |
| "grad_norm": 5.578936576843262, | |
| "learning_rate": 0.0001647887762408293, | |
| "loss": 3.633159484863281, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 1.3531603142154471, | |
| "grad_norm": 4.073727607727051, | |
| "learning_rate": 0.0001646850062780827, | |
| "loss": 3.7094195556640623, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 1.354198013842913, | |
| "grad_norm": 3.7967214584350586, | |
| "learning_rate": 0.00016458123631533616, | |
| "loss": 3.6143753051757814, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 1.3552357134703787, | |
| "grad_norm": 5.993916034698486, | |
| "learning_rate": 0.00016447746635258955, | |
| "loss": 3.722456359863281, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 1.3562734130978447, | |
| "grad_norm": 4.235459327697754, | |
| "learning_rate": 0.00016437369638984297, | |
| "loss": 3.7401913452148436, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 1.3573111127253106, | |
| "grad_norm": 13.88862133026123, | |
| "learning_rate": 0.00016426992642709642, | |
| "loss": 3.746804504394531, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 1.3583488123527765, | |
| "grad_norm": 5.165769100189209, | |
| "learning_rate": 0.00016416615646434982, | |
| "loss": 3.74326416015625, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 1.3593865119802422, | |
| "grad_norm": 3.6813595294952393, | |
| "learning_rate": 0.00016406238650160324, | |
| "loss": 3.617030029296875, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 1.360424211607708, | |
| "grad_norm": 5.9350152015686035, | |
| "learning_rate": 0.00016395861653885663, | |
| "loss": 3.873332214355469, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 1.3614619112351738, | |
| "grad_norm": 4.220798969268799, | |
| "learning_rate": 0.00016385484657611006, | |
| "loss": 3.6584405517578125, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 1.3624996108626397, | |
| "grad_norm": 21.21164894104004, | |
| "learning_rate": 0.0001637510766133635, | |
| "loss": 3.617677917480469, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 1.3635373104901056, | |
| "grad_norm": 5.271477699279785, | |
| "learning_rate": 0.0001636473066506169, | |
| "loss": 3.5792852783203126, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 1.3645750101175713, | |
| "grad_norm": 4.747986316680908, | |
| "learning_rate": 0.00016354353668787032, | |
| "loss": 3.6235577392578127, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 1.3656127097450372, | |
| "grad_norm": 3.8399877548217773, | |
| "learning_rate": 0.00016343976672512372, | |
| "loss": 3.780206604003906, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 1.366650409372503, | |
| "grad_norm": 7.428284645080566, | |
| "learning_rate": 0.00016333599676237714, | |
| "loss": 3.600271911621094, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 1.3676881089999688, | |
| "grad_norm": 4.4645304679870605, | |
| "learning_rate": 0.00016323222679963056, | |
| "loss": 3.6348703002929685, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 1.3687258086274348, | |
| "grad_norm": 4.429653167724609, | |
| "learning_rate": 0.00016312845683688398, | |
| "loss": 3.704706726074219, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 1.3697635082549007, | |
| "grad_norm": 4.308233737945557, | |
| "learning_rate": 0.0001630246868741374, | |
| "loss": 3.704057312011719, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 1.3708012078823664, | |
| "grad_norm": 12.334646224975586, | |
| "learning_rate": 0.0001629209169113908, | |
| "loss": 3.6710003662109374, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 1.3718389075098323, | |
| "grad_norm": 5.286363124847412, | |
| "learning_rate": 0.00016281714694864425, | |
| "loss": 3.6472879028320313, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 1.372876607137298, | |
| "grad_norm": 3.0022027492523193, | |
| "learning_rate": 0.00016271337698589764, | |
| "loss": 3.867461853027344, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 1.373914306764764, | |
| "grad_norm": 3.6052401065826416, | |
| "learning_rate": 0.00016260960702315107, | |
| "loss": 3.465709533691406, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 1.3749520063922298, | |
| "grad_norm": 4.250115871429443, | |
| "learning_rate": 0.00016250583706040446, | |
| "loss": 3.6189974975585937, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 1.3759897060196955, | |
| "grad_norm": 4.520415306091309, | |
| "learning_rate": 0.00016240206709765788, | |
| "loss": 3.697256774902344, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 1.3770274056471614, | |
| "grad_norm": 3.608278751373291, | |
| "learning_rate": 0.00016229829713491133, | |
| "loss": 3.6748687744140627, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 1.3780651052746271, | |
| "grad_norm": 3.6304538249969482, | |
| "learning_rate": 0.00016219452717216473, | |
| "loss": 3.6889605712890625, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 1.379102804902093, | |
| "grad_norm": 4.484381675720215, | |
| "learning_rate": 0.00016209075720941815, | |
| "loss": 3.667810974121094, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 1.380140504529559, | |
| "grad_norm": 12.79962158203125, | |
| "learning_rate": 0.00016198698724667154, | |
| "loss": 3.901937255859375, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 1.3811782041570246, | |
| "grad_norm": 3.6465935707092285, | |
| "learning_rate": 0.000161883217283925, | |
| "loss": 3.6334658813476564, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 1.3822159037844906, | |
| "grad_norm": 2.5269343852996826, | |
| "learning_rate": 0.00016177944732117841, | |
| "loss": 3.6968539428710936, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 1.3832536034119562, | |
| "grad_norm": 4.01210880279541, | |
| "learning_rate": 0.0001616756773584318, | |
| "loss": 3.4310296630859374, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 1.3842913030394222, | |
| "grad_norm": 4.493933200836182, | |
| "learning_rate": 0.00016157190739568523, | |
| "loss": 3.719140930175781, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 1.385329002666888, | |
| "grad_norm": 3.25607967376709, | |
| "learning_rate": 0.00016146813743293863, | |
| "loss": 3.6992584228515626, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 1.386366702294354, | |
| "grad_norm": 6.134942054748535, | |
| "learning_rate": 0.00016136436747019207, | |
| "loss": 3.748294677734375, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 1.3874044019218197, | |
| "grad_norm": 3.706012725830078, | |
| "learning_rate": 0.00016126059750744547, | |
| "loss": 3.586408996582031, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 1.3884421015492856, | |
| "grad_norm": 5.05728816986084, | |
| "learning_rate": 0.0001611568275446989, | |
| "loss": 3.7400482177734373, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 1.3894798011767513, | |
| "grad_norm": 4.292380332946777, | |
| "learning_rate": 0.00016105305758195234, | |
| "loss": 3.7132363891601563, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 1.3905175008042172, | |
| "grad_norm": 9.770214080810547, | |
| "learning_rate": 0.00016094928761920573, | |
| "loss": 3.5888162231445313, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 1.3915552004316831, | |
| "grad_norm": 9.073437690734863, | |
| "learning_rate": 0.00016084551765645916, | |
| "loss": 3.6239898681640623, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 1.3925929000591488, | |
| "grad_norm": 5.210220813751221, | |
| "learning_rate": 0.00016074174769371255, | |
| "loss": 3.4854669189453125, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 1.3936305996866147, | |
| "grad_norm": 5.995209693908691, | |
| "learning_rate": 0.00016063797773096597, | |
| "loss": 3.6248184204101563, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 1.3946682993140804, | |
| "grad_norm": 8.040777206420898, | |
| "learning_rate": 0.00016053420776821937, | |
| "loss": 3.767200622558594, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 1.3957059989415463, | |
| "grad_norm": 6.153497695922852, | |
| "learning_rate": 0.00016043043780547282, | |
| "loss": 3.6283489990234377, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 1.3967436985690123, | |
| "grad_norm": 3.4162278175354004, | |
| "learning_rate": 0.00016032666784272624, | |
| "loss": 3.6065017700195314, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 1.3977813981964782, | |
| "grad_norm": 3.4524638652801514, | |
| "learning_rate": 0.00016022289787997963, | |
| "loss": 3.6301129150390623, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 1.3988190978239439, | |
| "grad_norm": 6.9367804527282715, | |
| "learning_rate": 0.00016011912791723308, | |
| "loss": 3.6796551513671876, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 1.3998567974514098, | |
| "grad_norm": 3.629422903060913, | |
| "learning_rate": 0.00016001535795448648, | |
| "loss": 3.745485534667969, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 1.4008944970788755, | |
| "grad_norm": 3.658010959625244, | |
| "learning_rate": 0.0001599115879917399, | |
| "loss": 3.6311688232421875, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 1.4019321967063414, | |
| "grad_norm": 16.63618278503418, | |
| "learning_rate": 0.00015980781802899332, | |
| "loss": 3.6807235717773437, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 1.4029698963338073, | |
| "grad_norm": 6.354872703552246, | |
| "learning_rate": 0.00015970404806624672, | |
| "loss": 3.5296261596679686, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 1.404007595961273, | |
| "grad_norm": 7.496634483337402, | |
| "learning_rate": 0.00015960027810350017, | |
| "loss": 3.5905780029296874, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 1.405045295588739, | |
| "grad_norm": 2.790278673171997, | |
| "learning_rate": 0.00015949650814075356, | |
| "loss": 3.544078369140625, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 1.4060829952162046, | |
| "grad_norm": 5.150670528411865, | |
| "learning_rate": 0.00015939273817800698, | |
| "loss": 3.7144375610351563, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 1.4071206948436705, | |
| "grad_norm": 5.606545448303223, | |
| "learning_rate": 0.00015928896821526038, | |
| "loss": 3.719892578125, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 1.4081583944711364, | |
| "grad_norm": 15.23755931854248, | |
| "learning_rate": 0.0001591851982525138, | |
| "loss": 3.649613952636719, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 1.4091960940986021, | |
| "grad_norm": 20.73650550842285, | |
| "learning_rate": 0.00015908142828976725, | |
| "loss": 3.6828762817382814, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 1.410233793726068, | |
| "grad_norm": 8.400344848632812, | |
| "learning_rate": 0.00015897765832702064, | |
| "loss": 3.6613919067382814, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 1.411271493353534, | |
| "grad_norm": 2.5724685192108154, | |
| "learning_rate": 0.00015887388836427407, | |
| "loss": 3.657626037597656, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 1.4123091929809997, | |
| "grad_norm": 19.325956344604492, | |
| "learning_rate": 0.00015877011840152746, | |
| "loss": 3.8178024291992188, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 1.4133468926084656, | |
| "grad_norm": 2.402404308319092, | |
| "learning_rate": 0.0001586663484387809, | |
| "loss": 3.59340576171875, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 1.4143845922359315, | |
| "grad_norm": 6.188352108001709, | |
| "learning_rate": 0.00015856257847603433, | |
| "loss": 3.6710971069335936, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 1.4154222918633972, | |
| "grad_norm": 4.21588659286499, | |
| "learning_rate": 0.00015845880851328773, | |
| "loss": 3.721273193359375, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 1.416459991490863, | |
| "grad_norm": 4.4968485832214355, | |
| "learning_rate": 0.00015835503855054115, | |
| "loss": 3.6669491577148436, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 1.4174976911183288, | |
| "grad_norm": 7.214438438415527, | |
| "learning_rate": 0.00015825126858779454, | |
| "loss": 3.799635925292969, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 1.4185353907457947, | |
| "grad_norm": 7.262329578399658, | |
| "learning_rate": 0.000158147498625048, | |
| "loss": 3.807882995605469, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 1.4195730903732606, | |
| "grad_norm": 3.5909628868103027, | |
| "learning_rate": 0.00015804372866230139, | |
| "loss": 3.7313577270507814, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 1.4206107900007263, | |
| "grad_norm": 10.205459594726562, | |
| "learning_rate": 0.0001579399586995548, | |
| "loss": 3.675950622558594, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 1.4216484896281922, | |
| "grad_norm": 5.25307559967041, | |
| "learning_rate": 0.00015783618873680826, | |
| "loss": 3.6014810180664063, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 1.422686189255658, | |
| "grad_norm": 42.26997756958008, | |
| "learning_rate": 0.00015773241877406165, | |
| "loss": 3.6278192138671876, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 1.4237238888831238, | |
| "grad_norm": 6.092323303222656, | |
| "learning_rate": 0.00015762864881131507, | |
| "loss": 3.555603332519531, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 1.4247615885105898, | |
| "grad_norm": 2.74434232711792, | |
| "learning_rate": 0.00015752487884856847, | |
| "loss": 3.5426220703125, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 1.4257992881380557, | |
| "grad_norm": 13.12152099609375, | |
| "learning_rate": 0.0001574211088858219, | |
| "loss": 3.7107192993164064, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 1.4268369877655214, | |
| "grad_norm": 3.9462010860443115, | |
| "learning_rate": 0.00015731733892307529, | |
| "loss": 3.5455560302734375, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 1.4278746873929873, | |
| "grad_norm": 3.7687721252441406, | |
| "learning_rate": 0.00015721356896032873, | |
| "loss": 3.630052490234375, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 1.428912387020453, | |
| "grad_norm": 4.470894813537598, | |
| "learning_rate": 0.00015710979899758216, | |
| "loss": 3.627494201660156, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 1.429950086647919, | |
| "grad_norm": 4.3846259117126465, | |
| "learning_rate": 0.00015700602903483555, | |
| "loss": 3.5804782104492188, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 1.4309877862753848, | |
| "grad_norm": 3.9794013500213623, | |
| "learning_rate": 0.000156902259072089, | |
| "loss": 3.739950866699219, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 1.4320254859028505, | |
| "grad_norm": 10.886957168579102, | |
| "learning_rate": 0.0001567984891093424, | |
| "loss": 3.7072845458984376, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 1.4330631855303164, | |
| "grad_norm": 4.187902927398682, | |
| "learning_rate": 0.00015669471914659582, | |
| "loss": 3.64345703125, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 1.4341008851577821, | |
| "grad_norm": 32.209293365478516, | |
| "learning_rate": 0.00015659094918384924, | |
| "loss": 3.6210546875, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 1.435138584785248, | |
| "grad_norm": 3.12260365486145, | |
| "learning_rate": 0.00015648717922110263, | |
| "loss": 3.7005911254882813, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 1.436176284412714, | |
| "grad_norm": 6.220150470733643, | |
| "learning_rate": 0.00015638340925835608, | |
| "loss": 3.7236618041992187, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 1.4372139840401799, | |
| "grad_norm": 2.38154673576355, | |
| "learning_rate": 0.00015627963929560948, | |
| "loss": 3.633033447265625, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 1.4382516836676456, | |
| "grad_norm": 7.884495258331299, | |
| "learning_rate": 0.0001561758693328629, | |
| "loss": 3.5666903686523437, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 1.4392893832951115, | |
| "grad_norm": 3.8970346450805664, | |
| "learning_rate": 0.0001560720993701163, | |
| "loss": 3.6862808227539063, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 1.4403270829225772, | |
| "grad_norm": 3.273268461227417, | |
| "learning_rate": 0.00015596832940736972, | |
| "loss": 3.6251177978515625, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 1.441364782550043, | |
| "grad_norm": 3.0285887718200684, | |
| "learning_rate": 0.00015586455944462317, | |
| "loss": 3.61291015625, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 1.442402482177509, | |
| "grad_norm": 3.4767589569091797, | |
| "learning_rate": 0.00015576078948187656, | |
| "loss": 3.6781646728515627, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 1.4434401818049747, | |
| "grad_norm": 156.1669158935547, | |
| "learning_rate": 0.00015565701951912998, | |
| "loss": 3.6272451782226565, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 1.4444778814324406, | |
| "grad_norm": 2.3591196537017822, | |
| "learning_rate": 0.00015555324955638338, | |
| "loss": 3.589447021484375, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 1.4455155810599063, | |
| "grad_norm": 3.8040847778320312, | |
| "learning_rate": 0.00015544947959363683, | |
| "loss": 3.64208251953125, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 1.4465532806873722, | |
| "grad_norm": 2.655759811401367, | |
| "learning_rate": 0.00015534570963089022, | |
| "loss": 3.671148376464844, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 1.4475909803148381, | |
| "grad_norm": 7.29696798324585, | |
| "learning_rate": 0.00015524193966814364, | |
| "loss": 3.751770324707031, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 1.4486286799423038, | |
| "grad_norm": 6.334928035736084, | |
| "learning_rate": 0.00015513816970539706, | |
| "loss": 3.7970040893554686, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 1.4496663795697697, | |
| "grad_norm": 6.7520623207092285, | |
| "learning_rate": 0.00015503439974265046, | |
| "loss": 3.6929965209960938, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 1.4507040791972354, | |
| "grad_norm": 10.428074836730957, | |
| "learning_rate": 0.0001549306297799039, | |
| "loss": 3.734377136230469, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 1.4517417788247013, | |
| "grad_norm": 8.371795654296875, | |
| "learning_rate": 0.0001548268598171573, | |
| "loss": 3.6029412841796873, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 1.4527794784521673, | |
| "grad_norm": 3.291740894317627, | |
| "learning_rate": 0.00015472308985441073, | |
| "loss": 3.6670523071289063, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.4538171780796332, | |
| "grad_norm": 7.120608806610107, | |
| "learning_rate": 0.00015461931989166417, | |
| "loss": 3.638569030761719, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 1.4548548777070989, | |
| "grad_norm": 6.361410617828369, | |
| "learning_rate": 0.00015451554992891757, | |
| "loss": 3.661440734863281, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 1.4558925773345648, | |
| "grad_norm": 3.5337114334106445, | |
| "learning_rate": 0.000154411779966171, | |
| "loss": 3.69423828125, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 1.4569302769620305, | |
| "grad_norm": 8.946898460388184, | |
| "learning_rate": 0.00015430801000342439, | |
| "loss": 3.636510925292969, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 1.4579679765894964, | |
| "grad_norm": 3.5454866886138916, | |
| "learning_rate": 0.0001542042400406778, | |
| "loss": 3.833760986328125, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 1.4590056762169623, | |
| "grad_norm": 20.629167556762695, | |
| "learning_rate": 0.0001541004700779312, | |
| "loss": 3.740248718261719, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 1.460043375844428, | |
| "grad_norm": 3.0284929275512695, | |
| "learning_rate": 0.00015399670011518465, | |
| "loss": 3.6760980224609376, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 1.461081075471894, | |
| "grad_norm": 4.971894264221191, | |
| "learning_rate": 0.00015389293015243807, | |
| "loss": 3.600714111328125, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 1.4621187750993596, | |
| "grad_norm": 3.689394950866699, | |
| "learning_rate": 0.00015378916018969147, | |
| "loss": 3.5257861328125, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 1.4631564747268255, | |
| "grad_norm": 4.305582523345947, | |
| "learning_rate": 0.00015368539022694492, | |
| "loss": 3.66658447265625, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 1.4641941743542914, | |
| "grad_norm": 12.191847801208496, | |
| "learning_rate": 0.0001535816202641983, | |
| "loss": 3.5539178466796875, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 1.4652318739817574, | |
| "grad_norm": 5.9276814460754395, | |
| "learning_rate": 0.00015347785030145173, | |
| "loss": 3.712036437988281, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 1.466269573609223, | |
| "grad_norm": 7.3767008781433105, | |
| "learning_rate": 0.00015337408033870513, | |
| "loss": 3.688995361328125, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 1.467307273236689, | |
| "grad_norm": 4.156796932220459, | |
| "learning_rate": 0.00015327031037595855, | |
| "loss": 3.5971023559570314, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 1.4683449728641547, | |
| "grad_norm": 3.876843214035034, | |
| "learning_rate": 0.000153166540413212, | |
| "loss": 3.7138726806640623, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 1.4693826724916206, | |
| "grad_norm": 2.5647096633911133, | |
| "learning_rate": 0.0001530627704504654, | |
| "loss": 3.575816650390625, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 1.4704203721190865, | |
| "grad_norm": 6.341168403625488, | |
| "learning_rate": 0.00015295900048771882, | |
| "loss": 3.675234375, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 1.4714580717465522, | |
| "grad_norm": 11.66984748840332, | |
| "learning_rate": 0.0001528552305249722, | |
| "loss": 3.5949581909179686, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 1.472495771374018, | |
| "grad_norm": 2.7472872734069824, | |
| "learning_rate": 0.00015275146056222563, | |
| "loss": 3.4315753173828125, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 1.4735334710014838, | |
| "grad_norm": 2.7182295322418213, | |
| "learning_rate": 0.00015264769059947908, | |
| "loss": 3.580435791015625, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 1.4745711706289497, | |
| "grad_norm": 7.28167200088501, | |
| "learning_rate": 0.00015254392063673248, | |
| "loss": 3.6344500732421876, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 1.4756088702564156, | |
| "grad_norm": 3.1541340351104736, | |
| "learning_rate": 0.0001524401506739859, | |
| "loss": 3.6579803466796874, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 1.4766465698838815, | |
| "grad_norm": 4.42963171005249, | |
| "learning_rate": 0.0001523363807112393, | |
| "loss": 3.5743417358398437, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 1.4776842695113472, | |
| "grad_norm": 7.278059005737305, | |
| "learning_rate": 0.00015223261074849274, | |
| "loss": 3.7834173583984376, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 1.4787219691388132, | |
| "grad_norm": 10.52426528930664, | |
| "learning_rate": 0.00015212884078574614, | |
| "loss": 3.6968179321289063, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 1.4797596687662788, | |
| "grad_norm": 3.5773837566375732, | |
| "learning_rate": 0.00015202507082299956, | |
| "loss": 3.6810809326171876, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 1.4807973683937448, | |
| "grad_norm": 3.344587802886963, | |
| "learning_rate": 0.00015192130086025298, | |
| "loss": 3.6345669555664064, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 1.4818350680212107, | |
| "grad_norm": 6.329004287719727, | |
| "learning_rate": 0.00015181753089750638, | |
| "loss": 3.647319641113281, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 1.4828727676486764, | |
| "grad_norm": 6.577507495880127, | |
| "learning_rate": 0.00015171376093475983, | |
| "loss": 3.5769888305664064, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 1.4839104672761423, | |
| "grad_norm": 4.545724391937256, | |
| "learning_rate": 0.00015160999097201322, | |
| "loss": 3.583935546875, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 1.484948166903608, | |
| "grad_norm": 13.324125289916992, | |
| "learning_rate": 0.00015150622100926664, | |
| "loss": 3.612706604003906, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 1.485985866531074, | |
| "grad_norm": 4.545955657958984, | |
| "learning_rate": 0.00015140245104652004, | |
| "loss": 3.4066473388671876, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 1.4870235661585398, | |
| "grad_norm": 8.517041206359863, | |
| "learning_rate": 0.00015129868108377349, | |
| "loss": 3.6258444213867187, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 1.4880612657860055, | |
| "grad_norm": 5.813758373260498, | |
| "learning_rate": 0.0001511949111210269, | |
| "loss": 3.686318054199219, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 1.4890989654134714, | |
| "grad_norm": 6.236087322235107, | |
| "learning_rate": 0.0001510911411582803, | |
| "loss": 3.7458810424804687, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 1.4901366650409371, | |
| "grad_norm": 5.874231815338135, | |
| "learning_rate": 0.00015098737119553373, | |
| "loss": 3.6481814575195313, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 1.491174364668403, | |
| "grad_norm": 7.229684829711914, | |
| "learning_rate": 0.00015088360123278712, | |
| "loss": 3.6855035400390626, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 1.492212064295869, | |
| "grad_norm": 7.212390422821045, | |
| "learning_rate": 0.00015077983127004057, | |
| "loss": 3.750265808105469, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 1.4932497639233349, | |
| "grad_norm": 5.408252239227295, | |
| "learning_rate": 0.000150676061307294, | |
| "loss": 3.5695159912109373, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 1.4942874635508006, | |
| "grad_norm": 8.125064849853516, | |
| "learning_rate": 0.00015057229134454739, | |
| "loss": 3.642791442871094, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 1.4953251631782665, | |
| "grad_norm": 5.047210216522217, | |
| "learning_rate": 0.00015046852138180083, | |
| "loss": 3.588906555175781, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 1.4963628628057322, | |
| "grad_norm": 2.775951623916626, | |
| "learning_rate": 0.00015036475141905423, | |
| "loss": 3.672796325683594, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 1.497400562433198, | |
| "grad_norm": 7.114427089691162, | |
| "learning_rate": 0.00015026098145630765, | |
| "loss": 3.7460537719726563, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 1.498438262060664, | |
| "grad_norm": 4.1067585945129395, | |
| "learning_rate": 0.00015015721149356105, | |
| "loss": 3.4047305297851564, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 1.4994759616881297, | |
| "grad_norm": 6.3360276222229, | |
| "learning_rate": 0.00015005344153081447, | |
| "loss": 3.6055087280273437, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 1.5005136613155956, | |
| "grad_norm": 3.8499081134796143, | |
| "learning_rate": 0.0001499496715680679, | |
| "loss": 3.6976129150390626, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 1.5015513609430613, | |
| "grad_norm": 4.669349193572998, | |
| "learning_rate": 0.0001498459016053213, | |
| "loss": 3.6301043701171873, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 1.5025890605705272, | |
| "grad_norm": 12.484715461730957, | |
| "learning_rate": 0.0001497421316425747, | |
| "loss": 3.6376629638671876, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 1.5036267601979931, | |
| "grad_norm": 3.1881167888641357, | |
| "learning_rate": 0.00014963836167982816, | |
| "loss": 3.688013000488281, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 1.504664459825459, | |
| "grad_norm": 3.1999073028564453, | |
| "learning_rate": 0.00014953459171708158, | |
| "loss": 3.767580871582031, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 1.5057021594529247, | |
| "grad_norm": 2.503138303756714, | |
| "learning_rate": 0.00014943082175433497, | |
| "loss": 3.772780456542969, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 1.5067398590803904, | |
| "grad_norm": 5.124083995819092, | |
| "learning_rate": 0.0001493270517915884, | |
| "loss": 3.709577941894531, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 1.5077775587078563, | |
| "grad_norm": 12.24608039855957, | |
| "learning_rate": 0.00014922328182884182, | |
| "loss": 3.46869140625, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 1.5088152583353223, | |
| "grad_norm": 11.273271560668945, | |
| "learning_rate": 0.0001491195118660952, | |
| "loss": 3.4797503662109377, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 1.5098529579627882, | |
| "grad_norm": 60.867916107177734, | |
| "learning_rate": 0.00014901574190334866, | |
| "loss": 3.54853515625, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 1.5108906575902539, | |
| "grad_norm": 4.276978969573975, | |
| "learning_rate": 0.00014891197194060206, | |
| "loss": 3.908219299316406, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 1.5119283572177198, | |
| "grad_norm": 2.901015281677246, | |
| "learning_rate": 0.00014880820197785548, | |
| "loss": 3.4694091796875, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 1.5129660568451855, | |
| "grad_norm": 2.3719887733459473, | |
| "learning_rate": 0.0001487044320151089, | |
| "loss": 3.7670758056640623, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 1.5140037564726514, | |
| "grad_norm": 2.4967026710510254, | |
| "learning_rate": 0.0001486006620523623, | |
| "loss": 3.635834045410156, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 1.5150414561001173, | |
| "grad_norm": 3.604675769805908, | |
| "learning_rate": 0.00014849689208961572, | |
| "loss": 3.5507608032226563, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 1.5160791557275832, | |
| "grad_norm": 5.442782402038574, | |
| "learning_rate": 0.00014839312212686916, | |
| "loss": 3.5730636596679686, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 1.517116855355049, | |
| "grad_norm": 3.7341339588165283, | |
| "learning_rate": 0.00014828935216412256, | |
| "loss": 3.569194641113281, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 1.5181545549825146, | |
| "grad_norm": 12.070112228393555, | |
| "learning_rate": 0.00014818558220137598, | |
| "loss": 3.60053955078125, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 1.5191922546099805, | |
| "grad_norm": 5.036438941955566, | |
| "learning_rate": 0.0001480818122386294, | |
| "loss": 3.7114804077148436, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 1.5202299542374464, | |
| "grad_norm": 10.83106803894043, | |
| "learning_rate": 0.0001479780422758828, | |
| "loss": 3.5428836059570314, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 1.5212676538649124, | |
| "grad_norm": 9.07150650024414, | |
| "learning_rate": 0.00014787427231313622, | |
| "loss": 3.6087515258789065, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 1.522305353492378, | |
| "grad_norm": 3.6539382934570312, | |
| "learning_rate": 0.00014777050235038964, | |
| "loss": 3.6974029541015625, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 1.523343053119844, | |
| "grad_norm": 2.5568654537200928, | |
| "learning_rate": 0.00014766673238764306, | |
| "loss": 3.7100448608398438, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 1.5243807527473097, | |
| "grad_norm": 5.767122745513916, | |
| "learning_rate": 0.00014756296242489649, | |
| "loss": 3.494932861328125, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 1.5254184523747756, | |
| "grad_norm": 5.006596088409424, | |
| "learning_rate": 0.00014745919246214988, | |
| "loss": 3.804518737792969, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 1.5264561520022415, | |
| "grad_norm": 3.907433271408081, | |
| "learning_rate": 0.0001473554224994033, | |
| "loss": 3.6617333984375, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 1.5274938516297074, | |
| "grad_norm": 6.253331184387207, | |
| "learning_rate": 0.00014725165253665672, | |
| "loss": 3.611311950683594, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 1.528531551257173, | |
| "grad_norm": 5.735301494598389, | |
| "learning_rate": 0.00014714788257391015, | |
| "loss": 3.605543518066406, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 1.5295692508846388, | |
| "grad_norm": 1.7375198602676392, | |
| "learning_rate": 0.00014704411261116357, | |
| "loss": 3.6379776000976562, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 1.5306069505121047, | |
| "grad_norm": 4.913732051849365, | |
| "learning_rate": 0.000146940342648417, | |
| "loss": 3.757569580078125, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 1.5316446501395706, | |
| "grad_norm": 3.887519598007202, | |
| "learning_rate": 0.00014683657268567039, | |
| "loss": 3.654621887207031, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 1.5326823497670365, | |
| "grad_norm": 45.76445007324219, | |
| "learning_rate": 0.0001467328027229238, | |
| "loss": 3.611448059082031, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 1.5337200493945022, | |
| "grad_norm": 3.629575729370117, | |
| "learning_rate": 0.00014662903276017723, | |
| "loss": 3.6693844604492187, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 1.5347577490219682, | |
| "grad_norm": 2.453900098800659, | |
| "learning_rate": 0.00014652526279743062, | |
| "loss": 3.6880978393554686, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 1.5357954486494338, | |
| "grad_norm": 3.411557674407959, | |
| "learning_rate": 0.00014642149283468407, | |
| "loss": 3.656671447753906, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 1.5368331482768998, | |
| "grad_norm": 3.5617477893829346, | |
| "learning_rate": 0.0001463177228719375, | |
| "loss": 3.706895446777344, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 1.5378708479043657, | |
| "grad_norm": 3.5422544479370117, | |
| "learning_rate": 0.0001462139529091909, | |
| "loss": 3.605690612792969, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 1.5389085475318316, | |
| "grad_norm": 3.9814698696136475, | |
| "learning_rate": 0.0001461101829464443, | |
| "loss": 3.6530465698242187, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 1.5399462471592973, | |
| "grad_norm": 10.028122901916504, | |
| "learning_rate": 0.00014600641298369773, | |
| "loss": 3.623879089355469, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 1.540983946786763, | |
| "grad_norm": 3.4206697940826416, | |
| "learning_rate": 0.00014590264302095113, | |
| "loss": 3.517763366699219, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 1.542021646414229, | |
| "grad_norm": 3.4238781929016113, | |
| "learning_rate": 0.00014579887305820455, | |
| "loss": 3.52829833984375, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 1.5430593460416948, | |
| "grad_norm": 58.35453414916992, | |
| "learning_rate": 0.00014569510309545797, | |
| "loss": 3.682017517089844, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 1.5440970456691607, | |
| "grad_norm": 4.933131217956543, | |
| "learning_rate": 0.0001455913331327114, | |
| "loss": 3.577257080078125, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 1.5451347452966264, | |
| "grad_norm": 17.892318725585938, | |
| "learning_rate": 0.00014548756316996482, | |
| "loss": 3.710743713378906, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 1.5461724449240921, | |
| "grad_norm": 6.2961249351501465, | |
| "learning_rate": 0.0001453837932072182, | |
| "loss": 3.6647821044921876, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 1.547210144551558, | |
| "grad_norm": 4.278889179229736, | |
| "learning_rate": 0.00014528002324447163, | |
| "loss": 3.613748779296875, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 1.548247844179024, | |
| "grad_norm": 3.2785260677337646, | |
| "learning_rate": 0.00014517625328172505, | |
| "loss": 3.6411376953125, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 1.5492855438064899, | |
| "grad_norm": 3.227151393890381, | |
| "learning_rate": 0.00014507248331897848, | |
| "loss": 3.758666687011719, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 1.5503232434339556, | |
| "grad_norm": 2.6391334533691406, | |
| "learning_rate": 0.0001449687133562319, | |
| "loss": 3.5469485473632814, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 1.5513609430614215, | |
| "grad_norm": 2.5920772552490234, | |
| "learning_rate": 0.00014486494339348532, | |
| "loss": 3.621335754394531, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 1.5523986426888872, | |
| "grad_norm": 2.864225387573242, | |
| "learning_rate": 0.00014476117343073872, | |
| "loss": 3.6408596801757813, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 1.553436342316353, | |
| "grad_norm": 4.697976112365723, | |
| "learning_rate": 0.00014465740346799214, | |
| "loss": 3.6993423461914063, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 1.554474041943819, | |
| "grad_norm": 4.074455738067627, | |
| "learning_rate": 0.00014455363350524556, | |
| "loss": 3.6419488525390626, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 1.555511741571285, | |
| "grad_norm": 2.933537721633911, | |
| "learning_rate": 0.00014444986354249898, | |
| "loss": 3.622572326660156, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 1.5565494411987506, | |
| "grad_norm": 5.856564521789551, | |
| "learning_rate": 0.0001443460935797524, | |
| "loss": 3.7532833862304686, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.5575871408262163, | |
| "grad_norm": 4.24385929107666, | |
| "learning_rate": 0.00014424232361700583, | |
| "loss": 3.67490234375, | |
| "step": 150100 | |
| }, | |
| { | |
| "epoch": 1.5586248404536822, | |
| "grad_norm": 5.053845405578613, | |
| "learning_rate": 0.00014413855365425922, | |
| "loss": 3.7350125122070312, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 1.5596625400811481, | |
| "grad_norm": 3.423252582550049, | |
| "learning_rate": 0.00014403478369151264, | |
| "loss": 3.522652893066406, | |
| "step": 150300 | |
| }, | |
| { | |
| "epoch": 1.560700239708614, | |
| "grad_norm": 8.40445327758789, | |
| "learning_rate": 0.00014393101372876606, | |
| "loss": 3.480498962402344, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 1.5617379393360797, | |
| "grad_norm": 3.1955294609069824, | |
| "learning_rate": 0.00014382724376601946, | |
| "loss": 3.6813082885742188, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 1.5627756389635457, | |
| "grad_norm": 6.0853681564331055, | |
| "learning_rate": 0.0001437234738032729, | |
| "loss": 3.6238223266601564, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 1.5638133385910113, | |
| "grad_norm": 5.178461074829102, | |
| "learning_rate": 0.0001436197038405263, | |
| "loss": 3.6469857788085935, | |
| "step": 150700 | |
| }, | |
| { | |
| "epoch": 1.5648510382184773, | |
| "grad_norm": 8.24820613861084, | |
| "learning_rate": 0.00014351593387777972, | |
| "loss": 3.6198629760742187, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 1.5658887378459432, | |
| "grad_norm": 4.228358745574951, | |
| "learning_rate": 0.00014341216391503315, | |
| "loss": 3.4716970825195315, | |
| "step": 150900 | |
| }, | |
| { | |
| "epoch": 1.566926437473409, | |
| "grad_norm": 3.555584192276001, | |
| "learning_rate": 0.00014330839395228654, | |
| "loss": 3.739703369140625, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 1.5679641371008748, | |
| "grad_norm": 5.781318187713623, | |
| "learning_rate": 0.00014320462398953996, | |
| "loss": 3.5981024169921874, | |
| "step": 151100 | |
| }, | |
| { | |
| "epoch": 1.5690018367283405, | |
| "grad_norm": 6.903919696807861, | |
| "learning_rate": 0.0001431008540267934, | |
| "loss": 3.5764788818359374, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 1.5700395363558064, | |
| "grad_norm": 3.584331512451172, | |
| "learning_rate": 0.0001429970840640468, | |
| "loss": 3.6005426025390626, | |
| "step": 151300 | |
| }, | |
| { | |
| "epoch": 1.5710772359832723, | |
| "grad_norm": 4.393853664398193, | |
| "learning_rate": 0.00014289331410130023, | |
| "loss": 3.78184814453125, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 1.5721149356107382, | |
| "grad_norm": 2.4552299976348877, | |
| "learning_rate": 0.00014278954413855365, | |
| "loss": 3.7241311645507813, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 1.573152635238204, | |
| "grad_norm": 6.105810642242432, | |
| "learning_rate": 0.00014268577417580705, | |
| "loss": 3.6668280029296874, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 1.5741903348656698, | |
| "grad_norm": 5.4593939781188965, | |
| "learning_rate": 0.00014258200421306047, | |
| "loss": 3.6350604248046876, | |
| "step": 151700 | |
| }, | |
| { | |
| "epoch": 1.5752280344931355, | |
| "grad_norm": 8.01681900024414, | |
| "learning_rate": 0.0001424782342503139, | |
| "loss": 3.636524658203125, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 1.5762657341206014, | |
| "grad_norm": 27.08595848083496, | |
| "learning_rate": 0.0001423744642875673, | |
| "loss": 3.7312826538085937, | |
| "step": 151900 | |
| }, | |
| { | |
| "epoch": 1.5773034337480674, | |
| "grad_norm": 3.227189064025879, | |
| "learning_rate": 0.00014227069432482073, | |
| "loss": 3.54576416015625, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 1.5783411333755333, | |
| "grad_norm": 3.922788619995117, | |
| "learning_rate": 0.00014216692436207413, | |
| "loss": 3.762126770019531, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 1.579378833002999, | |
| "grad_norm": 11.172755241394043, | |
| "learning_rate": 0.00014206315439932755, | |
| "loss": 3.6238665771484375, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 1.5804165326304647, | |
| "grad_norm": 4.898155212402344, | |
| "learning_rate": 0.00014195938443658097, | |
| "loss": 3.5397454833984376, | |
| "step": 152300 | |
| }, | |
| { | |
| "epoch": 1.5814542322579306, | |
| "grad_norm": 4.228941440582275, | |
| "learning_rate": 0.0001418556144738344, | |
| "loss": 3.482630615234375, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 1.5824919318853965, | |
| "grad_norm": 3.2711164951324463, | |
| "learning_rate": 0.00014175184451108782, | |
| "loss": 3.55691162109375, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 1.5835296315128624, | |
| "grad_norm": 4.924630641937256, | |
| "learning_rate": 0.00014164807454834124, | |
| "loss": 3.6941983032226564, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 1.584567331140328, | |
| "grad_norm": 4.247806072235107, | |
| "learning_rate": 0.00014154430458559463, | |
| "loss": 3.704905700683594, | |
| "step": 152700 | |
| }, | |
| { | |
| "epoch": 1.5856050307677938, | |
| "grad_norm": 5.901268482208252, | |
| "learning_rate": 0.00014144053462284805, | |
| "loss": 3.4900387573242186, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 1.5866427303952597, | |
| "grad_norm": 2.9829347133636475, | |
| "learning_rate": 0.00014133676466010148, | |
| "loss": 3.560227966308594, | |
| "step": 152900 | |
| }, | |
| { | |
| "epoch": 1.5876804300227256, | |
| "grad_norm": 3.3158979415893555, | |
| "learning_rate": 0.00014123299469735487, | |
| "loss": 3.6083251953125, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 1.5887181296501915, | |
| "grad_norm": 3.4291346073150635, | |
| "learning_rate": 0.00014112922473460832, | |
| "loss": 3.634643859863281, | |
| "step": 153100 | |
| }, | |
| { | |
| "epoch": 1.5897558292776572, | |
| "grad_norm": 6.855015754699707, | |
| "learning_rate": 0.00014102545477186174, | |
| "loss": 3.5994863891601563, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 1.5907935289051232, | |
| "grad_norm": 5.0481133460998535, | |
| "learning_rate": 0.00014092168480911514, | |
| "loss": 3.7016021728515627, | |
| "step": 153300 | |
| }, | |
| { | |
| "epoch": 1.5918312285325888, | |
| "grad_norm": 7.888632297515869, | |
| "learning_rate": 0.00014081791484636856, | |
| "loss": 3.531593017578125, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 1.5928689281600548, | |
| "grad_norm": 3.533106565475464, | |
| "learning_rate": 0.00014071414488362198, | |
| "loss": 3.671497497558594, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 1.5939066277875207, | |
| "grad_norm": 3.2950990200042725, | |
| "learning_rate": 0.00014061037492087538, | |
| "loss": 3.6725836181640625, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 1.5949443274149866, | |
| "grad_norm": 5.21208381652832, | |
| "learning_rate": 0.00014050660495812882, | |
| "loss": 3.6607846069335936, | |
| "step": 153700 | |
| }, | |
| { | |
| "epoch": 1.5959820270424523, | |
| "grad_norm": 2.718191385269165, | |
| "learning_rate": 0.00014040283499538222, | |
| "loss": 3.607443542480469, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 1.597019726669918, | |
| "grad_norm": 3.6571433544158936, | |
| "learning_rate": 0.00014029906503263564, | |
| "loss": 3.675062255859375, | |
| "step": 153900 | |
| }, | |
| { | |
| "epoch": 1.598057426297384, | |
| "grad_norm": 2.440661907196045, | |
| "learning_rate": 0.00014019529506988906, | |
| "loss": 3.4682305908203124, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 1.5990951259248498, | |
| "grad_norm": 4.171643257141113, | |
| "learning_rate": 0.00014009152510714246, | |
| "loss": 3.682950134277344, | |
| "step": 154100 | |
| }, | |
| { | |
| "epoch": 1.6001328255523157, | |
| "grad_norm": 7.624752998352051, | |
| "learning_rate": 0.00013998775514439588, | |
| "loss": 3.67526611328125, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 1.6011705251797814, | |
| "grad_norm": 7.279924392700195, | |
| "learning_rate": 0.0001398839851816493, | |
| "loss": 3.6037884521484376, | |
| "step": 154300 | |
| }, | |
| { | |
| "epoch": 1.6022082248072473, | |
| "grad_norm": 3.2470226287841797, | |
| "learning_rate": 0.00013978021521890272, | |
| "loss": 3.658772277832031, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 1.603245924434713, | |
| "grad_norm": 5.602239608764648, | |
| "learning_rate": 0.00013967644525615615, | |
| "loss": 3.5984457397460936, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 1.604283624062179, | |
| "grad_norm": 3.6453311443328857, | |
| "learning_rate": 0.00013957267529340957, | |
| "loss": 3.388334045410156, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 1.6053213236896449, | |
| "grad_norm": 6.957507610321045, | |
| "learning_rate": 0.00013946890533066296, | |
| "loss": 3.617900695800781, | |
| "step": 154700 | |
| }, | |
| { | |
| "epoch": 1.6063590233171108, | |
| "grad_norm": 15.978106498718262, | |
| "learning_rate": 0.00013936513536791638, | |
| "loss": 3.514501647949219, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 1.6073967229445765, | |
| "grad_norm": 4.719081401824951, | |
| "learning_rate": 0.0001392613654051698, | |
| "loss": 3.6095266723632813, | |
| "step": 154900 | |
| }, | |
| { | |
| "epoch": 1.6084344225720422, | |
| "grad_norm": 3.6483592987060547, | |
| "learning_rate": 0.00013915759544242323, | |
| "loss": 3.6144635009765627, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 1.609472122199508, | |
| "grad_norm": 3.3481674194335938, | |
| "learning_rate": 0.00013905382547967665, | |
| "loss": 3.5398931884765625, | |
| "step": 155100 | |
| }, | |
| { | |
| "epoch": 1.610509821826974, | |
| "grad_norm": 6.413243293762207, | |
| "learning_rate": 0.00013895005551693007, | |
| "loss": 3.6416336059570313, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 1.61154752145444, | |
| "grad_norm": 7.17488431930542, | |
| "learning_rate": 0.00013884628555418347, | |
| "loss": 3.717559814453125, | |
| "step": 155300 | |
| }, | |
| { | |
| "epoch": 1.6125852210819056, | |
| "grad_norm": 6.735267162322998, | |
| "learning_rate": 0.0001387425155914369, | |
| "loss": 3.6701150512695313, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 1.6136229207093713, | |
| "grad_norm": 3.489192008972168, | |
| "learning_rate": 0.0001386387456286903, | |
| "loss": 3.607757263183594, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 1.6146606203368372, | |
| "grad_norm": 4.3538360595703125, | |
| "learning_rate": 0.00013853497566594373, | |
| "loss": 3.6339166259765623, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 1.6156983199643031, | |
| "grad_norm": 17.20830535888672, | |
| "learning_rate": 0.00013843120570319715, | |
| "loss": 3.42401611328125, | |
| "step": 155700 | |
| }, | |
| { | |
| "epoch": 1.616736019591769, | |
| "grad_norm": 2.5314135551452637, | |
| "learning_rate": 0.00013832743574045055, | |
| "loss": 3.527308349609375, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 1.617773719219235, | |
| "grad_norm": 4.076705455780029, | |
| "learning_rate": 0.00013822366577770397, | |
| "loss": 3.5527752685546874, | |
| "step": 155900 | |
| }, | |
| { | |
| "epoch": 1.6188114188467007, | |
| "grad_norm": 3.8894543647766113, | |
| "learning_rate": 0.0001381198958149574, | |
| "loss": 3.68035400390625, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 1.6198491184741663, | |
| "grad_norm": 17.054737091064453, | |
| "learning_rate": 0.0001380161258522108, | |
| "loss": 3.4780517578125, | |
| "step": 156100 | |
| }, | |
| { | |
| "epoch": 1.6208868181016323, | |
| "grad_norm": 20.06046485900879, | |
| "learning_rate": 0.0001379123558894642, | |
| "loss": 3.5436282348632813, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 1.6219245177290982, | |
| "grad_norm": 3.36186146736145, | |
| "learning_rate": 0.00013780858592671766, | |
| "loss": 3.6915762329101565, | |
| "step": 156300 | |
| }, | |
| { | |
| "epoch": 1.622962217356564, | |
| "grad_norm": 3.333552360534668, | |
| "learning_rate": 0.00013770481596397105, | |
| "loss": 3.551458740234375, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 1.6239999169840298, | |
| "grad_norm": 16.679468154907227, | |
| "learning_rate": 0.00013760104600122448, | |
| "loss": 3.5686306762695312, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 1.6250376166114955, | |
| "grad_norm": 3.8986880779266357, | |
| "learning_rate": 0.0001374972760384779, | |
| "loss": 3.6233151245117186, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 1.6260753162389614, | |
| "grad_norm": 5.065491199493408, | |
| "learning_rate": 0.0001373935060757313, | |
| "loss": 3.6737161254882813, | |
| "step": 156700 | |
| }, | |
| { | |
| "epoch": 1.6271130158664273, | |
| "grad_norm": 16.096450805664062, | |
| "learning_rate": 0.00013728973611298471, | |
| "loss": 3.6823269653320314, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 1.6281507154938932, | |
| "grad_norm": 3.939023733139038, | |
| "learning_rate": 0.00013718596615023814, | |
| "loss": 3.655545349121094, | |
| "step": 156900 | |
| }, | |
| { | |
| "epoch": 1.629188415121359, | |
| "grad_norm": 5.221971035003662, | |
| "learning_rate": 0.00013708219618749156, | |
| "loss": 3.5299761962890623, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 1.6302261147488248, | |
| "grad_norm": 4.515364646911621, | |
| "learning_rate": 0.00013697842622474498, | |
| "loss": 3.5957623291015626, | |
| "step": 157100 | |
| }, | |
| { | |
| "epoch": 1.6312638143762905, | |
| "grad_norm": 2.1334664821624756, | |
| "learning_rate": 0.00013687465626199838, | |
| "loss": 3.5724642944335936, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 1.6323015140037564, | |
| "grad_norm": 3.8212311267852783, | |
| "learning_rate": 0.0001367708862992518, | |
| "loss": 3.5870269775390624, | |
| "step": 157300 | |
| }, | |
| { | |
| "epoch": 1.6333392136312224, | |
| "grad_norm": 7.132654666900635, | |
| "learning_rate": 0.00013666711633650522, | |
| "loss": 3.5734619140625, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 1.6343769132586883, | |
| "grad_norm": 4.568203926086426, | |
| "learning_rate": 0.00013656334637375864, | |
| "loss": 3.6052120971679686, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 1.635414612886154, | |
| "grad_norm": 6.630765438079834, | |
| "learning_rate": 0.00013645957641101206, | |
| "loss": 3.7074453735351565, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 1.6364523125136197, | |
| "grad_norm": 9.513466835021973, | |
| "learning_rate": 0.00013635580644826549, | |
| "loss": 3.4421658325195312, | |
| "step": 157700 | |
| }, | |
| { | |
| "epoch": 1.6374900121410856, | |
| "grad_norm": 3.5600993633270264, | |
| "learning_rate": 0.00013625203648551888, | |
| "loss": 3.472029724121094, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 1.6385277117685515, | |
| "grad_norm": 3.796132802963257, | |
| "learning_rate": 0.0001361482665227723, | |
| "loss": 3.700109558105469, | |
| "step": 157900 | |
| }, | |
| { | |
| "epoch": 1.6395654113960174, | |
| "grad_norm": 5.419138431549072, | |
| "learning_rate": 0.00013604449656002572, | |
| "loss": 3.525767517089844, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 1.640603111023483, | |
| "grad_norm": 7.728092193603516, | |
| "learning_rate": 0.00013594072659727912, | |
| "loss": 3.4612411499023437, | |
| "step": 158100 | |
| }, | |
| { | |
| "epoch": 1.641640810650949, | |
| "grad_norm": 5.094764232635498, | |
| "learning_rate": 0.00013583695663453257, | |
| "loss": 3.5728485107421877, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 1.6426785102784147, | |
| "grad_norm": 7.930044174194336, | |
| "learning_rate": 0.000135733186671786, | |
| "loss": 3.547598571777344, | |
| "step": 158300 | |
| }, | |
| { | |
| "epoch": 1.6437162099058806, | |
| "grad_norm": 3.853911876678467, | |
| "learning_rate": 0.00013562941670903938, | |
| "loss": 3.5331781005859373, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 1.6447539095333465, | |
| "grad_norm": 14.153372764587402, | |
| "learning_rate": 0.0001355256467462928, | |
| "loss": 3.5483056640625, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 1.6457916091608125, | |
| "grad_norm": 4.353669166564941, | |
| "learning_rate": 0.00013542187678354623, | |
| "loss": 3.5902810668945313, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 1.6468293087882782, | |
| "grad_norm": 3.16603946685791, | |
| "learning_rate": 0.00013531810682079962, | |
| "loss": 3.5274386596679688, | |
| "step": 158700 | |
| }, | |
| { | |
| "epoch": 1.6478670084157439, | |
| "grad_norm": 5.928895950317383, | |
| "learning_rate": 0.00013521433685805307, | |
| "loss": 3.662962646484375, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 1.6489047080432098, | |
| "grad_norm": 4.497453689575195, | |
| "learning_rate": 0.00013511056689530647, | |
| "loss": 3.6771749877929687, | |
| "step": 158900 | |
| }, | |
| { | |
| "epoch": 1.6499424076706757, | |
| "grad_norm": 6.737712383270264, | |
| "learning_rate": 0.0001350067969325599, | |
| "loss": 3.546751708984375, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 1.6509801072981416, | |
| "grad_norm": 3.984771490097046, | |
| "learning_rate": 0.0001349030269698133, | |
| "loss": 3.5879977416992186, | |
| "step": 159100 | |
| }, | |
| { | |
| "epoch": 1.6520178069256073, | |
| "grad_norm": 7.267343521118164, | |
| "learning_rate": 0.0001347992570070667, | |
| "loss": 3.5557431030273436, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 1.653055506553073, | |
| "grad_norm": 5.349457263946533, | |
| "learning_rate": 0.00013469548704432013, | |
| "loss": 3.6174130249023437, | |
| "step": 159300 | |
| }, | |
| { | |
| "epoch": 1.654093206180539, | |
| "grad_norm": 3.6522059440612793, | |
| "learning_rate": 0.00013459171708157358, | |
| "loss": 3.609751892089844, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 1.6551309058080048, | |
| "grad_norm": 5.704461574554443, | |
| "learning_rate": 0.00013448794711882697, | |
| "loss": 3.5679837036132813, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 1.6561686054354707, | |
| "grad_norm": 5.23817777633667, | |
| "learning_rate": 0.0001343841771560804, | |
| "loss": 3.5738253784179688, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 1.6572063050629366, | |
| "grad_norm": 12.301040649414062, | |
| "learning_rate": 0.00013428040719333382, | |
| "loss": 3.587038879394531, | |
| "step": 159700 | |
| }, | |
| { | |
| "epoch": 1.6582440046904023, | |
| "grad_norm": 6.761283874511719, | |
| "learning_rate": 0.0001341766372305872, | |
| "loss": 3.521001281738281, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 1.659281704317868, | |
| "grad_norm": 5.411608695983887, | |
| "learning_rate": 0.00013407286726784063, | |
| "loss": 3.473619384765625, | |
| "step": 159900 | |
| }, | |
| { | |
| "epoch": 1.660319403945334, | |
| "grad_norm": 14.189502716064453, | |
| "learning_rate": 0.00013396909730509405, | |
| "loss": 3.5413604736328126, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.6613571035727999, | |
| "grad_norm": 3.0541956424713135, | |
| "learning_rate": 0.00013386532734234748, | |
| "loss": 3.5548626708984377, | |
| "step": 160100 | |
| }, | |
| { | |
| "epoch": 1.6623948032002658, | |
| "grad_norm": 3.2475764751434326, | |
| "learning_rate": 0.0001337615573796009, | |
| "loss": 3.5887530517578123, | |
| "step": 160200 | |
| }, | |
| { | |
| "epoch": 1.6634325028277315, | |
| "grad_norm": 4.810506343841553, | |
| "learning_rate": 0.00013365778741685432, | |
| "loss": 3.6068450927734377, | |
| "step": 160300 | |
| }, | |
| { | |
| "epoch": 1.6644702024551972, | |
| "grad_norm": 11.347721099853516, | |
| "learning_rate": 0.00013355401745410771, | |
| "loss": 3.663785705566406, | |
| "step": 160400 | |
| }, | |
| { | |
| "epoch": 1.665507902082663, | |
| "grad_norm": 2.9197380542755127, | |
| "learning_rate": 0.00013345024749136114, | |
| "loss": 3.6435916137695314, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 1.666545601710129, | |
| "grad_norm": 5.3932037353515625, | |
| "learning_rate": 0.00013334647752861456, | |
| "loss": 3.6256121826171874, | |
| "step": 160600 | |
| }, | |
| { | |
| "epoch": 1.667583301337595, | |
| "grad_norm": 3.6826651096343994, | |
| "learning_rate": 0.00013324270756586798, | |
| "loss": 3.60268798828125, | |
| "step": 160700 | |
| }, | |
| { | |
| "epoch": 1.6686210009650606, | |
| "grad_norm": 4.883547782897949, | |
| "learning_rate": 0.0001331389376031214, | |
| "loss": 3.508822326660156, | |
| "step": 160800 | |
| }, | |
| { | |
| "epoch": 1.6696587005925265, | |
| "grad_norm": 3.1789474487304688, | |
| "learning_rate": 0.0001330351676403748, | |
| "loss": 3.5955624389648437, | |
| "step": 160900 | |
| }, | |
| { | |
| "epoch": 1.6706964002199922, | |
| "grad_norm": 3.8428354263305664, | |
| "learning_rate": 0.00013293139767762822, | |
| "loss": 3.6681442260742188, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 1.6717340998474581, | |
| "grad_norm": 5.440670490264893, | |
| "learning_rate": 0.00013282762771488164, | |
| "loss": 3.65127197265625, | |
| "step": 161100 | |
| }, | |
| { | |
| "epoch": 1.672771799474924, | |
| "grad_norm": 4.737522125244141, | |
| "learning_rate": 0.00013272385775213504, | |
| "loss": 3.757344055175781, | |
| "step": 161200 | |
| }, | |
| { | |
| "epoch": 1.67380949910239, | |
| "grad_norm": 5.953054428100586, | |
| "learning_rate": 0.00013262008778938848, | |
| "loss": 3.690797119140625, | |
| "step": 161300 | |
| }, | |
| { | |
| "epoch": 1.6748471987298557, | |
| "grad_norm": 8.720730781555176, | |
| "learning_rate": 0.0001325163178266419, | |
| "loss": 3.790602722167969, | |
| "step": 161400 | |
| }, | |
| { | |
| "epoch": 1.6758848983573214, | |
| "grad_norm": 3.9143240451812744, | |
| "learning_rate": 0.0001324125478638953, | |
| "loss": 3.439073486328125, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 1.6769225979847873, | |
| "grad_norm": 4.572363376617432, | |
| "learning_rate": 0.00013230877790114872, | |
| "loss": 3.5498342895507813, | |
| "step": 161600 | |
| }, | |
| { | |
| "epoch": 1.6779602976122532, | |
| "grad_norm": 9.166924476623535, | |
| "learning_rate": 0.00013220500793840215, | |
| "loss": 3.479727478027344, | |
| "step": 161700 | |
| }, | |
| { | |
| "epoch": 1.678997997239719, | |
| "grad_norm": 2.0057218074798584, | |
| "learning_rate": 0.00013210123797565554, | |
| "loss": 3.72489990234375, | |
| "step": 161800 | |
| }, | |
| { | |
| "epoch": 1.6800356968671848, | |
| "grad_norm": 4.892455101013184, | |
| "learning_rate": 0.000131997468012909, | |
| "loss": 3.6359210205078125, | |
| "step": 161900 | |
| }, | |
| { | |
| "epoch": 1.6810733964946507, | |
| "grad_norm": 8.374796867370605, | |
| "learning_rate": 0.00013189369805016238, | |
| "loss": 3.5657424926757812, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 1.6821110961221164, | |
| "grad_norm": 3.702462911605835, | |
| "learning_rate": 0.0001317899280874158, | |
| "loss": 3.6002679443359376, | |
| "step": 162100 | |
| }, | |
| { | |
| "epoch": 1.6831487957495823, | |
| "grad_norm": 6.6382856369018555, | |
| "learning_rate": 0.00013168615812466923, | |
| "loss": 3.5055661010742187, | |
| "step": 162200 | |
| }, | |
| { | |
| "epoch": 1.6841864953770482, | |
| "grad_norm": 4.067321300506592, | |
| "learning_rate": 0.00013158238816192262, | |
| "loss": 3.6370770263671877, | |
| "step": 162300 | |
| }, | |
| { | |
| "epoch": 1.6852241950045141, | |
| "grad_norm": 6.839338779449463, | |
| "learning_rate": 0.00013147861819917604, | |
| "loss": 3.68888671875, | |
| "step": 162400 | |
| }, | |
| { | |
| "epoch": 1.6862618946319798, | |
| "grad_norm": 4.304868221282959, | |
| "learning_rate": 0.00013137484823642947, | |
| "loss": 3.5517013549804686, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 1.6872995942594455, | |
| "grad_norm": 6.149030685424805, | |
| "learning_rate": 0.0001312710782736829, | |
| "loss": 3.535697326660156, | |
| "step": 162600 | |
| }, | |
| { | |
| "epoch": 1.6883372938869114, | |
| "grad_norm": 3.3684825897216797, | |
| "learning_rate": 0.0001311673083109363, | |
| "loss": 3.4286175537109376, | |
| "step": 162700 | |
| }, | |
| { | |
| "epoch": 1.6893749935143774, | |
| "grad_norm": 3.4294440746307373, | |
| "learning_rate": 0.00013106353834818973, | |
| "loss": 3.443184509277344, | |
| "step": 162800 | |
| }, | |
| { | |
| "epoch": 1.6904126931418433, | |
| "grad_norm": 4.177918434143066, | |
| "learning_rate": 0.00013095976838544313, | |
| "loss": 3.6785324096679686, | |
| "step": 162900 | |
| }, | |
| { | |
| "epoch": 1.691450392769309, | |
| "grad_norm": 3.914222478866577, | |
| "learning_rate": 0.00013085599842269655, | |
| "loss": 3.6343704223632813, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 1.6924880923967747, | |
| "grad_norm": 10.268918991088867, | |
| "learning_rate": 0.00013075222845994997, | |
| "loss": 3.625147399902344, | |
| "step": 163100 | |
| }, | |
| { | |
| "epoch": 1.6935257920242406, | |
| "grad_norm": 3.8632876873016357, | |
| "learning_rate": 0.0001306484584972034, | |
| "loss": 3.62834228515625, | |
| "step": 163200 | |
| }, | |
| { | |
| "epoch": 1.6945634916517065, | |
| "grad_norm": 3.8029658794403076, | |
| "learning_rate": 0.00013054468853445681, | |
| "loss": 3.4555462646484374, | |
| "step": 163300 | |
| }, | |
| { | |
| "epoch": 1.6956011912791724, | |
| "grad_norm": 3.983098030090332, | |
| "learning_rate": 0.00013044091857171024, | |
| "loss": 3.6773056030273437, | |
| "step": 163400 | |
| }, | |
| { | |
| "epoch": 1.6966388909066383, | |
| "grad_norm": 3.1625497341156006, | |
| "learning_rate": 0.00013033714860896363, | |
| "loss": 3.525480041503906, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 1.697676590534104, | |
| "grad_norm": 6.201349258422852, | |
| "learning_rate": 0.00013023337864621705, | |
| "loss": 3.626365051269531, | |
| "step": 163600 | |
| }, | |
| { | |
| "epoch": 1.6987142901615697, | |
| "grad_norm": 4.032458782196045, | |
| "learning_rate": 0.00013012960868347048, | |
| "loss": 3.5092694091796877, | |
| "step": 163700 | |
| }, | |
| { | |
| "epoch": 1.6997519897890356, | |
| "grad_norm": 3.9698915481567383, | |
| "learning_rate": 0.0001300258387207239, | |
| "loss": 3.273734436035156, | |
| "step": 163800 | |
| }, | |
| { | |
| "epoch": 1.7007896894165015, | |
| "grad_norm": 9.877572059631348, | |
| "learning_rate": 0.00012992206875797732, | |
| "loss": 3.576407775878906, | |
| "step": 163900 | |
| }, | |
| { | |
| "epoch": 1.7018273890439675, | |
| "grad_norm": 14.561692237854004, | |
| "learning_rate": 0.00012981829879523071, | |
| "loss": 3.6638983154296874, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 1.7028650886714332, | |
| "grad_norm": 2.6718385219573975, | |
| "learning_rate": 0.00012971452883248414, | |
| "loss": 3.671317138671875, | |
| "step": 164100 | |
| }, | |
| { | |
| "epoch": 1.7039027882988989, | |
| "grad_norm": 3.6662535667419434, | |
| "learning_rate": 0.00012961075886973756, | |
| "loss": 3.648578796386719, | |
| "step": 164200 | |
| }, | |
| { | |
| "epoch": 1.7049404879263648, | |
| "grad_norm": 4.04230260848999, | |
| "learning_rate": 0.00012950698890699095, | |
| "loss": 3.4332769775390624, | |
| "step": 164300 | |
| }, | |
| { | |
| "epoch": 1.7059781875538307, | |
| "grad_norm": 9.336248397827148, | |
| "learning_rate": 0.00012940321894424437, | |
| "loss": 3.6213333129882814, | |
| "step": 164400 | |
| }, | |
| { | |
| "epoch": 1.7070158871812966, | |
| "grad_norm": 5.882486820220947, | |
| "learning_rate": 0.00012929944898149782, | |
| "loss": 3.525044250488281, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 1.7080535868087623, | |
| "grad_norm": 6.984238624572754, | |
| "learning_rate": 0.00012919567901875122, | |
| "loss": 3.6717626953125, | |
| "step": 164600 | |
| }, | |
| { | |
| "epoch": 1.7090912864362282, | |
| "grad_norm": 19.616052627563477, | |
| "learning_rate": 0.00012909190905600464, | |
| "loss": 3.5099832153320314, | |
| "step": 164700 | |
| }, | |
| { | |
| "epoch": 1.710128986063694, | |
| "grad_norm": 8.419858932495117, | |
| "learning_rate": 0.00012898813909325806, | |
| "loss": 3.624603576660156, | |
| "step": 164800 | |
| }, | |
| { | |
| "epoch": 1.7111666856911598, | |
| "grad_norm": 3.145763397216797, | |
| "learning_rate": 0.00012888436913051146, | |
| "loss": 3.5627670288085938, | |
| "step": 164900 | |
| }, | |
| { | |
| "epoch": 1.7122043853186257, | |
| "grad_norm": 2.620919704437256, | |
| "learning_rate": 0.00012878059916776488, | |
| "loss": 3.556968994140625, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.7132420849460916, | |
| "grad_norm": 3.6687073707580566, | |
| "learning_rate": 0.0001286768292050183, | |
| "loss": 3.590003662109375, | |
| "step": 165100 | |
| }, | |
| { | |
| "epoch": 1.7142797845735573, | |
| "grad_norm": 3.51960825920105, | |
| "learning_rate": 0.00012857305924227172, | |
| "loss": 3.443156433105469, | |
| "step": 165200 | |
| }, | |
| { | |
| "epoch": 1.715317484201023, | |
| "grad_norm": 7.178112030029297, | |
| "learning_rate": 0.00012846928927952514, | |
| "loss": 3.5516900634765625, | |
| "step": 165300 | |
| }, | |
| { | |
| "epoch": 1.716355183828489, | |
| "grad_norm": 3.60011887550354, | |
| "learning_rate": 0.00012836551931677857, | |
| "loss": 3.5771609497070314, | |
| "step": 165400 | |
| }, | |
| { | |
| "epoch": 1.7173928834559549, | |
| "grad_norm": 5.902312278747559, | |
| "learning_rate": 0.00012826174935403196, | |
| "loss": 3.590467529296875, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 1.7184305830834208, | |
| "grad_norm": 2.6880180835723877, | |
| "learning_rate": 0.00012815797939128538, | |
| "loss": 3.6772579956054687, | |
| "step": 165600 | |
| }, | |
| { | |
| "epoch": 1.7194682827108865, | |
| "grad_norm": 4.136773109436035, | |
| "learning_rate": 0.0001280542094285388, | |
| "loss": 3.7336956787109377, | |
| "step": 165700 | |
| }, | |
| { | |
| "epoch": 1.7205059823383524, | |
| "grad_norm": 5.155696392059326, | |
| "learning_rate": 0.00012795043946579223, | |
| "loss": 3.4659573364257814, | |
| "step": 165800 | |
| }, | |
| { | |
| "epoch": 1.721543681965818, | |
| "grad_norm": 5.531459331512451, | |
| "learning_rate": 0.00012784666950304565, | |
| "loss": 3.4835992431640626, | |
| "step": 165900 | |
| }, | |
| { | |
| "epoch": 1.722581381593284, | |
| "grad_norm": 6.343237400054932, | |
| "learning_rate": 0.00012774289954029904, | |
| "loss": 3.5382821655273435, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 1.72361908122075, | |
| "grad_norm": 2.731682538986206, | |
| "learning_rate": 0.00012763912957755247, | |
| "loss": 3.426122131347656, | |
| "step": 166100 | |
| }, | |
| { | |
| "epoch": 1.7246567808482158, | |
| "grad_norm": 5.487903594970703, | |
| "learning_rate": 0.0001275353596148059, | |
| "loss": 3.5763626098632812, | |
| "step": 166200 | |
| }, | |
| { | |
| "epoch": 1.7256944804756815, | |
| "grad_norm": 6.798583984375, | |
| "learning_rate": 0.00012743158965205928, | |
| "loss": 3.439584045410156, | |
| "step": 166300 | |
| }, | |
| { | |
| "epoch": 1.7267321801031472, | |
| "grad_norm": 18.596773147583008, | |
| "learning_rate": 0.00012732781968931273, | |
| "loss": 3.4846591186523437, | |
| "step": 166400 | |
| }, | |
| { | |
| "epoch": 1.7277698797306131, | |
| "grad_norm": 9.826458930969238, | |
| "learning_rate": 0.00012722404972656615, | |
| "loss": 3.410422668457031, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 1.728807579358079, | |
| "grad_norm": 5.076817035675049, | |
| "learning_rate": 0.00012712027976381955, | |
| "loss": 3.5888720703125, | |
| "step": 166600 | |
| }, | |
| { | |
| "epoch": 1.729845278985545, | |
| "grad_norm": 2.289203405380249, | |
| "learning_rate": 0.00012701650980107297, | |
| "loss": 3.6262445068359375, | |
| "step": 166700 | |
| }, | |
| { | |
| "epoch": 1.7308829786130107, | |
| "grad_norm": 2.4246132373809814, | |
| "learning_rate": 0.0001269127398383264, | |
| "loss": 3.5331646728515627, | |
| "step": 166800 | |
| }, | |
| { | |
| "epoch": 1.7319206782404764, | |
| "grad_norm": 20.16929054260254, | |
| "learning_rate": 0.0001268089698755798, | |
| "loss": 3.3934396362304686, | |
| "step": 166900 | |
| }, | |
| { | |
| "epoch": 1.7329583778679423, | |
| "grad_norm": 4.409317970275879, | |
| "learning_rate": 0.00012670519991283324, | |
| "loss": 3.46904052734375, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.7339960774954082, | |
| "grad_norm": 3.533935308456421, | |
| "learning_rate": 0.00012660142995008663, | |
| "loss": 3.6115313720703126, | |
| "step": 167100 | |
| }, | |
| { | |
| "epoch": 1.735033777122874, | |
| "grad_norm": 3.760765790939331, | |
| "learning_rate": 0.00012649765998734005, | |
| "loss": 3.7661947631835937, | |
| "step": 167200 | |
| }, | |
| { | |
| "epoch": 1.7360714767503398, | |
| "grad_norm": 3.174926996231079, | |
| "learning_rate": 0.00012639389002459348, | |
| "loss": 3.4038616943359377, | |
| "step": 167300 | |
| }, | |
| { | |
| "epoch": 1.7371091763778057, | |
| "grad_norm": 4.701259136199951, | |
| "learning_rate": 0.00012629012006184687, | |
| "loss": 3.575841064453125, | |
| "step": 167400 | |
| }, | |
| { | |
| "epoch": 1.7381468760052714, | |
| "grad_norm": 4.684348106384277, | |
| "learning_rate": 0.0001261863500991003, | |
| "loss": 3.650244140625, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 1.7391845756327373, | |
| "grad_norm": 5.04356575012207, | |
| "learning_rate": 0.00012608258013635374, | |
| "loss": 3.512914733886719, | |
| "step": 167600 | |
| }, | |
| { | |
| "epoch": 1.7402222752602032, | |
| "grad_norm": 4.33563232421875, | |
| "learning_rate": 0.00012597881017360714, | |
| "loss": 3.462794189453125, | |
| "step": 167700 | |
| }, | |
| { | |
| "epoch": 1.7412599748876691, | |
| "grad_norm": 3.108952522277832, | |
| "learning_rate": 0.00012587504021086056, | |
| "loss": 3.6481967163085938, | |
| "step": 167800 | |
| }, | |
| { | |
| "epoch": 1.7422976745151348, | |
| "grad_norm": 7.204711437225342, | |
| "learning_rate": 0.00012577127024811398, | |
| "loss": 3.3575787353515625, | |
| "step": 167900 | |
| }, | |
| { | |
| "epoch": 1.7433353741426005, | |
| "grad_norm": 9.035337448120117, | |
| "learning_rate": 0.00012566750028536737, | |
| "loss": 3.5675091552734375, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.7443730737700665, | |
| "grad_norm": 5.063663005828857, | |
| "learning_rate": 0.0001255637303226208, | |
| "loss": 3.48505615234375, | |
| "step": 168100 | |
| }, | |
| { | |
| "epoch": 1.7454107733975324, | |
| "grad_norm": 3.2425074577331543, | |
| "learning_rate": 0.00012545996035987422, | |
| "loss": 3.6897207641601564, | |
| "step": 168200 | |
| }, | |
| { | |
| "epoch": 1.7464484730249983, | |
| "grad_norm": 5.356579303741455, | |
| "learning_rate": 0.00012535619039712764, | |
| "loss": 3.5673727416992187, | |
| "step": 168300 | |
| }, | |
| { | |
| "epoch": 1.747486172652464, | |
| "grad_norm": 4.124982833862305, | |
| "learning_rate": 0.00012525242043438106, | |
| "loss": 3.512673034667969, | |
| "step": 168400 | |
| }, | |
| { | |
| "epoch": 1.74852387227993, | |
| "grad_norm": 4.768991470336914, | |
| "learning_rate": 0.00012514865047163448, | |
| "loss": 3.5959738159179686, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 1.7495615719073956, | |
| "grad_norm": 9.657281875610352, | |
| "learning_rate": 0.00012504488050888788, | |
| "loss": 3.528682861328125, | |
| "step": 168600 | |
| }, | |
| { | |
| "epoch": 1.7505992715348615, | |
| "grad_norm": 2.538902759552002, | |
| "learning_rate": 0.0001249411105461413, | |
| "loss": 3.4649612426757814, | |
| "step": 168700 | |
| }, | |
| { | |
| "epoch": 1.7516369711623274, | |
| "grad_norm": 4.286279201507568, | |
| "learning_rate": 0.00012483734058339472, | |
| "loss": 3.5286309814453123, | |
| "step": 168800 | |
| }, | |
| { | |
| "epoch": 1.7526746707897933, | |
| "grad_norm": 15.081319808959961, | |
| "learning_rate": 0.00012473357062064814, | |
| "loss": 3.492412414550781, | |
| "step": 168900 | |
| }, | |
| { | |
| "epoch": 1.753712370417259, | |
| "grad_norm": 2.91190767288208, | |
| "learning_rate": 0.00012462980065790157, | |
| "loss": 3.4919317626953124, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.7547500700447247, | |
| "grad_norm": 3.788306713104248, | |
| "learning_rate": 0.00012452603069515496, | |
| "loss": 3.587347412109375, | |
| "step": 169100 | |
| }, | |
| { | |
| "epoch": 1.7557877696721906, | |
| "grad_norm": 4.830081462860107, | |
| "learning_rate": 0.00012442226073240838, | |
| "loss": 3.6080587768554686, | |
| "step": 169200 | |
| }, | |
| { | |
| "epoch": 1.7568254692996566, | |
| "grad_norm": 4.777892112731934, | |
| "learning_rate": 0.0001243184907696618, | |
| "loss": 3.653542175292969, | |
| "step": 169300 | |
| }, | |
| { | |
| "epoch": 1.7578631689271225, | |
| "grad_norm": 8.966485977172852, | |
| "learning_rate": 0.0001242147208069152, | |
| "loss": 3.55691650390625, | |
| "step": 169400 | |
| }, | |
| { | |
| "epoch": 1.7589008685545882, | |
| "grad_norm": 1.9701244831085205, | |
| "learning_rate": 0.00012411095084416865, | |
| "loss": 3.587906799316406, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 1.759938568182054, | |
| "grad_norm": 12.719783782958984, | |
| "learning_rate": 0.00012400718088142207, | |
| "loss": 3.413060302734375, | |
| "step": 169600 | |
| }, | |
| { | |
| "epoch": 1.7609762678095198, | |
| "grad_norm": 3.8632144927978516, | |
| "learning_rate": 0.00012390341091867547, | |
| "loss": 3.6044146728515627, | |
| "step": 169700 | |
| }, | |
| { | |
| "epoch": 1.7620139674369857, | |
| "grad_norm": 5.806576251983643, | |
| "learning_rate": 0.0001237996409559289, | |
| "loss": 3.59072509765625, | |
| "step": 169800 | |
| }, | |
| { | |
| "epoch": 1.7630516670644516, | |
| "grad_norm": 7.052939414978027, | |
| "learning_rate": 0.0001236958709931823, | |
| "loss": 3.4161257934570313, | |
| "step": 169900 | |
| }, | |
| { | |
| "epoch": 1.7640893666919175, | |
| "grad_norm": 4.090539455413818, | |
| "learning_rate": 0.0001235921010304357, | |
| "loss": 3.4862603759765625, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.7651270663193832, | |
| "grad_norm": 8.032806396484375, | |
| "learning_rate": 0.00012348833106768913, | |
| "loss": 3.5226229858398437, | |
| "step": 170100 | |
| }, | |
| { | |
| "epoch": 1.766164765946849, | |
| "grad_norm": 7.900229454040527, | |
| "learning_rate": 0.00012338456110494255, | |
| "loss": 3.428408203125, | |
| "step": 170200 | |
| }, | |
| { | |
| "epoch": 1.7672024655743148, | |
| "grad_norm": 3.3465304374694824, | |
| "learning_rate": 0.00012328079114219597, | |
| "loss": 3.4806304931640626, | |
| "step": 170300 | |
| }, | |
| { | |
| "epoch": 1.7682401652017807, | |
| "grad_norm": 2.737323522567749, | |
| "learning_rate": 0.0001231770211794494, | |
| "loss": 3.5239492797851564, | |
| "step": 170400 | |
| }, | |
| { | |
| "epoch": 1.7692778648292466, | |
| "grad_norm": 5.74827766418457, | |
| "learning_rate": 0.00012307325121670281, | |
| "loss": 3.5097976684570313, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 1.7703155644567123, | |
| "grad_norm": 6.033031463623047, | |
| "learning_rate": 0.0001229694812539562, | |
| "loss": 3.4570046997070314, | |
| "step": 170600 | |
| }, | |
| { | |
| "epoch": 1.771353264084178, | |
| "grad_norm": 8.032061576843262, | |
| "learning_rate": 0.00012286571129120963, | |
| "loss": 3.560968017578125, | |
| "step": 170700 | |
| }, | |
| { | |
| "epoch": 1.772390963711644, | |
| "grad_norm": 4.955009460449219, | |
| "learning_rate": 0.00012276194132846305, | |
| "loss": 3.54818115234375, | |
| "step": 170800 | |
| }, | |
| { | |
| "epoch": 1.7734286633391099, | |
| "grad_norm": 10.685212135314941, | |
| "learning_rate": 0.00012265817136571647, | |
| "loss": 3.5968731689453124, | |
| "step": 170900 | |
| }, | |
| { | |
| "epoch": 1.7744663629665758, | |
| "grad_norm": 6.002890110015869, | |
| "learning_rate": 0.0001225544014029699, | |
| "loss": 3.6380169677734373, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.7755040625940415, | |
| "grad_norm": 2.442901849746704, | |
| "learning_rate": 0.0001224506314402233, | |
| "loss": 3.546981201171875, | |
| "step": 171100 | |
| }, | |
| { | |
| "epoch": 1.7765417622215074, | |
| "grad_norm": 7.106812000274658, | |
| "learning_rate": 0.0001223468614774767, | |
| "loss": 3.4353497314453123, | |
| "step": 171200 | |
| }, | |
| { | |
| "epoch": 1.777579461848973, | |
| "grad_norm": 4.951285362243652, | |
| "learning_rate": 0.00012224309151473014, | |
| "loss": 3.5619387817382813, | |
| "step": 171300 | |
| }, | |
| { | |
| "epoch": 1.778617161476439, | |
| "grad_norm": 4.533148765563965, | |
| "learning_rate": 0.00012213932155198356, | |
| "loss": 3.4085040283203125, | |
| "step": 171400 | |
| }, | |
| { | |
| "epoch": 1.779654861103905, | |
| "grad_norm": 3.1281020641326904, | |
| "learning_rate": 0.00012203555158923698, | |
| "loss": 3.5755316162109376, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 1.7806925607313708, | |
| "grad_norm": 3.2438437938690186, | |
| "learning_rate": 0.00012193178162649039, | |
| "loss": 3.419034118652344, | |
| "step": 171600 | |
| }, | |
| { | |
| "epoch": 1.7817302603588365, | |
| "grad_norm": 6.113760948181152, | |
| "learning_rate": 0.0001218280116637438, | |
| "loss": 3.4608731079101562, | |
| "step": 171700 | |
| }, | |
| { | |
| "epoch": 1.7827679599863022, | |
| "grad_norm": 3.805856227874756, | |
| "learning_rate": 0.00012172424170099722, | |
| "loss": 3.542497253417969, | |
| "step": 171800 | |
| }, | |
| { | |
| "epoch": 1.7838056596137681, | |
| "grad_norm": 11.923066139221191, | |
| "learning_rate": 0.00012162047173825063, | |
| "loss": 3.5580120849609376, | |
| "step": 171900 | |
| }, | |
| { | |
| "epoch": 1.784843359241234, | |
| "grad_norm": 7.653703212738037, | |
| "learning_rate": 0.00012151670177550405, | |
| "loss": 3.464120178222656, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.7858810588687, | |
| "grad_norm": 4.955140113830566, | |
| "learning_rate": 0.00012141293181275747, | |
| "loss": 3.5985858154296877, | |
| "step": 172100 | |
| }, | |
| { | |
| "epoch": 1.7869187584961657, | |
| "grad_norm": 2.7006173133850098, | |
| "learning_rate": 0.00012130916185001089, | |
| "loss": 3.608409423828125, | |
| "step": 172200 | |
| }, | |
| { | |
| "epoch": 1.7879564581236316, | |
| "grad_norm": 10.799352645874023, | |
| "learning_rate": 0.0001212053918872643, | |
| "loss": 3.5314166259765627, | |
| "step": 172300 | |
| }, | |
| { | |
| "epoch": 1.7889941577510973, | |
| "grad_norm": 2.7497682571411133, | |
| "learning_rate": 0.00012110162192451772, | |
| "loss": 3.5095343017578124, | |
| "step": 172400 | |
| }, | |
| { | |
| "epoch": 1.7900318573785632, | |
| "grad_norm": 3.47670316696167, | |
| "learning_rate": 0.00012099785196177113, | |
| "loss": 3.508272705078125, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 1.791069557006029, | |
| "grad_norm": 5.199550151824951, | |
| "learning_rate": 0.00012089408199902454, | |
| "loss": 3.503916015625, | |
| "step": 172600 | |
| }, | |
| { | |
| "epoch": 1.792107256633495, | |
| "grad_norm": 5.3487043380737305, | |
| "learning_rate": 0.00012079031203627797, | |
| "loss": 3.63627685546875, | |
| "step": 172700 | |
| }, | |
| { | |
| "epoch": 1.7931449562609607, | |
| "grad_norm": 4.6182074546813965, | |
| "learning_rate": 0.00012068654207353138, | |
| "loss": 3.517708740234375, | |
| "step": 172800 | |
| }, | |
| { | |
| "epoch": 1.7941826558884264, | |
| "grad_norm": 2.607217788696289, | |
| "learning_rate": 0.0001205827721107848, | |
| "loss": 3.555519714355469, | |
| "step": 172900 | |
| }, | |
| { | |
| "epoch": 1.7952203555158923, | |
| "grad_norm": 9.180208206176758, | |
| "learning_rate": 0.00012047900214803821, | |
| "loss": 3.5748587036132813, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.7962580551433582, | |
| "grad_norm": 5.080584526062012, | |
| "learning_rate": 0.00012037523218529164, | |
| "loss": 3.5299716186523438, | |
| "step": 173100 | |
| }, | |
| { | |
| "epoch": 1.7972957547708241, | |
| "grad_norm": 2.5319409370422363, | |
| "learning_rate": 0.00012027146222254504, | |
| "loss": 3.5544561767578124, | |
| "step": 173200 | |
| }, | |
| { | |
| "epoch": 1.7983334543982898, | |
| "grad_norm": 4.81158447265625, | |
| "learning_rate": 0.00012016769225979848, | |
| "loss": 3.6039208984375, | |
| "step": 173300 | |
| }, | |
| { | |
| "epoch": 1.7993711540257558, | |
| "grad_norm": Infinity, | |
| "learning_rate": 0.00012006392229705189, | |
| "loss": 3.439290771484375, | |
| "step": 173400 | |
| }, | |
| { | |
| "epoch": 1.8004088536532215, | |
| "grad_norm": 2.6214425563812256, | |
| "learning_rate": 0.00011996015233430531, | |
| "loss": 3.670186767578125, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 1.8014465532806874, | |
| "grad_norm": 2.7172493934631348, | |
| "learning_rate": 0.00011985638237155872, | |
| "loss": 3.5838592529296873, | |
| "step": 173600 | |
| }, | |
| { | |
| "epoch": 1.8024842529081533, | |
| "grad_norm": 8.898774147033691, | |
| "learning_rate": 0.00011975261240881213, | |
| "loss": 3.4800985717773436, | |
| "step": 173700 | |
| }, | |
| { | |
| "epoch": 1.8035219525356192, | |
| "grad_norm": 3.5623104572296143, | |
| "learning_rate": 0.00011964884244606555, | |
| "loss": 3.511365966796875, | |
| "step": 173800 | |
| }, | |
| { | |
| "epoch": 1.804559652163085, | |
| "grad_norm": 8.46833610534668, | |
| "learning_rate": 0.00011954507248331896, | |
| "loss": 3.7088421630859374, | |
| "step": 173900 | |
| }, | |
| { | |
| "epoch": 1.8055973517905506, | |
| "grad_norm": 5.097702980041504, | |
| "learning_rate": 0.00011944130252057239, | |
| "loss": 3.6202734375, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.8066350514180165, | |
| "grad_norm": 2.758472204208374, | |
| "learning_rate": 0.0001193375325578258, | |
| "loss": 3.561451721191406, | |
| "step": 174100 | |
| }, | |
| { | |
| "epoch": 1.8076727510454824, | |
| "grad_norm": 10.48659610748291, | |
| "learning_rate": 0.00011923376259507922, | |
| "loss": 3.5661395263671873, | |
| "step": 174200 | |
| }, | |
| { | |
| "epoch": 1.8087104506729483, | |
| "grad_norm": 4.996297836303711, | |
| "learning_rate": 0.00011912999263233263, | |
| "loss": 3.680464782714844, | |
| "step": 174300 | |
| }, | |
| { | |
| "epoch": 1.809748150300414, | |
| "grad_norm": 3.927097797393799, | |
| "learning_rate": 0.00011902622266958605, | |
| "loss": 3.4924087524414062, | |
| "step": 174400 | |
| }, | |
| { | |
| "epoch": 1.8107858499278797, | |
| "grad_norm": 9.367024421691895, | |
| "learning_rate": 0.00011892245270683946, | |
| "loss": 3.4610064697265623, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 1.8118235495553456, | |
| "grad_norm": 2.7783424854278564, | |
| "learning_rate": 0.0001188186827440929, | |
| "loss": 3.411673583984375, | |
| "step": 174600 | |
| }, | |
| { | |
| "epoch": 1.8128612491828116, | |
| "grad_norm": 8.61545181274414, | |
| "learning_rate": 0.0001187149127813463, | |
| "loss": 3.5328875732421876, | |
| "step": 174700 | |
| }, | |
| { | |
| "epoch": 1.8138989488102775, | |
| "grad_norm": 7.4906182289123535, | |
| "learning_rate": 0.00011861114281859971, | |
| "loss": 3.376343078613281, | |
| "step": 174800 | |
| }, | |
| { | |
| "epoch": 1.8149366484377432, | |
| "grad_norm": 1.9939513206481934, | |
| "learning_rate": 0.00011850737285585314, | |
| "loss": 3.428880615234375, | |
| "step": 174900 | |
| }, | |
| { | |
| "epoch": 1.815974348065209, | |
| "grad_norm": 6.011395454406738, | |
| "learning_rate": 0.00011840360289310654, | |
| "loss": 3.5798504638671873, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.8170120476926748, | |
| "grad_norm": 2.0973944664001465, | |
| "learning_rate": 0.00011829983293035997, | |
| "loss": 3.5833367919921875, | |
| "step": 175100 | |
| }, | |
| { | |
| "epoch": 1.8180497473201407, | |
| "grad_norm": 4.992910861968994, | |
| "learning_rate": 0.00011819606296761339, | |
| "loss": 3.6261285400390624, | |
| "step": 175200 | |
| }, | |
| { | |
| "epoch": 1.8190874469476066, | |
| "grad_norm": 89.73089599609375, | |
| "learning_rate": 0.00011809229300486681, | |
| "loss": 3.390103454589844, | |
| "step": 175300 | |
| }, | |
| { | |
| "epoch": 1.8201251465750725, | |
| "grad_norm": 4.343557834625244, | |
| "learning_rate": 0.00011798852304212022, | |
| "loss": 3.6147576904296876, | |
| "step": 175400 | |
| }, | |
| { | |
| "epoch": 1.8211628462025382, | |
| "grad_norm": Infinity, | |
| "learning_rate": 0.00011788475307937364, | |
| "loss": 3.5382080078125, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 1.822200545830004, | |
| "grad_norm": 8.41909408569336, | |
| "learning_rate": 0.00011778098311662705, | |
| "loss": 3.5090155029296874, | |
| "step": 175600 | |
| }, | |
| { | |
| "epoch": 1.8232382454574698, | |
| "grad_norm": 7.508602619171143, | |
| "learning_rate": 0.00011767721315388046, | |
| "loss": 3.540501708984375, | |
| "step": 175700 | |
| }, | |
| { | |
| "epoch": 1.8242759450849357, | |
| "grad_norm": 2.713555335998535, | |
| "learning_rate": 0.00011757344319113388, | |
| "loss": 3.5669735717773436, | |
| "step": 175800 | |
| }, | |
| { | |
| "epoch": 1.8253136447124017, | |
| "grad_norm": 9.780903816223145, | |
| "learning_rate": 0.0001174696732283873, | |
| "loss": 3.542893981933594, | |
| "step": 175900 | |
| }, | |
| { | |
| "epoch": 1.8263513443398673, | |
| "grad_norm": 2.6435556411743164, | |
| "learning_rate": 0.00011736590326564072, | |
| "loss": 3.6134707641601564, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.8273890439673333, | |
| "grad_norm": 3.3884384632110596, | |
| "learning_rate": 0.00011726213330289413, | |
| "loss": 3.574878845214844, | |
| "step": 176100 | |
| }, | |
| { | |
| "epoch": 1.828426743594799, | |
| "grad_norm": 4.323862552642822, | |
| "learning_rate": 0.00011715836334014755, | |
| "loss": 3.5432839965820313, | |
| "step": 176200 | |
| }, | |
| { | |
| "epoch": 1.8294644432222649, | |
| "grad_norm": 6.794419765472412, | |
| "learning_rate": 0.00011705459337740096, | |
| "loss": 3.4334552001953127, | |
| "step": 176300 | |
| }, | |
| { | |
| "epoch": 1.8305021428497308, | |
| "grad_norm": 3.3329992294311523, | |
| "learning_rate": 0.00011695082341465438, | |
| "loss": 3.511024169921875, | |
| "step": 176400 | |
| }, | |
| { | |
| "epoch": 1.8315398424771967, | |
| "grad_norm": 6.582189083099365, | |
| "learning_rate": 0.0001168470534519078, | |
| "loss": 3.4057382202148436, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 1.8325775421046624, | |
| "grad_norm": 3.5420665740966797, | |
| "learning_rate": 0.00011674328348916123, | |
| "loss": 3.530198974609375, | |
| "step": 176600 | |
| }, | |
| { | |
| "epoch": 1.833615241732128, | |
| "grad_norm": 3.2835450172424316, | |
| "learning_rate": 0.00011663951352641463, | |
| "loss": 3.3689605712890627, | |
| "step": 176700 | |
| }, | |
| { | |
| "epoch": 1.834652941359594, | |
| "grad_norm": 4.352384567260742, | |
| "learning_rate": 0.00011653574356366804, | |
| "loss": 3.5228622436523436, | |
| "step": 176800 | |
| }, | |
| { | |
| "epoch": 1.83569064098706, | |
| "grad_norm": 6.940867900848389, | |
| "learning_rate": 0.00011643197360092147, | |
| "loss": 3.422699279785156, | |
| "step": 176900 | |
| }, | |
| { | |
| "epoch": 1.8367283406145258, | |
| "grad_norm": 9.627628326416016, | |
| "learning_rate": 0.00011632820363817487, | |
| "loss": 3.4203256225585936, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.8377660402419915, | |
| "grad_norm": 7.819676399230957, | |
| "learning_rate": 0.00011622443367542831, | |
| "loss": 3.569815673828125, | |
| "step": 177100 | |
| }, | |
| { | |
| "epoch": 1.8388037398694572, | |
| "grad_norm": 3.4782094955444336, | |
| "learning_rate": 0.00011612066371268172, | |
| "loss": 3.5252569580078124, | |
| "step": 177200 | |
| }, | |
| { | |
| "epoch": 1.8398414394969231, | |
| "grad_norm": 9.448952674865723, | |
| "learning_rate": 0.00011601689374993514, | |
| "loss": 3.43080322265625, | |
| "step": 177300 | |
| }, | |
| { | |
| "epoch": 1.840879139124389, | |
| "grad_norm": 5.754225730895996, | |
| "learning_rate": 0.00011591312378718855, | |
| "loss": 3.45312744140625, | |
| "step": 177400 | |
| }, | |
| { | |
| "epoch": 1.841916838751855, | |
| "grad_norm": 2.9918229579925537, | |
| "learning_rate": 0.00011580935382444197, | |
| "loss": 3.548991394042969, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 1.8429545383793209, | |
| "grad_norm": 4.406205177307129, | |
| "learning_rate": 0.00011570558386169538, | |
| "loss": 3.5221047973632813, | |
| "step": 177600 | |
| }, | |
| { | |
| "epoch": 1.8439922380067866, | |
| "grad_norm": 3.79978346824646, | |
| "learning_rate": 0.00011560181389894879, | |
| "loss": 3.4272702026367186, | |
| "step": 177700 | |
| }, | |
| { | |
| "epoch": 1.8450299376342523, | |
| "grad_norm": 8.362844467163086, | |
| "learning_rate": 0.00011549804393620222, | |
| "loss": 3.496314697265625, | |
| "step": 177800 | |
| }, | |
| { | |
| "epoch": 1.8460676372617182, | |
| "grad_norm": 4.00974702835083, | |
| "learning_rate": 0.00011539427397345563, | |
| "loss": 3.4739456176757812, | |
| "step": 177900 | |
| }, | |
| { | |
| "epoch": 1.847105336889184, | |
| "grad_norm": 4.4382853507995605, | |
| "learning_rate": 0.00011529050401070905, | |
| "loss": 3.637906799316406, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.84814303651665, | |
| "grad_norm": 3.4561121463775635, | |
| "learning_rate": 0.00011518673404796246, | |
| "loss": 3.4582669067382814, | |
| "step": 178100 | |
| }, | |
| { | |
| "epoch": 1.8491807361441157, | |
| "grad_norm": 9.542756080627441, | |
| "learning_rate": 0.00011508296408521588, | |
| "loss": 3.5665469360351563, | |
| "step": 178200 | |
| }, | |
| { | |
| "epoch": 1.8502184357715814, | |
| "grad_norm": 5.516635894775391, | |
| "learning_rate": 0.00011497919412246929, | |
| "loss": 3.5199371337890626, | |
| "step": 178300 | |
| }, | |
| { | |
| "epoch": 1.8512561353990473, | |
| "grad_norm": 10.64023494720459, | |
| "learning_rate": 0.00011487542415972273, | |
| "loss": 3.605532531738281, | |
| "step": 178400 | |
| }, | |
| { | |
| "epoch": 1.8522938350265132, | |
| "grad_norm": 3.7197024822235107, | |
| "learning_rate": 0.00011477165419697613, | |
| "loss": 3.5585647583007813, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 1.8533315346539792, | |
| "grad_norm": 8.84176254272461, | |
| "learning_rate": 0.00011466788423422956, | |
| "loss": 3.469338684082031, | |
| "step": 178600 | |
| }, | |
| { | |
| "epoch": 1.8543692342814448, | |
| "grad_norm": 13.789299011230469, | |
| "learning_rate": 0.00011456411427148297, | |
| "loss": 3.654618835449219, | |
| "step": 178700 | |
| }, | |
| { | |
| "epoch": 1.8554069339089108, | |
| "grad_norm": 3.7758259773254395, | |
| "learning_rate": 0.00011446034430873637, | |
| "loss": 3.511930236816406, | |
| "step": 178800 | |
| }, | |
| { | |
| "epoch": 1.8564446335363765, | |
| "grad_norm": 4.542521953582764, | |
| "learning_rate": 0.0001143565743459898, | |
| "loss": 3.572850341796875, | |
| "step": 178900 | |
| }, | |
| { | |
| "epoch": 1.8574823331638424, | |
| "grad_norm": 7.155478477478027, | |
| "learning_rate": 0.00011425280438324322, | |
| "loss": 3.6194467163085937, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.8585200327913083, | |
| "grad_norm": 5.109609603881836, | |
| "learning_rate": 0.00011414903442049664, | |
| "loss": 3.585841064453125, | |
| "step": 179100 | |
| }, | |
| { | |
| "epoch": 1.8595577324187742, | |
| "grad_norm": 4.251883506774902, | |
| "learning_rate": 0.00011404526445775005, | |
| "loss": 3.4581594848632813, | |
| "step": 179200 | |
| }, | |
| { | |
| "epoch": 1.86059543204624, | |
| "grad_norm": 22.98354148864746, | |
| "learning_rate": 0.00011394149449500347, | |
| "loss": 3.47680419921875, | |
| "step": 179300 | |
| }, | |
| { | |
| "epoch": 1.8616331316737056, | |
| "grad_norm": 4.897403240203857, | |
| "learning_rate": 0.00011383772453225688, | |
| "loss": 3.5364599609375, | |
| "step": 179400 | |
| }, | |
| { | |
| "epoch": 1.8626708313011715, | |
| "grad_norm": 11.166070938110352, | |
| "learning_rate": 0.0001137339545695103, | |
| "loss": 3.4703445434570312, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 1.8637085309286374, | |
| "grad_norm": 3.64528226852417, | |
| "learning_rate": 0.00011363018460676372, | |
| "loss": 3.612529296875, | |
| "step": 179600 | |
| }, | |
| { | |
| "epoch": 1.8647462305561033, | |
| "grad_norm": 3.4828524589538574, | |
| "learning_rate": 0.00011352641464401714, | |
| "loss": 3.622635803222656, | |
| "step": 179700 | |
| }, | |
| { | |
| "epoch": 1.865783930183569, | |
| "grad_norm": 4.965012550354004, | |
| "learning_rate": 0.00011342264468127055, | |
| "loss": 3.420509033203125, | |
| "step": 179800 | |
| }, | |
| { | |
| "epoch": 1.866821629811035, | |
| "grad_norm": 6.657770156860352, | |
| "learning_rate": 0.00011331887471852396, | |
| "loss": 3.57205810546875, | |
| "step": 179900 | |
| }, | |
| { | |
| "epoch": 1.8678593294385006, | |
| "grad_norm": 6.785094738006592, | |
| "learning_rate": 0.00011321510475577738, | |
| "loss": 3.613439025878906, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.8688970290659666, | |
| "grad_norm": 3.2131218910217285, | |
| "learning_rate": 0.00011311133479303079, | |
| "loss": 3.721015625, | |
| "step": 180100 | |
| }, | |
| { | |
| "epoch": 1.8699347286934325, | |
| "grad_norm": 3.327937364578247, | |
| "learning_rate": 0.00011300756483028421, | |
| "loss": 3.47718017578125, | |
| "step": 180200 | |
| }, | |
| { | |
| "epoch": 1.8709724283208984, | |
| "grad_norm": 8.65044116973877, | |
| "learning_rate": 0.00011290379486753763, | |
| "loss": 3.6089404296875, | |
| "step": 180300 | |
| }, | |
| { | |
| "epoch": 1.872010127948364, | |
| "grad_norm": 2.0018603801727295, | |
| "learning_rate": 0.00011280002490479106, | |
| "loss": 3.3825112915039064, | |
| "step": 180400 | |
| }, | |
| { | |
| "epoch": 1.8730478275758298, | |
| "grad_norm": 2.7814066410064697, | |
| "learning_rate": 0.00011269625494204446, | |
| "loss": 3.4428082275390626, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 1.8740855272032957, | |
| "grad_norm": 2.5407564640045166, | |
| "learning_rate": 0.00011259248497929789, | |
| "loss": 3.6512811279296873, | |
| "step": 180600 | |
| }, | |
| { | |
| "epoch": 1.8751232268307616, | |
| "grad_norm": 3.6118102073669434, | |
| "learning_rate": 0.0001124887150165513, | |
| "loss": 3.4491305541992188, | |
| "step": 180700 | |
| }, | |
| { | |
| "epoch": 1.8761609264582275, | |
| "grad_norm": 4.681710720062256, | |
| "learning_rate": 0.0001123849450538047, | |
| "loss": 3.5399176025390626, | |
| "step": 180800 | |
| }, | |
| { | |
| "epoch": 1.8771986260856932, | |
| "grad_norm": 5.6345062255859375, | |
| "learning_rate": 0.00011228117509105814, | |
| "loss": 3.580292053222656, | |
| "step": 180900 | |
| }, | |
| { | |
| "epoch": 1.878236325713159, | |
| "grad_norm": 4.881344318389893, | |
| "learning_rate": 0.00011217740512831155, | |
| "loss": 3.5553582763671874, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 1.8792740253406248, | |
| "grad_norm": 3.3916895389556885, | |
| "learning_rate": 0.00011207363516556497, | |
| "loss": 3.468414001464844, | |
| "step": 181100 | |
| }, | |
| { | |
| "epoch": 1.8803117249680907, | |
| "grad_norm": 4.611287593841553, | |
| "learning_rate": 0.00011196986520281838, | |
| "loss": 3.420959167480469, | |
| "step": 181200 | |
| }, | |
| { | |
| "epoch": 1.8813494245955567, | |
| "grad_norm": 3.4268012046813965, | |
| "learning_rate": 0.0001118660952400718, | |
| "loss": 3.614518737792969, | |
| "step": 181300 | |
| }, | |
| { | |
| "epoch": 1.8823871242230226, | |
| "grad_norm": 9.675979614257812, | |
| "learning_rate": 0.00011176232527732521, | |
| "loss": 3.460643310546875, | |
| "step": 181400 | |
| }, | |
| { | |
| "epoch": 1.8834248238504883, | |
| "grad_norm": 4.765254497528076, | |
| "learning_rate": 0.00011165855531457864, | |
| "loss": 3.5331201171875, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 1.884462523477954, | |
| "grad_norm": 12.958268165588379, | |
| "learning_rate": 0.00011155478535183205, | |
| "loss": 3.458702392578125, | |
| "step": 181600 | |
| }, | |
| { | |
| "epoch": 1.8855002231054199, | |
| "grad_norm": 3.9760847091674805, | |
| "learning_rate": 0.00011145101538908547, | |
| "loss": 3.5144024658203126, | |
| "step": 181700 | |
| }, | |
| { | |
| "epoch": 1.8865379227328858, | |
| "grad_norm": 3.063124656677246, | |
| "learning_rate": 0.00011134724542633888, | |
| "loss": 3.3591217041015624, | |
| "step": 181800 | |
| }, | |
| { | |
| "epoch": 1.8875756223603517, | |
| "grad_norm": 14.115145683288574, | |
| "learning_rate": 0.00011124347546359229, | |
| "loss": 3.5416494750976564, | |
| "step": 181900 | |
| }, | |
| { | |
| "epoch": 1.8886133219878174, | |
| "grad_norm": 2.602299213409424, | |
| "learning_rate": 0.00011113970550084571, | |
| "loss": 3.4190499877929685, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 1.889651021615283, | |
| "grad_norm": 6.7280168533325195, | |
| "learning_rate": 0.00011103593553809912, | |
| "loss": 3.3795068359375, | |
| "step": 182100 | |
| }, | |
| { | |
| "epoch": 1.890688721242749, | |
| "grad_norm": 6.911862850189209, | |
| "learning_rate": 0.00011093216557535256, | |
| "loss": 3.5166439819335937, | |
| "step": 182200 | |
| }, | |
| { | |
| "epoch": 1.891726420870215, | |
| "grad_norm": 6.751010894775391, | |
| "learning_rate": 0.00011082839561260596, | |
| "loss": 3.4338143920898436, | |
| "step": 182300 | |
| }, | |
| { | |
| "epoch": 1.8927641204976808, | |
| "grad_norm": 4.327939510345459, | |
| "learning_rate": 0.00011072462564985939, | |
| "loss": 3.4822421264648438, | |
| "step": 182400 | |
| }, | |
| { | |
| "epoch": 1.8938018201251465, | |
| "grad_norm": 2.485795259475708, | |
| "learning_rate": 0.0001106208556871128, | |
| "loss": 3.464154052734375, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 1.8948395197526124, | |
| "grad_norm": 104.476318359375, | |
| "learning_rate": 0.00011051708572436622, | |
| "loss": 3.4480935668945314, | |
| "step": 182600 | |
| }, | |
| { | |
| "epoch": 1.8958772193800781, | |
| "grad_norm": 2.829188346862793, | |
| "learning_rate": 0.00011041331576161963, | |
| "loss": 3.593952331542969, | |
| "step": 182700 | |
| }, | |
| { | |
| "epoch": 1.896914919007544, | |
| "grad_norm": 4.845984935760498, | |
| "learning_rate": 0.00011030954579887306, | |
| "loss": 3.244365234375, | |
| "step": 182800 | |
| }, | |
| { | |
| "epoch": 1.89795261863501, | |
| "grad_norm": 2.055333375930786, | |
| "learning_rate": 0.00011020577583612647, | |
| "loss": 3.5465518188476564, | |
| "step": 182900 | |
| }, | |
| { | |
| "epoch": 1.8989903182624759, | |
| "grad_norm": 19.445037841796875, | |
| "learning_rate": 0.00011010200587337988, | |
| "loss": 3.5760122680664064, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 1.9000280178899416, | |
| "grad_norm": 3.0907251834869385, | |
| "learning_rate": 0.0001099982359106333, | |
| "loss": 3.524999084472656, | |
| "step": 183100 | |
| }, | |
| { | |
| "epoch": 1.9010657175174073, | |
| "grad_norm": 1.9697469472885132, | |
| "learning_rate": 0.00010989446594788671, | |
| "loss": 3.4634637451171875, | |
| "step": 183200 | |
| }, | |
| { | |
| "epoch": 1.9021034171448732, | |
| "grad_norm": 6.751926898956299, | |
| "learning_rate": 0.00010979069598514013, | |
| "loss": 3.4596926879882814, | |
| "step": 183300 | |
| }, | |
| { | |
| "epoch": 1.903141116772339, | |
| "grad_norm": 2.561213493347168, | |
| "learning_rate": 0.00010968692602239355, | |
| "loss": 3.5389044189453127, | |
| "step": 183400 | |
| }, | |
| { | |
| "epoch": 1.904178816399805, | |
| "grad_norm": 6.130541801452637, | |
| "learning_rate": 0.00010958315605964697, | |
| "loss": 3.4779763793945313, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 1.9052165160272707, | |
| "grad_norm": 3.2996444702148438, | |
| "learning_rate": 0.00010947938609690038, | |
| "loss": 3.4853436279296877, | |
| "step": 183600 | |
| }, | |
| { | |
| "epoch": 1.9062542156547366, | |
| "grad_norm": 4.535896301269531, | |
| "learning_rate": 0.0001093756161341538, | |
| "loss": 3.4235238647460937, | |
| "step": 183700 | |
| }, | |
| { | |
| "epoch": 1.9072919152822023, | |
| "grad_norm": 4.082485675811768, | |
| "learning_rate": 0.00010927184617140721, | |
| "loss": 3.4645541381835936, | |
| "step": 183800 | |
| }, | |
| { | |
| "epoch": 1.9083296149096682, | |
| "grad_norm": 5.501161098480225, | |
| "learning_rate": 0.00010916807620866062, | |
| "loss": 3.555899658203125, | |
| "step": 183900 | |
| }, | |
| { | |
| "epoch": 1.9093673145371342, | |
| "grad_norm": 7.624723434448242, | |
| "learning_rate": 0.00010906430624591404, | |
| "loss": 3.4653219604492187, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 1.9104050141646, | |
| "grad_norm": 3.386392116546631, | |
| "learning_rate": 0.00010896053628316746, | |
| "loss": 3.5530450439453123, | |
| "step": 184100 | |
| }, | |
| { | |
| "epoch": 1.9114427137920658, | |
| "grad_norm": 4.087791442871094, | |
| "learning_rate": 0.00010885676632042089, | |
| "loss": 3.470418701171875, | |
| "step": 184200 | |
| }, | |
| { | |
| "epoch": 1.9124804134195315, | |
| "grad_norm": 4.145429611206055, | |
| "learning_rate": 0.0001087529963576743, | |
| "loss": 3.416697692871094, | |
| "step": 184300 | |
| }, | |
| { | |
| "epoch": 1.9135181130469974, | |
| "grad_norm": 4.366927623748779, | |
| "learning_rate": 0.00010864922639492772, | |
| "loss": 3.4999765014648436, | |
| "step": 184400 | |
| }, | |
| { | |
| "epoch": 1.9145558126744633, | |
| "grad_norm": 4.084202289581299, | |
| "learning_rate": 0.00010854545643218113, | |
| "loss": 3.435041809082031, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 1.9155935123019292, | |
| "grad_norm": 9.935702323913574, | |
| "learning_rate": 0.00010844168646943455, | |
| "loss": 3.54100341796875, | |
| "step": 184600 | |
| }, | |
| { | |
| "epoch": 1.916631211929395, | |
| "grad_norm": 6.931925296783447, | |
| "learning_rate": 0.00010833791650668797, | |
| "loss": 3.5136874389648436, | |
| "step": 184700 | |
| }, | |
| { | |
| "epoch": 1.9176689115568606, | |
| "grad_norm": 3.0231878757476807, | |
| "learning_rate": 0.00010823414654394139, | |
| "loss": 3.6150555419921875, | |
| "step": 184800 | |
| }, | |
| { | |
| "epoch": 1.9187066111843265, | |
| "grad_norm": 3.3393242359161377, | |
| "learning_rate": 0.0001081303765811948, | |
| "loss": 3.479617004394531, | |
| "step": 184900 | |
| }, | |
| { | |
| "epoch": 1.9197443108117924, | |
| "grad_norm": 1.9449257850646973, | |
| "learning_rate": 0.00010802660661844821, | |
| "loss": 3.4772000122070312, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.9207820104392583, | |
| "grad_norm": 5.924251079559326, | |
| "learning_rate": 0.00010792283665570163, | |
| "loss": 3.558631591796875, | |
| "step": 185100 | |
| }, | |
| { | |
| "epoch": 1.9218197100667243, | |
| "grad_norm": 3.7242231369018555, | |
| "learning_rate": 0.00010781906669295504, | |
| "loss": 3.4901129150390626, | |
| "step": 185200 | |
| }, | |
| { | |
| "epoch": 1.92285740969419, | |
| "grad_norm": 4.291270732879639, | |
| "learning_rate": 0.00010771529673020847, | |
| "loss": 3.4830392456054686, | |
| "step": 185300 | |
| }, | |
| { | |
| "epoch": 1.9238951093216556, | |
| "grad_norm": 8.315948486328125, | |
| "learning_rate": 0.00010761152676746188, | |
| "loss": 3.654394226074219, | |
| "step": 185400 | |
| }, | |
| { | |
| "epoch": 1.9249328089491216, | |
| "grad_norm": 3.3864219188690186, | |
| "learning_rate": 0.0001075077568047153, | |
| "loss": 3.4916171264648437, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 1.9259705085765875, | |
| "grad_norm": 2.4446215629577637, | |
| "learning_rate": 0.00010740398684196871, | |
| "loss": 3.5801641845703127, | |
| "step": 185600 | |
| }, | |
| { | |
| "epoch": 1.9270082082040534, | |
| "grad_norm": 4.319270133972168, | |
| "learning_rate": 0.00010730021687922213, | |
| "loss": 3.485596008300781, | |
| "step": 185700 | |
| }, | |
| { | |
| "epoch": 1.928045907831519, | |
| "grad_norm": 12.243918418884277, | |
| "learning_rate": 0.00010719644691647554, | |
| "loss": 3.297283020019531, | |
| "step": 185800 | |
| }, | |
| { | |
| "epoch": 1.9290836074589848, | |
| "grad_norm": 3.614396333694458, | |
| "learning_rate": 0.00010709267695372895, | |
| "loss": 3.4672842407226563, | |
| "step": 185900 | |
| }, | |
| { | |
| "epoch": 1.9301213070864507, | |
| "grad_norm": 7.824878692626953, | |
| "learning_rate": 0.00010698890699098239, | |
| "loss": 3.5030999755859376, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 1.9311590067139166, | |
| "grad_norm": 11.845438003540039, | |
| "learning_rate": 0.0001068851370282358, | |
| "loss": 3.5722430419921873, | |
| "step": 186100 | |
| }, | |
| { | |
| "epoch": 1.9321967063413825, | |
| "grad_norm": 8.008241653442383, | |
| "learning_rate": 0.00010678136706548922, | |
| "loss": 3.4848983764648436, | |
| "step": 186200 | |
| }, | |
| { | |
| "epoch": 1.9332344059688482, | |
| "grad_norm": 38.26485824584961, | |
| "learning_rate": 0.00010667759710274262, | |
| "loss": 3.4654171752929686, | |
| "step": 186300 | |
| }, | |
| { | |
| "epoch": 1.9342721055963141, | |
| "grad_norm": 3.587207317352295, | |
| "learning_rate": 0.00010657382713999605, | |
| "loss": 3.443753967285156, | |
| "step": 186400 | |
| }, | |
| { | |
| "epoch": 1.9353098052237798, | |
| "grad_norm": 7.548192024230957, | |
| "learning_rate": 0.00010647005717724946, | |
| "loss": 3.555989074707031, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 1.9363475048512457, | |
| "grad_norm": 5.652491092681885, | |
| "learning_rate": 0.00010636628721450289, | |
| "loss": 3.5138848876953124, | |
| "step": 186600 | |
| }, | |
| { | |
| "epoch": 1.9373852044787117, | |
| "grad_norm": 4.181760311126709, | |
| "learning_rate": 0.0001062625172517563, | |
| "loss": 3.4649755859375, | |
| "step": 186700 | |
| }, | |
| { | |
| "epoch": 1.9384229041061776, | |
| "grad_norm": 39.51677703857422, | |
| "learning_rate": 0.00010615874728900972, | |
| "loss": 3.4170611572265623, | |
| "step": 186800 | |
| }, | |
| { | |
| "epoch": 1.9394606037336433, | |
| "grad_norm": 5.663796901702881, | |
| "learning_rate": 0.00010605497732626313, | |
| "loss": 3.6423403930664064, | |
| "step": 186900 | |
| }, | |
| { | |
| "epoch": 1.940498303361109, | |
| "grad_norm": 49.58971405029297, | |
| "learning_rate": 0.00010595120736351654, | |
| "loss": 3.556903076171875, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 1.9415360029885749, | |
| "grad_norm": 4.037705421447754, | |
| "learning_rate": 0.00010584743740076996, | |
| "loss": 3.581287536621094, | |
| "step": 187100 | |
| }, | |
| { | |
| "epoch": 1.9425737026160408, | |
| "grad_norm": 2.6354784965515137, | |
| "learning_rate": 0.00010574366743802338, | |
| "loss": 3.4927523803710936, | |
| "step": 187200 | |
| }, | |
| { | |
| "epoch": 1.9436114022435067, | |
| "grad_norm": 3.8889167308807373, | |
| "learning_rate": 0.0001056398974752768, | |
| "loss": 3.485701904296875, | |
| "step": 187300 | |
| }, | |
| { | |
| "epoch": 1.9446491018709724, | |
| "grad_norm": 6.694062232971191, | |
| "learning_rate": 0.00010553612751253021, | |
| "loss": 3.3910641479492187, | |
| "step": 187400 | |
| }, | |
| { | |
| "epoch": 1.9456868014984383, | |
| "grad_norm": 5.231113910675049, | |
| "learning_rate": 0.00010543235754978363, | |
| "loss": 3.5116064453125, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 1.946724501125904, | |
| "grad_norm": 13.281269073486328, | |
| "learning_rate": 0.00010532858758703704, | |
| "loss": 3.5454452514648436, | |
| "step": 187600 | |
| }, | |
| { | |
| "epoch": 1.94776220075337, | |
| "grad_norm": 5.362813472747803, | |
| "learning_rate": 0.00010522481762429046, | |
| "loss": 3.5717642211914065, | |
| "step": 187700 | |
| }, | |
| { | |
| "epoch": 1.9487999003808358, | |
| "grad_norm": 3.0265583992004395, | |
| "learning_rate": 0.00010512104766154387, | |
| "loss": 3.529712829589844, | |
| "step": 187800 | |
| }, | |
| { | |
| "epoch": 1.9498376000083018, | |
| "grad_norm": 2.4003071784973145, | |
| "learning_rate": 0.00010501727769879731, | |
| "loss": 3.5179287719726564, | |
| "step": 187900 | |
| }, | |
| { | |
| "epoch": 1.9508752996357674, | |
| "grad_norm": 3.5519869327545166, | |
| "learning_rate": 0.00010491350773605072, | |
| "loss": 3.3665447998046876, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.9519129992632331, | |
| "grad_norm": 1.9300223588943481, | |
| "learning_rate": 0.00010480973777330412, | |
| "loss": 3.5477023315429688, | |
| "step": 188100 | |
| }, | |
| { | |
| "epoch": 1.952950698890699, | |
| "grad_norm": 3.3745410442352295, | |
| "learning_rate": 0.00010470596781055755, | |
| "loss": 3.5283209228515626, | |
| "step": 188200 | |
| }, | |
| { | |
| "epoch": 1.953988398518165, | |
| "grad_norm": 18.314775466918945, | |
| "learning_rate": 0.00010460219784781096, | |
| "loss": 3.4730484008789064, | |
| "step": 188300 | |
| }, | |
| { | |
| "epoch": 1.9550260981456309, | |
| "grad_norm": 4.006529331207275, | |
| "learning_rate": 0.00010449842788506438, | |
| "loss": 3.4675115966796874, | |
| "step": 188400 | |
| }, | |
| { | |
| "epoch": 1.9560637977730966, | |
| "grad_norm": 4.9441094398498535, | |
| "learning_rate": 0.0001043946579223178, | |
| "loss": 3.404721984863281, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 1.9571014974005623, | |
| "grad_norm": 3.18265962600708, | |
| "learning_rate": 0.00010429088795957122, | |
| "loss": 3.667085876464844, | |
| "step": 188600 | |
| }, | |
| { | |
| "epoch": 1.9581391970280282, | |
| "grad_norm": 3.0164151191711426, | |
| "learning_rate": 0.00010418711799682463, | |
| "loss": 3.5224847412109375, | |
| "step": 188700 | |
| }, | |
| { | |
| "epoch": 1.959176896655494, | |
| "grad_norm": 5.3650007247924805, | |
| "learning_rate": 0.00010408334803407805, | |
| "loss": 3.4098544311523438, | |
| "step": 188800 | |
| }, | |
| { | |
| "epoch": 1.96021459628296, | |
| "grad_norm": 6.3775224685668945, | |
| "learning_rate": 0.00010397957807133146, | |
| "loss": 3.649906005859375, | |
| "step": 188900 | |
| }, | |
| { | |
| "epoch": 1.9612522959104257, | |
| "grad_norm": 18.32954978942871, | |
| "learning_rate": 0.00010387580810858487, | |
| "loss": 3.642203674316406, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.9622899955378916, | |
| "grad_norm": 3.267017126083374, | |
| "learning_rate": 0.0001037720381458383, | |
| "loss": 3.522268981933594, | |
| "step": 189100 | |
| }, | |
| { | |
| "epoch": 1.9633276951653573, | |
| "grad_norm": 3.3189854621887207, | |
| "learning_rate": 0.00010366826818309171, | |
| "loss": 3.525494384765625, | |
| "step": 189200 | |
| }, | |
| { | |
| "epoch": 1.9643653947928232, | |
| "grad_norm": 20.459917068481445, | |
| "learning_rate": 0.00010356449822034513, | |
| "loss": 3.4846673583984376, | |
| "step": 189300 | |
| }, | |
| { | |
| "epoch": 1.9654030944202892, | |
| "grad_norm": 10.600302696228027, | |
| "learning_rate": 0.00010346072825759854, | |
| "loss": 3.4710623168945314, | |
| "step": 189400 | |
| }, | |
| { | |
| "epoch": 1.966440794047755, | |
| "grad_norm": 5.836012363433838, | |
| "learning_rate": 0.00010335695829485196, | |
| "loss": 3.395472412109375, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 1.9674784936752208, | |
| "grad_norm": 1.8093000650405884, | |
| "learning_rate": 0.00010325318833210537, | |
| "loss": 3.4295391845703125, | |
| "step": 189600 | |
| }, | |
| { | |
| "epoch": 1.9685161933026865, | |
| "grad_norm": 3.580705165863037, | |
| "learning_rate": 0.0001031494183693588, | |
| "loss": 3.571369934082031, | |
| "step": 189700 | |
| }, | |
| { | |
| "epoch": 1.9695538929301524, | |
| "grad_norm": 4.870438575744629, | |
| "learning_rate": 0.00010304564840661222, | |
| "loss": 3.520045166015625, | |
| "step": 189800 | |
| }, | |
| { | |
| "epoch": 1.9705915925576183, | |
| "grad_norm": 3.781505823135376, | |
| "learning_rate": 0.00010294187844386564, | |
| "loss": 3.5424517822265624, | |
| "step": 189900 | |
| }, | |
| { | |
| "epoch": 1.9716292921850842, | |
| "grad_norm": 3.340085983276367, | |
| "learning_rate": 0.00010283810848111905, | |
| "loss": 3.518573913574219, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.97266699181255, | |
| "grad_norm": 5.02490234375, | |
| "learning_rate": 0.00010273433851837245, | |
| "loss": 3.3679263305664064, | |
| "step": 190100 | |
| }, | |
| { | |
| "epoch": 1.9737046914400158, | |
| "grad_norm": 4.117876052856445, | |
| "learning_rate": 0.00010263056855562588, | |
| "loss": 3.5929489135742188, | |
| "step": 190200 | |
| }, | |
| { | |
| "epoch": 1.9747423910674815, | |
| "grad_norm": 3.8365478515625, | |
| "learning_rate": 0.00010252679859287929, | |
| "loss": 3.40560302734375, | |
| "step": 190300 | |
| }, | |
| { | |
| "epoch": 1.9757800906949474, | |
| "grad_norm": 7.205904006958008, | |
| "learning_rate": 0.00010242302863013272, | |
| "loss": 3.38099609375, | |
| "step": 190400 | |
| }, | |
| { | |
| "epoch": 1.9768177903224133, | |
| "grad_norm": 2.767961025238037, | |
| "learning_rate": 0.00010231925866738613, | |
| "loss": 3.4381674194335936, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 1.9778554899498793, | |
| "grad_norm": 4.335025310516357, | |
| "learning_rate": 0.00010221548870463955, | |
| "loss": 3.3964199829101562, | |
| "step": 190600 | |
| }, | |
| { | |
| "epoch": 1.978893189577345, | |
| "grad_norm": 4.294001579284668, | |
| "learning_rate": 0.00010211171874189296, | |
| "loss": 3.411571350097656, | |
| "step": 190700 | |
| }, | |
| { | |
| "epoch": 1.9799308892048106, | |
| "grad_norm": 3.6443490982055664, | |
| "learning_rate": 0.00010200794877914638, | |
| "loss": 3.4534707641601563, | |
| "step": 190800 | |
| }, | |
| { | |
| "epoch": 1.9809685888322766, | |
| "grad_norm": 4.729245662689209, | |
| "learning_rate": 0.00010190417881639979, | |
| "loss": 3.577586669921875, | |
| "step": 190900 | |
| }, | |
| { | |
| "epoch": 1.9820062884597425, | |
| "grad_norm": 3.587510108947754, | |
| "learning_rate": 0.00010180040885365323, | |
| "loss": 3.4148577880859374, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.9830439880872084, | |
| "grad_norm": 13.635988235473633, | |
| "learning_rate": 0.00010169663889090663, | |
| "loss": 3.531971435546875, | |
| "step": 191100 | |
| }, | |
| { | |
| "epoch": 1.984081687714674, | |
| "grad_norm": 4.0034356117248535, | |
| "learning_rate": 0.00010159286892816004, | |
| "loss": 3.464627685546875, | |
| "step": 191200 | |
| }, | |
| { | |
| "epoch": 1.98511938734214, | |
| "grad_norm": 4.326283931732178, | |
| "learning_rate": 0.00010148909896541346, | |
| "loss": 3.4689093017578125, | |
| "step": 191300 | |
| }, | |
| { | |
| "epoch": 1.9861570869696057, | |
| "grad_norm": 10.159041404724121, | |
| "learning_rate": 0.00010138532900266687, | |
| "loss": 3.4093603515625, | |
| "step": 191400 | |
| }, | |
| { | |
| "epoch": 1.9871947865970716, | |
| "grad_norm": 6.295145511627197, | |
| "learning_rate": 0.0001012815590399203, | |
| "loss": 3.4013311767578127, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 1.9882324862245375, | |
| "grad_norm": 2.6228549480438232, | |
| "learning_rate": 0.0001011777890771737, | |
| "loss": 3.4039892578125, | |
| "step": 191600 | |
| }, | |
| { | |
| "epoch": 1.9892701858520034, | |
| "grad_norm": 2.0637784004211426, | |
| "learning_rate": 0.00010107401911442714, | |
| "loss": 3.4192919921875, | |
| "step": 191700 | |
| }, | |
| { | |
| "epoch": 1.9903078854794691, | |
| "grad_norm": 4.193583011627197, | |
| "learning_rate": 0.00010097024915168055, | |
| "loss": 3.5069757080078126, | |
| "step": 191800 | |
| }, | |
| { | |
| "epoch": 1.9913455851069348, | |
| "grad_norm": 3.6812117099761963, | |
| "learning_rate": 0.00010086647918893397, | |
| "loss": 3.421480712890625, | |
| "step": 191900 | |
| }, | |
| { | |
| "epoch": 1.9923832847344007, | |
| "grad_norm": 33.859195709228516, | |
| "learning_rate": 0.00010076270922618738, | |
| "loss": 3.506886291503906, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.9934209843618667, | |
| "grad_norm": 3.308947801589966, | |
| "learning_rate": 0.00010065893926344079, | |
| "loss": 3.424991455078125, | |
| "step": 192100 | |
| }, | |
| { | |
| "epoch": 1.9944586839893326, | |
| "grad_norm": 4.380412578582764, | |
| "learning_rate": 0.00010055516930069421, | |
| "loss": 3.4896340942382813, | |
| "step": 192200 | |
| }, | |
| { | |
| "epoch": 1.9954963836167983, | |
| "grad_norm": 3.492359161376953, | |
| "learning_rate": 0.00010045139933794763, | |
| "loss": 3.403392333984375, | |
| "step": 192300 | |
| }, | |
| { | |
| "epoch": 1.996534083244264, | |
| "grad_norm": 8.865891456604004, | |
| "learning_rate": 0.00010034762937520105, | |
| "loss": 3.60391845703125, | |
| "step": 192400 | |
| }, | |
| { | |
| "epoch": 1.9975717828717299, | |
| "grad_norm": 1.982731819152832, | |
| "learning_rate": 0.00010024385941245446, | |
| "loss": 3.5614895629882812, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 1.9986094824991958, | |
| "grad_norm": 2.9287161827087402, | |
| "learning_rate": 0.00010014008944970788, | |
| "loss": 3.5097760009765624, | |
| "step": 192600 | |
| }, | |
| { | |
| "epoch": 1.9996471821266617, | |
| "grad_norm": 1.8267062902450562, | |
| "learning_rate": 0.00010003631948696129, | |
| "loss": 3.4958160400390623, | |
| "step": 192700 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 289101, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.2645192822135194e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
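
The state above is the `trainer_state.json` that the HuggingFace `transformers` Trainer writes alongside checkpoints. A minimal sketch for loading and summarizing it follows; the file path is an assumption, and note that these files can contain non-strict JSON tokens such as `Infinity` or `NaN` (e.g. in `grad_norm`), which strict parsers like JavaScript's `JSON.parse` reject but Python's `json` module accepts by default.

```python
import json

# Minimal sketch: load a Trainer state file and summarize the loss curve.
# The path "trainer_state.json" is an assumption; point it at your checkpoint.
# Python's json module parses Infinity/NaN by default (parse_constant unset),
# so entries like "grad_norm": Infinity load as float("inf") rather than erroring.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

print(f"logged {len(history)} entries up to step {state['global_step']}")
print(f"epoch {state['epoch']:.1f}, final loss at step {steps[-1]}: {losses[-1]:.3f}")
```

With this trace, `len(history)` would be 1927 (one entry per 100 steps per `logging_steps`), and the final print reports the epoch-2.0 loss logged at step 192700.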