amirali1985 commited on
Commit
87364b8
·
verified ·
1 Parent(s): cc03d4c

Upload add_sub_baseline_100K_2L1H128d

Browse files
add_sub_baseline_100K_2L1H128d/metrics.json CHANGED
@@ -628,1258 +628,1258 @@
628
  31250
629
  ],
630
  "loss": [
631
- 11.977042198181152,
632
- 11.832605361938477,
633
- 11.639579772949219,
634
- 11.342317581176758,
635
- 11.0377197265625,
636
- 10.74262523651123,
637
- 10.538628578186035,
638
- 10.45229434967041,
639
- 10.25084400177002,
640
- 10.019124031066895,
641
- 9.906777381896973,
642
- 9.689866065979004,
643
- 9.564855575561523,
644
- 9.345112800598145,
645
- 9.12466049194336,
646
- 8.876622200012207,
647
- 8.629063606262207,
648
- 8.415751457214355,
649
- 8.068711280822754,
650
- 7.833225727081299,
651
- 7.505434513092041,
652
- 7.218222618103027,
653
- 6.988138675689697,
654
- 6.60437536239624,
655
- 6.359375,
656
- 6.010478496551514,
657
- 5.812808990478516,
658
- 5.430776119232178,
659
- 5.128222942352295,
660
- 4.943942546844482,
661
- 4.595494747161865,
662
- 4.427554607391357,
663
- 4.135222434997559,
664
- 3.901402235031128,
665
- 3.6997172832489014,
666
- 3.388075590133667,
667
- 3.211488962173462,
668
- 3.0833468437194824,
669
- 2.908508777618408,
670
- 2.7631280422210693,
671
- 2.642669200897217,
672
- 2.544445037841797,
673
- 2.4326510429382324,
674
- 2.304041624069214,
675
- 2.297609567642212,
676
- 2.19498872756958,
677
- 2.14454984664917,
678
- 2.190228223800659,
679
- 2.138949155807495,
680
- 2.071823835372925,
681
- 2.0647244453430176,
682
- 2.0801050662994385,
683
- 2.0899431705474854,
684
- 2.0253071784973145,
685
- 2.064008951187134,
686
- 1.992350459098816,
687
- 1.9674683809280396,
688
- 2.013637065887451,
689
- 1.933286428451538,
690
- 1.9325166940689087,
691
- 1.9623585939407349,
692
- 1.998954176902771,
693
- 1.9583021402359009,
694
- 1.964821457862854,
695
- 1.8752564191818237,
696
- 1.9217044115066528,
697
- 1.874695062637329,
698
- 1.8838536739349365,
699
- 1.9211722612380981,
700
- 1.9229551553726196,
701
- 1.8850759267807007,
702
- 1.7456798553466797,
703
- 1.7307690382003784,
704
- 1.8768775463104248,
705
- 1.8714839220046997,
706
- 1.8702248334884644,
707
- 1.7888954877853394,
708
- 1.837731957435608,
709
- 1.8072580099105835,
710
- 1.7668285369873047,
711
- 1.7479711771011353,
712
- 1.6895802021026611,
713
- 1.749003291130066,
714
- 1.740459680557251,
715
- 1.6661880016326904,
716
- 1.6528176069259644,
717
- 1.7080800533294678,
718
- 1.6159528493881226,
719
- 1.6114765405654907,
720
- 1.6065454483032227,
721
- 1.6221038103103638,
722
- 1.5639398097991943,
723
- 1.507914662361145,
724
- 1.5028311014175415,
725
- 1.508623719215393,
726
- 1.4017530679702759,
727
- 1.3961706161499023,
728
- 1.3411399126052856,
729
- 1.3201547861099243,
730
- 1.3360451459884644,
731
- 1.2849161624908447,
732
- 1.2160416841506958,
733
- 1.1782668828964233,
734
- 1.2449954748153687,
735
- 1.222753882408142,
736
- 1.16804039478302,
737
- 1.1446696519851685,
738
- 1.1211735010147095,
739
- 1.1117109060287476,
740
- 1.1207637786865234,
741
- 1.0753263235092163,
742
- 1.1095165014266968,
743
- 1.0497922897338867,
744
- 1.0258709192276,
745
- 1.0586824417114258,
746
- 1.0505311489105225,
747
- 1.0024679899215698,
748
- 0.9912530183792114,
749
- 0.9677543044090271,
750
- 0.9638428092002869,
751
- 0.941684901714325,
752
- 0.8960302472114563,
753
- 0.9070016145706177,
754
- 0.8673309683799744,
755
- 0.837029755115509,
756
- 0.8565458059310913,
757
- 0.8187461495399475,
758
- 0.8760162591934204,
759
- 0.811039924621582,
760
- 0.8174161314964294,
761
- 0.8083099722862244,
762
- 0.7729467749595642,
763
- 0.7676922678947449,
764
- 0.7842200994491577,
765
- 0.7361089587211609,
766
- 0.7701181173324585,
767
- 0.7662209272384644,
768
- 0.7574747800827026,
769
- 0.7157047986984253,
770
- 0.7457594275474548,
771
- 0.7155031561851501,
772
- 0.7134730219841003,
773
- 0.6851155161857605,
774
- 0.6558194160461426,
775
- 0.6535345911979675,
776
- 0.6496022343635559,
777
- 0.6414950489997864,
778
- 0.6602500081062317,
779
- 0.6322754621505737,
780
- 0.6254755854606628,
781
- 0.6046976447105408,
782
- 0.5830899477005005,
783
- 0.5934770703315735,
784
- 0.625095784664154,
785
- 0.6127793192863464,
786
- 0.6231734156608582,
787
- 0.5763152837753296,
788
- 0.6352439522743225,
789
- 0.6301745176315308,
790
- 0.6138210892677307,
791
- 0.5850849151611328,
792
- 0.5591212511062622,
793
- 0.5491364598274231,
794
- 0.5610260367393494,
795
- 0.5658707618713379,
796
- 0.5537570118904114,
797
- 0.5227963328361511,
798
- 0.5337905287742615,
799
- 0.571171760559082,
800
- 0.4919911026954651,
801
- 0.5623064041137695,
802
- 0.49735212326049805,
803
- 0.5251079797744751,
804
- 0.49005427956581116,
805
- 0.4920663833618164,
806
- 0.5062692761421204,
807
- 0.49886447191238403,
808
- 0.4979371130466461,
809
- 0.4994959235191345,
810
- 0.4708382785320282,
811
- 0.4461270272731781,
812
- 0.48827752470970154,
813
- 0.48627349734306335,
814
- 0.4709095358848572,
815
- 0.4374123215675354,
816
- 0.4551429748535156,
817
- 0.4273804724216461,
818
- 0.4615263044834137,
819
- 0.46365591883659363,
820
- 0.45876625180244446,
821
- 0.43030303716659546,
822
- 0.4557846486568451,
823
- 0.3991808295249939,
824
- 0.4235609173774719,
825
- 0.43494540452957153,
826
- 0.3913825452327728,
827
- 0.41650471091270447,
828
- 0.4323137104511261,
829
- 0.4159674048423767,
830
- 0.4032060205936432,
831
- 0.4136103093624115,
832
- 0.3848271369934082,
833
- 0.4130419194698334,
834
- 0.3380722403526306,
835
- 0.4253701865673065,
836
- 0.36251211166381836,
837
- 0.3768291473388672,
838
- 0.42368084192276,
839
- 0.35189884901046753,
840
- 0.41239291429519653,
841
- 0.35483068227767944,
842
- 0.3896591365337372,
843
- 0.342414915561676,
844
- 0.35113129019737244,
845
- 0.3747749626636505,
846
- 0.39180782437324524,
847
- 0.36123037338256836,
848
- 0.3841114044189453,
849
- 0.35293588042259216,
850
- 0.36013272404670715,
851
- 0.3717457354068756,
852
- 0.3420697748661041,
853
- 0.3644498288631439,
854
- 0.38519591093063354,
855
- 0.3384056091308594,
856
- 0.3558005690574646,
857
- 0.3262862265110016,
858
- 0.35307401418685913,
859
- 0.34869739413261414,
860
- 0.3247682750225067,
861
- 0.3389168679714203,
862
- 0.31367042660713196,
863
- 0.32246801257133484,
864
- 0.3325541019439697,
865
- 0.3126663863658905,
866
- 0.307912141084671,
867
- 0.3146059513092041,
868
- 0.29749685525894165,
869
- 0.29092270135879517,
870
- 0.29880958795547485,
871
- 0.3028065860271454,
872
- 0.31483981013298035,
873
- 0.30662885308265686,
874
- 0.3288079798221588,
875
- 0.2825016975402832,
876
- 0.27453750371932983,
877
- 0.2763153612613678,
878
- 0.31982606649398804,
879
- 0.3169565200805664,
880
- 0.28875669836997986,
881
- 0.2774421274662018,
882
- 0.27820807695388794,
883
- 0.2744719684123993,
884
- 0.26809149980545044,
885
- 0.2914562225341797,
886
- 0.28146621584892273,
887
- 0.2956283986568451,
888
- 0.27729472517967224,
889
- 0.26264533400535583,
890
- 0.2586286962032318,
891
- 0.3196188509464264,
892
- 0.2641201615333557,
893
- 0.2947527766227722,
894
- 0.22202637791633606,
895
- 0.26569533348083496,
896
- 0.2558952271938324,
897
- 0.2774972915649414,
898
- 0.28140655159950256,
899
- 0.2784387171268463,
900
- 0.2750130593776703,
901
- 0.2585727274417877,
902
- 0.26905277371406555,
903
- 0.26080891489982605,
904
- 0.2769533395767212,
905
- 0.253186970949173,
906
- 0.27682092785835266,
907
- 0.24935530126094818,
908
- 0.26743823289871216,
909
- 0.2523672878742218,
910
- 0.24207758903503418,
911
- 0.24797692894935608,
912
- 0.22850987315177917,
913
- 0.24364294111728668,
914
- 0.23140747845172882,
915
- 0.22725312411785126,
916
- 0.27189895510673523,
917
- 0.2742701470851898,
918
- 0.24831081926822662,
919
- 0.24600134789943695,
920
- 0.24502108991146088,
921
- 0.2064800262451172,
922
- 0.21140344440937042,
923
- 0.2310919463634491,
924
- 0.2578493654727936,
925
- 0.2671879529953003,
926
- 0.2098991721868515,
927
- 0.21068748831748962,
928
- 0.25403282046318054,
929
- 0.2670443654060364,
930
- 0.19045837223529816,
931
- 0.23210535943508148,
932
- 0.2465614229440689,
933
- 0.20154492557048798,
934
- 0.21706925332546234,
935
- 0.22015045583248138,
936
- 0.21647998690605164,
937
- 0.22915495932102203,
938
- 0.2205311805009842,
939
- 0.1925881952047348,
940
- 0.24692997336387634,
941
- 0.25309181213378906,
942
- 0.21870723366737366,
943
- 0.245932474732399,
944
- 0.2309727668762207,
945
- 0.19083799421787262,
946
- 0.22509662806987762,
947
- 0.22570917010307312,
948
- 0.25146031379699707,
949
- 0.1879393607378006,
950
- 0.17903225123882294,
951
- 0.230800598859787,
952
- 0.19553127884864807,
953
- 0.20370914041996002,
954
- 0.25358861684799194,
955
- 0.22613056004047394,
956
- 0.2019713670015335,
957
- 0.2268919199705124,
958
- 0.24874211847782135,
959
- 0.20951731503009796,
960
- 0.2107919603586197,
961
- 0.1959504336118698,
962
- 0.21553952991962433,
963
- 0.21554414927959442,
964
- 0.23541958630084991,
965
- 0.18030405044555664,
966
- 0.20203988254070282,
967
- 0.17926515638828278,
968
- 0.263072669506073,
969
- 0.2108616977930069,
970
- 0.14598241448402405,
971
- 0.20030143857002258,
972
- 0.22456932067871094,
973
- 0.2146858125925064,
974
- 0.20507298409938812,
975
- 0.19385136663913727,
976
- 0.1979534924030304,
977
- 0.20209898054599762,
978
- 0.20098797976970673,
979
- 0.1956910341978073,
980
- 0.19167955219745636,
981
- 0.21638809144496918,
982
- 0.1940392702817917,
983
- 0.18071554601192474,
984
- 0.23301713168621063,
985
- 0.18571732938289642,
986
- 0.20041833817958832,
987
- 0.19002893567085266,
988
- 0.23817777633666992,
989
- 0.16994866728782654,
990
- 0.1998552531003952,
991
- 0.18940328061580658,
992
- 0.1803516000509262,
993
- 0.17863358557224274,
994
- 0.19638517498970032,
995
- 0.2153729498386383,
996
- 0.16207505762577057,
997
- 0.16593241691589355,
998
- 0.1831996887922287,
999
- 0.2158493995666504,
1000
- 0.20669953525066376,
1001
- 0.16301994025707245,
1002
- 0.19910219311714172,
1003
- 0.19990552961826324,
1004
- 0.18015281856060028,
1005
- 0.13783733546733856,
1006
- 0.18134383857250214,
1007
- 0.16741850972175598,
1008
- 0.177109494805336,
1009
- 0.2050514966249466,
1010
- 0.1758144348859787,
1011
- 0.19073748588562012,
1012
- 0.1639399379491806,
1013
- 0.1469561606645584,
1014
- 0.20477049052715302,
1015
- 0.1598491668701172,
1016
- 0.17077581584453583,
1017
- 0.17846567928791046,
1018
- 0.2001478374004364,
1019
- 0.17690904438495636,
1020
- 0.16815437376499176,
1021
- 0.1419142186641693,
1022
- 0.19135485589504242,
1023
- 0.14557300508022308,
1024
- 0.1611776202917099,
1025
- 0.19547899067401886,
1026
- 0.19376233220100403,
1027
- 0.19313618540763855,
1028
- 0.17107929289340973,
1029
- 0.1763370782136917,
1030
- 0.19234664738178253,
1031
- 0.1792294681072235,
1032
- 0.22957375645637512,
1033
- 0.1926773637533188,
1034
- 0.17082278430461884,
1035
- 0.15852557122707367,
1036
- 0.16429941356182098,
1037
- 0.161500483751297,
1038
- 0.17774653434753418,
1039
- 0.16790100932121277,
1040
- 0.15072883665561676,
1041
- 0.1515331268310547,
1042
- 0.15604127943515778,
1043
- 0.16774308681488037,
1044
- 0.17304901778697968,
1045
- 0.16783729195594788,
1046
- 0.1633901447057724,
1047
- 0.15018674731254578,
1048
- 0.1959364116191864,
1049
- 0.15900716185569763,
1050
- 0.17716506123542786,
1051
- 0.18148012459278107,
1052
- 0.16957084834575653,
1053
- 0.17450162768363953,
1054
- 0.14624397456645966,
1055
- 0.19237984716892242,
1056
- 0.14564093947410583,
1057
- 0.1572442352771759,
1058
- 0.1769915670156479,
1059
- 0.18431629240512848,
1060
- 0.18862608075141907,
1061
- 0.1972048431634903,
1062
- 0.16945073008537292,
1063
- 0.17277629673480988,
1064
- 0.16611048579216003,
1065
- 0.17368483543395996,
1066
- 0.15440277755260468,
1067
- 0.18911337852478027,
1068
- 0.17402216792106628,
1069
- 0.19015789031982422,
1070
- 0.16805972158908844,
1071
- 0.15407218039035797,
1072
- 0.14304251968860626,
1073
- 0.18865016102790833,
1074
- 0.16110943257808685,
1075
- 0.2045971304178238,
1076
- 0.1502329260110855,
1077
- 0.12428479641675949,
1078
- 0.16494467854499817,
1079
- 0.12184248119592667,
1080
- 0.1607920080423355,
1081
- 0.1637299805879593,
1082
- 0.1727958768606186,
1083
- 0.168440580368042,
1084
- 0.1445026397705078,
1085
- 0.15350453555583954,
1086
- 0.14372386038303375,
1087
- 0.16206717491149902,
1088
- 0.15405137836933136,
1089
- 0.15814034640789032,
1090
- 0.14433301985263824,
1091
- 0.14300145208835602,
1092
- 0.17697784304618835,
1093
- 0.18625423312187195,
1094
- 0.16295507550239563,
1095
- 0.12883833050727844,
1096
- 0.15285921096801758,
1097
- 0.15010376274585724,
1098
- 0.1580805778503418,
1099
- 0.14755387604236603,
1100
- 0.19289889931678772,
1101
- 0.12691619992256165,
1102
- 0.14621464908123016,
1103
- 0.11558129638433456,
1104
- 0.1655631810426712,
1105
- 0.17105735838413239,
1106
- 0.17908822000026703,
1107
- 0.17774316668510437,
1108
- 0.16395303606987,
1109
- 0.1435573399066925,
1110
- 0.12409909814596176,
1111
- 0.16253414750099182,
1112
- 0.1573280543088913,
1113
- 0.14960770308971405,
1114
- 0.14318087697029114,
1115
- 0.12494007498025894,
1116
- 0.13559222221374512,
1117
- 0.18777742981910706,
1118
- 0.12923327088356018,
1119
- 0.16082116961479187,
1120
- 0.15188278257846832,
1121
- 0.13781002163887024,
1122
- 0.13092568516731262,
1123
- 0.17344479262828827,
1124
- 0.14992226660251617,
1125
- 0.11788155138492584,
1126
- 0.17064552009105682,
1127
- 0.1436513215303421,
1128
- 0.1744157075881958,
1129
- 0.1471249759197235,
1130
- 0.1319369375705719,
1131
- 0.12590177357196808,
1132
- 0.14819200336933136,
1133
- 0.1566087156534195,
1134
- 0.1258556842803955,
1135
- 0.1425287425518036,
1136
- 0.14611411094665527,
1137
- 0.12872281670570374,
1138
- 0.1713401973247528,
1139
- 0.1419590562582016,
1140
- 0.16423705220222473,
1141
- 0.13404837250709534,
1142
- 0.12364423274993896,
1143
- 0.13353344798088074,
1144
- 0.16752830147743225,
1145
- 0.17974546551704407,
1146
- 0.133741095662117,
1147
- 0.14522182941436768,
1148
- 0.14375519752502441,
1149
- 0.16996373236179352,
1150
- 0.12796184420585632,
1151
- 0.19787277281284332,
1152
- 0.14406491816043854,
1153
- 0.1600307673215866,
1154
- 0.14013724029064178,
1155
- 0.1414320021867752,
1156
- 0.13259896636009216,
1157
- 0.16883596777915955,
1158
- 0.15063153207302094,
1159
- 0.16968485713005066,
1160
- 0.1448565423488617,
1161
- 0.13374046981334686,
1162
- 0.14191940426826477,
1163
- 0.16690602898597717,
1164
- 0.16557033360004425,
1165
- 0.1341642588376999,
1166
- 0.14818671345710754,
1167
- 0.15339885652065277,
1168
- 0.1358605921268463,
1169
- 0.11332222074270248,
1170
- 0.13079999387264252,
1171
- 0.11911796033382416,
1172
- 0.13282403349876404,
1173
- 0.1295071691274643,
1174
- 0.14931797981262207,
1175
- 0.1497408151626587,
1176
- 0.1576954424381256,
1177
- 0.1566249281167984,
1178
- 0.14676371216773987,
1179
- 0.14718690514564514,
1180
- 0.14807923138141632,
1181
- 0.1212613433599472,
1182
- 0.16526338458061218,
1183
- 0.12577828764915466,
1184
- 0.14465472102165222,
1185
- 0.1332075595855713,
1186
- 0.14287416636943817,
1187
- 0.14098156988620758,
1188
- 0.12206381559371948,
1189
- 0.13529722392559052,
1190
- 0.14950823783874512,
1191
- 0.12731051445007324,
1192
- 0.1640467643737793,
1193
- 0.15214170515537262,
1194
- 0.14054253697395325,
1195
- 0.13051234185695648,
1196
- 0.12513872981071472,
1197
- 0.1256050020456314,
1198
- 0.13018260896205902,
1199
- 0.12952646613121033,
1200
- 0.15313276648521423,
1201
- 0.15212035179138184,
1202
- 0.1590813547372818,
1203
- 0.1418110430240631,
1204
- 0.14174507558345795,
1205
- 0.1443943977355957,
1206
- 0.13427415490150452,
1207
- 0.14180710911750793,
1208
- 0.1526232659816742,
1209
- 0.14491091668605804,
1210
- 0.1218637228012085,
1211
- 0.15213581919670105,
1212
- 0.12060185521841049,
1213
- 0.12314708530902863,
1214
- 0.135157972574234,
1215
- 0.14702904224395752,
1216
- 0.1435243785381317,
1217
- 0.14389681816101074,
1218
- 0.12983374297618866,
1219
- 0.1256387084722519,
1220
- 0.13456468284130096,
1221
- 0.11090267449617386,
1222
- 0.14436398446559906,
1223
- 0.15340672433376312,
1224
- 0.13106809556484222,
1225
- 0.13008198142051697,
1226
- 0.12773239612579346,
1227
- 0.12845288217067719,
1228
- 0.12067259848117828,
1229
- 0.12159726768732071,
1230
- 0.12335409969091415,
1231
- 0.14989832043647766,
1232
- 0.18622605502605438,
1233
- 0.14446382224559784,
1234
- 0.13140790164470673,
1235
- 0.14309929311275482,
1236
- 0.1644481122493744,
1237
- 0.13612475991249084,
1238
- 0.13592186570167542,
1239
- 0.1400979459285736,
1240
- 0.14549663662910461,
1241
- 0.14845392107963562,
1242
- 0.16581669449806213,
1243
- 0.1395435482263565,
1244
- 0.12557461857795715,
1245
- 0.13034124672412872,
1246
- 0.11364679783582687,
1247
- 0.1468597948551178,
1248
- 0.12835600972175598,
1249
- 0.1345360428094864,
1250
- 0.16890299320220947,
1251
- 0.1177949383854866,
1252
- 0.1442183256149292,
1253
- 0.13947038352489471,
1254
- 0.13237617909908295,
1255
- 0.12772080302238464
1256
  ],
1257
  "base_loss": [
1258
- 11.977042198181152,
1259
- 11.832605361938477,
1260
- 11.639579772949219,
1261
- 11.342317581176758,
1262
- 11.0377197265625,
1263
- 10.74262523651123,
1264
- 10.538628578186035,
1265
- 10.45229434967041,
1266
- 10.25084400177002,
1267
- 10.019124031066895,
1268
- 9.906777381896973,
1269
- 9.689866065979004,
1270
- 9.564855575561523,
1271
- 9.345112800598145,
1272
- 9.12466049194336,
1273
- 8.876622200012207,
1274
- 8.629063606262207,
1275
- 8.415751457214355,
1276
- 8.068711280822754,
1277
- 7.833225727081299,
1278
- 7.505434513092041,
1279
- 7.218222618103027,
1280
- 6.988138675689697,
1281
- 6.60437536239624,
1282
- 6.359375,
1283
- 6.010478496551514,
1284
- 5.812808990478516,
1285
- 5.430776119232178,
1286
- 5.128222942352295,
1287
- 4.943942546844482,
1288
- 4.595494747161865,
1289
- 4.427554607391357,
1290
- 4.135222434997559,
1291
- 3.901402235031128,
1292
- 3.6997172832489014,
1293
- 3.388075590133667,
1294
- 3.211488962173462,
1295
- 3.0833468437194824,
1296
- 2.908508777618408,
1297
- 2.7631280422210693,
1298
- 2.642669200897217,
1299
- 2.544445037841797,
1300
- 2.4326510429382324,
1301
- 2.304041624069214,
1302
- 2.297609567642212,
1303
- 2.19498872756958,
1304
- 2.14454984664917,
1305
- 2.190228223800659,
1306
- 2.138949155807495,
1307
- 2.071823835372925,
1308
- 2.0647244453430176,
1309
- 2.0801050662994385,
1310
- 2.0899431705474854,
1311
- 2.0253071784973145,
1312
- 2.064008951187134,
1313
- 1.992350459098816,
1314
- 1.9674683809280396,
1315
- 2.013637065887451,
1316
- 1.933286428451538,
1317
- 1.9325166940689087,
1318
- 1.9623585939407349,
1319
- 1.998954176902771,
1320
- 1.9583021402359009,
1321
- 1.964821457862854,
1322
- 1.8752564191818237,
1323
- 1.9217044115066528,
1324
- 1.874695062637329,
1325
- 1.8838536739349365,
1326
- 1.9211722612380981,
1327
- 1.9229551553726196,
1328
- 1.8850759267807007,
1329
- 1.7456798553466797,
1330
- 1.7307690382003784,
1331
- 1.8768775463104248,
1332
- 1.8714839220046997,
1333
- 1.8702248334884644,
1334
- 1.7888954877853394,
1335
- 1.837731957435608,
1336
- 1.8072580099105835,
1337
- 1.7668285369873047,
1338
- 1.7479711771011353,
1339
- 1.6895802021026611,
1340
- 1.749003291130066,
1341
- 1.740459680557251,
1342
- 1.6661880016326904,
1343
- 1.6528176069259644,
1344
- 1.7080800533294678,
1345
- 1.6159528493881226,
1346
- 1.6114765405654907,
1347
- 1.6065454483032227,
1348
- 1.6221038103103638,
1349
- 1.5639398097991943,
1350
- 1.507914662361145,
1351
- 1.5028311014175415,
1352
- 1.508623719215393,
1353
- 1.4017530679702759,
1354
- 1.3961706161499023,
1355
- 1.3411399126052856,
1356
- 1.3201547861099243,
1357
- 1.3360451459884644,
1358
- 1.2849161624908447,
1359
- 1.2160416841506958,
1360
- 1.1782668828964233,
1361
- 1.2449954748153687,
1362
- 1.222753882408142,
1363
- 1.16804039478302,
1364
- 1.1446696519851685,
1365
- 1.1211735010147095,
1366
- 1.1117109060287476,
1367
- 1.1207637786865234,
1368
- 1.0753263235092163,
1369
- 1.1095165014266968,
1370
- 1.0497922897338867,
1371
- 1.0258709192276,
1372
- 1.0586824417114258,
1373
- 1.0505311489105225,
1374
- 1.0024679899215698,
1375
- 0.9912530183792114,
1376
- 0.9677543044090271,
1377
- 0.9638428092002869,
1378
- 0.941684901714325,
1379
- 0.8960302472114563,
1380
- 0.9070016145706177,
1381
- 0.8673309683799744,
1382
- 0.837029755115509,
1383
- 0.8565458059310913,
1384
- 0.8187461495399475,
1385
- 0.8760162591934204,
1386
- 0.811039924621582,
1387
- 0.8174161314964294,
1388
- 0.8083099722862244,
1389
- 0.7729467749595642,
1390
- 0.7676922678947449,
1391
- 0.7842200994491577,
1392
- 0.7361089587211609,
1393
- 0.7701181173324585,
1394
- 0.7662209272384644,
1395
- 0.7574747800827026,
1396
- 0.7157047986984253,
1397
- 0.7457594275474548,
1398
- 0.7155031561851501,
1399
- 0.7134730219841003,
1400
- 0.6851155161857605,
1401
- 0.6558194160461426,
1402
- 0.6535345911979675,
1403
- 0.6496022343635559,
1404
- 0.6414950489997864,
1405
- 0.6602500081062317,
1406
- 0.6322754621505737,
1407
- 0.6254755854606628,
1408
- 0.6046976447105408,
1409
- 0.5830899477005005,
1410
- 0.5934770703315735,
1411
- 0.625095784664154,
1412
- 0.6127793192863464,
1413
- 0.6231734156608582,
1414
- 0.5763152837753296,
1415
- 0.6352439522743225,
1416
- 0.6301745176315308,
1417
- 0.6138210892677307,
1418
- 0.5850849151611328,
1419
- 0.5591212511062622,
1420
- 0.5491364598274231,
1421
- 0.5610260367393494,
1422
- 0.5658707618713379,
1423
- 0.5537570118904114,
1424
- 0.5227963328361511,
1425
- 0.5337905287742615,
1426
- 0.571171760559082,
1427
- 0.4919911026954651,
1428
- 0.5623064041137695,
1429
- 0.49735212326049805,
1430
- 0.5251079797744751,
1431
- 0.49005427956581116,
1432
- 0.4920663833618164,
1433
- 0.5062692761421204,
1434
- 0.49886447191238403,
1435
- 0.4979371130466461,
1436
- 0.4994959235191345,
1437
- 0.4708382785320282,
1438
- 0.4461270272731781,
1439
- 0.48827752470970154,
1440
- 0.48627349734306335,
1441
- 0.4709095358848572,
1442
- 0.4374123215675354,
1443
- 0.4551429748535156,
1444
- 0.4273804724216461,
1445
- 0.4615263044834137,
1446
- 0.46365591883659363,
1447
- 0.45876625180244446,
1448
- 0.43030303716659546,
1449
- 0.4557846486568451,
1450
- 0.3991808295249939,
1451
- 0.4235609173774719,
1452
- 0.43494540452957153,
1453
- 0.3913825452327728,
1454
- 0.41650471091270447,
1455
- 0.4323137104511261,
1456
- 0.4159674048423767,
1457
- 0.4032060205936432,
1458
- 0.4136103093624115,
1459
- 0.3848271369934082,
1460
- 0.4130419194698334,
1461
- 0.3380722403526306,
1462
- 0.4253701865673065,
1463
- 0.36251211166381836,
1464
- 0.3768291473388672,
1465
- 0.42368084192276,
1466
- 0.35189884901046753,
1467
- 0.41239291429519653,
1468
- 0.35483068227767944,
1469
- 0.3896591365337372,
1470
- 0.342414915561676,
1471
- 0.35113129019737244,
1472
- 0.3747749626636505,
1473
- 0.39180782437324524,
1474
- 0.36123037338256836,
1475
- 0.3841114044189453,
1476
- 0.35293588042259216,
1477
- 0.36013272404670715,
1478
- 0.3717457354068756,
1479
- 0.3420697748661041,
1480
- 0.3644498288631439,
1481
- 0.38519591093063354,
1482
- 0.3384056091308594,
1483
- 0.3558005690574646,
1484
- 0.3262862265110016,
1485
- 0.35307401418685913,
1486
- 0.34869739413261414,
1487
- 0.3247682750225067,
1488
- 0.3389168679714203,
1489
- 0.31367042660713196,
1490
- 0.32246801257133484,
1491
- 0.3325541019439697,
1492
- 0.3126663863658905,
1493
- 0.307912141084671,
1494
- 0.3146059513092041,
1495
- 0.29749685525894165,
1496
- 0.29092270135879517,
1497
- 0.29880958795547485,
1498
- 0.3028065860271454,
1499
- 0.31483981013298035,
1500
- 0.30662885308265686,
1501
- 0.3288079798221588,
1502
- 0.2825016975402832,
1503
- 0.27453750371932983,
1504
- 0.2763153612613678,
1505
- 0.31982606649398804,
1506
- 0.3169565200805664,
1507
- 0.28875669836997986,
1508
- 0.2774421274662018,
1509
- 0.27820807695388794,
1510
- 0.2744719684123993,
1511
- 0.26809149980545044,
1512
- 0.2914562225341797,
1513
- 0.28146621584892273,
1514
- 0.2956283986568451,
1515
- 0.27729472517967224,
1516
- 0.26264533400535583,
1517
- 0.2586286962032318,
1518
- 0.3196188509464264,
1519
- 0.2641201615333557,
1520
- 0.2947527766227722,
1521
- 0.22202637791633606,
1522
- 0.26569533348083496,
1523
- 0.2558952271938324,
1524
- 0.2774972915649414,
1525
- 0.28140655159950256,
1526
- 0.2784387171268463,
1527
- 0.2750130593776703,
1528
- 0.2585727274417877,
1529
- 0.26905277371406555,
1530
- 0.26080891489982605,
1531
- 0.2769533395767212,
1532
- 0.253186970949173,
1533
- 0.27682092785835266,
1534
- 0.24935530126094818,
1535
- 0.26743823289871216,
1536
- 0.2523672878742218,
1537
- 0.24207758903503418,
1538
- 0.24797692894935608,
1539
- 0.22850987315177917,
1540
- 0.24364294111728668,
1541
- 0.23140747845172882,
1542
- 0.22725312411785126,
1543
- 0.27189895510673523,
1544
- 0.2742701470851898,
1545
- 0.24831081926822662,
1546
- 0.24600134789943695,
1547
- 0.24502108991146088,
1548
- 0.2064800262451172,
1549
- 0.21140344440937042,
1550
- 0.2310919463634491,
1551
- 0.2578493654727936,
1552
- 0.2671879529953003,
1553
- 0.2098991721868515,
1554
- 0.21068748831748962,
1555
- 0.25403282046318054,
1556
- 0.2670443654060364,
1557
- 0.19045837223529816,
1558
- 0.23210535943508148,
1559
- 0.2465614229440689,
1560
- 0.20154492557048798,
1561
- 0.21706925332546234,
1562
- 0.22015045583248138,
1563
- 0.21647998690605164,
1564
- 0.22915495932102203,
1565
- 0.2205311805009842,
1566
- 0.1925881952047348,
1567
- 0.24692997336387634,
1568
- 0.25309181213378906,
1569
- 0.21870723366737366,
1570
- 0.245932474732399,
1571
- 0.2309727668762207,
1572
- 0.19083799421787262,
1573
- 0.22509662806987762,
1574
- 0.22570917010307312,
1575
- 0.25146031379699707,
1576
- 0.1879393607378006,
1577
- 0.17903225123882294,
1578
- 0.230800598859787,
1579
- 0.19553127884864807,
1580
- 0.20370914041996002,
1581
- 0.25358861684799194,
1582
- 0.22613056004047394,
1583
- 0.2019713670015335,
1584
- 0.2268919199705124,
1585
- 0.24874211847782135,
1586
- 0.20951731503009796,
1587
- 0.2107919603586197,
1588
- 0.1959504336118698,
1589
- 0.21553952991962433,
1590
- 0.21554414927959442,
1591
- 0.23541958630084991,
1592
- 0.18030405044555664,
1593
- 0.20203988254070282,
1594
- 0.17926515638828278,
1595
- 0.263072669506073,
1596
- 0.2108616977930069,
1597
- 0.14598241448402405,
1598
- 0.20030143857002258,
1599
- 0.22456932067871094,
1600
- 0.2146858125925064,
1601
- 0.20507298409938812,
1602
- 0.19385136663913727,
1603
- 0.1979534924030304,
1604
- 0.20209898054599762,
1605
- 0.20098797976970673,
1606
- 0.1956910341978073,
1607
- 0.19167955219745636,
1608
- 0.21638809144496918,
1609
- 0.1940392702817917,
1610
- 0.18071554601192474,
1611
- 0.23301713168621063,
1612
- 0.18571732938289642,
1613
- 0.20041833817958832,
1614
- 0.19002893567085266,
1615
- 0.23817777633666992,
1616
- 0.16994866728782654,
1617
- 0.1998552531003952,
1618
- 0.18940328061580658,
1619
- 0.1803516000509262,
1620
- 0.17863358557224274,
1621
- 0.19638517498970032,
1622
- 0.2153729498386383,
1623
- 0.16207505762577057,
1624
- 0.16593241691589355,
1625
- 0.1831996887922287,
1626
- 0.2158493995666504,
1627
- 0.20669953525066376,
1628
- 0.16301994025707245,
1629
- 0.19910219311714172,
1630
- 0.19990552961826324,
1631
- 0.18015281856060028,
1632
- 0.13783733546733856,
1633
- 0.18134383857250214,
1634
- 0.16741850972175598,
1635
- 0.177109494805336,
1636
- 0.2050514966249466,
1637
- 0.1758144348859787,
1638
- 0.19073748588562012,
1639
- 0.1639399379491806,
1640
- 0.1469561606645584,
1641
- 0.20477049052715302,
1642
- 0.1598491668701172,
1643
- 0.17077581584453583,
1644
- 0.17846567928791046,
1645
- 0.2001478374004364,
1646
- 0.17690904438495636,
1647
- 0.16815437376499176,
1648
- 0.1419142186641693,
1649
- 0.19135485589504242,
1650
- 0.14557300508022308,
1651
- 0.1611776202917099,
1652
- 0.19547899067401886,
1653
- 0.19376233220100403,
1654
- 0.19313618540763855,
1655
- 0.17107929289340973,
1656
- 0.1763370782136917,
1657
- 0.19234664738178253,
1658
- 0.1792294681072235,
1659
- 0.22957375645637512,
1660
- 0.1926773637533188,
1661
- 0.17082278430461884,
1662
- 0.15852557122707367,
1663
- 0.16429941356182098,
1664
- 0.161500483751297,
1665
- 0.17774653434753418,
1666
- 0.16790100932121277,
1667
- 0.15072883665561676,
1668
- 0.1515331268310547,
1669
- 0.15604127943515778,
1670
- 0.16774308681488037,
1671
- 0.17304901778697968,
1672
- 0.16783729195594788,
1673
- 0.1633901447057724,
1674
- 0.15018674731254578,
1675
- 0.1959364116191864,
1676
- 0.15900716185569763,
1677
- 0.17716506123542786,
1678
- 0.18148012459278107,
1679
- 0.16957084834575653,
1680
- 0.17450162768363953,
1681
- 0.14624397456645966,
1682
- 0.19237984716892242,
1683
- 0.14564093947410583,
1684
- 0.1572442352771759,
1685
- 0.1769915670156479,
1686
- 0.18431629240512848,
1687
- 0.18862608075141907,
1688
- 0.1972048431634903,
1689
- 0.16945073008537292,
1690
- 0.17277629673480988,
1691
- 0.16611048579216003,
1692
- 0.17368483543395996,
1693
- 0.15440277755260468,
1694
- 0.18911337852478027,
1695
- 0.17402216792106628,
1696
- 0.19015789031982422,
1697
- 0.16805972158908844,
1698
- 0.15407218039035797,
1699
- 0.14304251968860626,
1700
- 0.18865016102790833,
1701
- 0.16110943257808685,
1702
- 0.2045971304178238,
1703
- 0.1502329260110855,
1704
- 0.12428479641675949,
1705
- 0.16494467854499817,
1706
- 0.12184248119592667,
1707
- 0.1607920080423355,
1708
- 0.1637299805879593,
1709
- 0.1727958768606186,
1710
- 0.168440580368042,
1711
- 0.1445026397705078,
1712
- 0.15350453555583954,
1713
- 0.14372386038303375,
1714
- 0.16206717491149902,
1715
- 0.15405137836933136,
1716
- 0.15814034640789032,
1717
- 0.14433301985263824,
1718
- 0.14300145208835602,
1719
- 0.17697784304618835,
1720
- 0.18625423312187195,
1721
- 0.16295507550239563,
1722
- 0.12883833050727844,
1723
- 0.15285921096801758,
1724
- 0.15010376274585724,
1725
- 0.1580805778503418,
1726
- 0.14755387604236603,
1727
- 0.19289889931678772,
1728
- 0.12691619992256165,
1729
- 0.14621464908123016,
1730
- 0.11558129638433456,
1731
- 0.1655631810426712,
1732
- 0.17105735838413239,
1733
- 0.17908822000026703,
1734
- 0.17774316668510437,
1735
- 0.16395303606987,
1736
- 0.1435573399066925,
1737
- 0.12409909814596176,
1738
- 0.16253414750099182,
1739
- 0.1573280543088913,
1740
- 0.14960770308971405,
1741
- 0.14318087697029114,
1742
- 0.12494007498025894,
1743
- 0.13559222221374512,
1744
- 0.18777742981910706,
1745
- 0.12923327088356018,
1746
- 0.16082116961479187,
1747
- 0.15188278257846832,
1748
- 0.13781002163887024,
1749
- 0.13092568516731262,
1750
- 0.17344479262828827,
1751
- 0.14992226660251617,
1752
- 0.11788155138492584,
1753
- 0.17064552009105682,
1754
- 0.1436513215303421,
1755
- 0.1744157075881958,
1756
- 0.1471249759197235,
1757
- 0.1319369375705719,
1758
- 0.12590177357196808,
1759
- 0.14819200336933136,
1760
- 0.1566087156534195,
1761
- 0.1258556842803955,
1762
- 0.1425287425518036,
1763
- 0.14611411094665527,
1764
- 0.12872281670570374,
1765
- 0.1713401973247528,
1766
- 0.1419590562582016,
1767
- 0.16423705220222473,
1768
- 0.13404837250709534,
1769
- 0.12364423274993896,
1770
- 0.13353344798088074,
1771
- 0.16752830147743225,
1772
- 0.17974546551704407,
1773
- 0.133741095662117,
1774
- 0.14522182941436768,
1775
- 0.14375519752502441,
1776
- 0.16996373236179352,
1777
- 0.12796184420585632,
1778
- 0.19787277281284332,
1779
- 0.14406491816043854,
1780
- 0.1600307673215866,
1781
- 0.14013724029064178,
1782
- 0.1414320021867752,
1783
- 0.13259896636009216,
1784
- 0.16883596777915955,
1785
- 0.15063153207302094,
1786
- 0.16968485713005066,
1787
- 0.1448565423488617,
1788
- 0.13374046981334686,
1789
- 0.14191940426826477,
1790
- 0.16690602898597717,
1791
- 0.16557033360004425,
1792
- 0.1341642588376999,
1793
- 0.14818671345710754,
1794
- 0.15339885652065277,
1795
- 0.1358605921268463,
1796
- 0.11332222074270248,
1797
- 0.13079999387264252,
1798
- 0.11911796033382416,
1799
- 0.13282403349876404,
1800
- 0.1295071691274643,
1801
- 0.14931797981262207,
1802
- 0.1497408151626587,
1803
- 0.1576954424381256,
1804
- 0.1566249281167984,
1805
- 0.14676371216773987,
1806
- 0.14718690514564514,
1807
- 0.14807923138141632,
1808
- 0.1212613433599472,
1809
- 0.16526338458061218,
1810
- 0.12577828764915466,
1811
- 0.14465472102165222,
1812
- 0.1332075595855713,
1813
- 0.14287416636943817,
1814
- 0.14098156988620758,
1815
- 0.12206381559371948,
1816
- 0.13529722392559052,
1817
- 0.14950823783874512,
1818
- 0.12731051445007324,
1819
- 0.1640467643737793,
1820
- 0.15214170515537262,
1821
- 0.14054253697395325,
1822
- 0.13051234185695648,
1823
- 0.12513872981071472,
1824
- 0.1256050020456314,
1825
- 0.13018260896205902,
1826
- 0.12952646613121033,
1827
- 0.15313276648521423,
1828
- 0.15212035179138184,
1829
- 0.1590813547372818,
1830
- 0.1418110430240631,
1831
- 0.14174507558345795,
1832
- 0.1443943977355957,
1833
- 0.13427415490150452,
1834
- 0.14180710911750793,
1835
- 0.1526232659816742,
1836
- 0.14491091668605804,
1837
- 0.1218637228012085,
1838
- 0.15213581919670105,
1839
- 0.12060185521841049,
1840
- 0.12314708530902863,
1841
- 0.135157972574234,
1842
- 0.14702904224395752,
1843
- 0.1435243785381317,
1844
- 0.14389681816101074,
1845
- 0.12983374297618866,
1846
- 0.1256387084722519,
1847
- 0.13456468284130096,
1848
- 0.11090267449617386,
1849
- 0.14436398446559906,
1850
- 0.15340672433376312,
1851
- 0.13106809556484222,
1852
- 0.13008198142051697,
1853
- 0.12773239612579346,
1854
- 0.12845288217067719,
1855
- 0.12067259848117828,
1856
- 0.12159726768732071,
1857
- 0.12335409969091415,
1858
- 0.14989832043647766,
1859
- 0.18622605502605438,
1860
- 0.14446382224559784,
1861
- 0.13140790164470673,
1862
- 0.14309929311275482,
1863
- 0.1644481122493744,
1864
- 0.13612475991249084,
1865
- 0.13592186570167542,
1866
- 0.1400979459285736,
1867
- 0.14549663662910461,
1868
- 0.14845392107963562,
1869
- 0.16581669449806213,
1870
- 0.1395435482263565,
1871
- 0.12557461857795715,
1872
- 0.13034124672412872,
1873
- 0.11364679783582687,
1874
- 0.1468597948551178,
1875
- 0.12835600972175598,
1876
- 0.1345360428094864,
1877
- 0.16890299320220947,
1878
- 0.1177949383854866,
1879
- 0.1442183256149292,
1880
- 0.13947038352489471,
1881
- 0.13237617909908295,
1882
- 0.12772080302238464
1883
  ],
1884
  "lr": [
1885
  1.0458911419423694e-06,
@@ -2554,594 +2554,594 @@
2554
  ],
2555
  "eval_accuracy": [
2556
  0.0,
2557
- 0.0,
2558
- 0.012222222222222223,
2559
- 0.04555555555555556,
2560
- 0.09888888888888889,
2561
- 0.1922222222222222,
2562
- 0.25,
2563
- 0.3422222222222222,
2564
- 0.37,
2565
- 0.43444444444444447,
2566
- 0.49666666666666665,
2567
- 0.53,
2568
- 0.5233333333333333,
2569
- 0.5744444444444444,
2570
- 0.5533333333333333,
2571
- 0.5655555555555556,
2572
- 0.5755555555555556,
2573
- 0.5977777777777777,
2574
- 0.5866666666666667,
2575
- 0.5988888888888889
2576
  ]
2577
  },
2578
- "final_accuracy": 0.5025,
2579
  "sft_eval": {
2580
  "config": {
2581
  "ops": "add_sub",
2582
  "K": null,
2583
  "mode": "sft",
2584
  "n_digits": 6,
2585
- "n_per_split": 50
2586
  },
2587
  "splits": {
2588
  "add_S0": {
2589
- "full_accuracy": 0.92,
2590
- "digit_accuracy": 0.9885714285714285,
2591
- "n_examples": 50,
2592
  "per_subtask": {
2593
  "SA": {
2594
- "accuracy": 0.9864406779661017,
2595
- "count": 295
2596
  },
2597
  "SS": {
2598
- "accuracy": 1.0,
2599
- "count": 55
2600
  }
2601
  }
2602
  },
2603
  "add_S1": {
2604
- "full_accuracy": 0.94,
2605
- "digit_accuracy": 0.9914285714285714,
2606
- "n_examples": 50,
2607
  "per_subtask": {
2608
  "SA": {
2609
- "accuracy": 1.0,
2610
- "count": 126
2611
  },
2612
  "SC": {
2613
- "accuracy": 0.9746835443037974,
2614
- "count": 79
2615
  },
2616
  "SS": {
2617
- "accuracy": 0.9523809523809523,
2618
- "count": 21
2619
  },
2620
  "UC": {
2621
- "accuracy": 1.0,
2622
- "count": 124
2623
  }
2624
  }
2625
  },
2626
  "add_S2": {
2627
- "full_accuracy": 0.38,
2628
- "digit_accuracy": 0.8942857142857142,
2629
- "n_examples": 50,
2630
  "per_subtask": {
2631
  "SA": {
2632
- "accuracy": 0.9733333333333334,
2633
- "count": 75
2634
  },
2635
  "SC": {
2636
- "accuracy": 0.9032258064516129,
2637
- "count": 62
2638
  },
2639
  "SS": {
2640
- "accuracy": 0.9743589743589743,
2641
- "count": 39
2642
  },
2643
  "UC": {
2644
- "accuracy": 0.7477477477477478,
2645
- "count": 111
2646
  },
2647
  "US": {
2648
  "accuracy": 1.0,
2649
- "count": 63
2650
  }
2651
  }
2652
  },
2653
  "add_S3": {
2654
- "full_accuracy": 0.34,
2655
- "digit_accuracy": 0.8828571428571429,
2656
- "n_examples": 50,
2657
  "per_subtask": {
2658
  "SA": {
2659
- "accuracy": 0.9833333333333333,
2660
- "count": 60
2661
  },
2662
  "SC": {
2663
- "accuracy": 0.9649122807017544,
2664
- "count": 57
2665
  },
2666
  "SS": {
2667
- "accuracy": 0.8947368421052632,
2668
- "count": 19
2669
  },
2670
  "UC": {
2671
- "accuracy": 0.7692307692307693,
2672
- "count": 104
2673
  },
2674
  "US": {
2675
- "accuracy": 0.8909090909090909,
2676
- "count": 110
2677
  }
2678
  }
2679
  },
2680
  "add_S4": {
2681
- "full_accuracy": 0.42,
2682
- "digit_accuracy": 0.8228571428571428,
2683
- "n_examples": 50,
2684
  "per_subtask": {
2685
  "SA": {
2686
  "accuracy": 1.0,
2687
- "count": 48
2688
  },
2689
  "SC": {
2690
- "accuracy": 0.9807692307692307,
2691
- "count": 52
2692
  },
2693
  "SS": {
2694
  "accuracy": 1.0,
2695
- "count": 7
2696
  },
2697
  "UC": {
2698
- "accuracy": 0.7191011235955056,
2699
- "count": 89
2700
  },
2701
  "US": {
2702
- "accuracy": 0.7662337662337663,
2703
- "count": 154
2704
  }
2705
  }
2706
  },
2707
  "add_S5": {
2708
- "full_accuracy": 0.3,
2709
- "digit_accuracy": 0.6485714285714286,
2710
- "n_examples": 50,
2711
  "per_subtask": {
2712
  "SA": {
2713
  "accuracy": 1.0,
2714
- "count": 50
2715
  },
2716
  "SC": {
2717
  "accuracy": 1.0,
2718
- "count": 50
2719
  },
2720
  "UC": {
2721
- "accuracy": 0.36,
2722
- "count": 50
2723
  },
2724
  "US": {
2725
- "accuracy": 0.545,
2726
- "count": 200
2727
  }
2728
  }
2729
  },
2730
  "add_S6": {
2731
- "full_accuracy": 0.48,
2732
- "digit_accuracy": 0.7257142857142858,
2733
- "n_examples": 50,
2734
  "per_subtask": {
2735
  "SC": {
2736
  "accuracy": 1.0,
2737
- "count": 50
2738
  },
2739
  "UC": {
2740
- "accuracy": 0.72,
2741
- "count": 50
2742
  },
2743
  "US": {
2744
- "accuracy": 0.672,
2745
- "count": 250
2746
  }
2747
  }
2748
  },
2749
  "add_random": {
2750
- "full_accuracy": 0.835,
2751
- "digit_accuracy": 0.9728571428571429,
2752
  "n_examples": 200,
2753
  "per_subtask": {
2754
  "SA": {
2755
- "accuracy": 0.9907192575406032,
2756
- "count": 431
2757
  },
2758
  "SC": {
2759
- "accuracy": 0.9873417721518988,
2760
- "count": 316
2761
  },
2762
  "SS": {
2763
  "accuracy": 1.0,
2764
- "count": 39
2765
  },
2766
  "UC": {
2767
- "accuracy": 0.9517857142857142,
2768
- "count": 560
2769
  },
2770
  "US": {
2771
- "accuracy": 0.9444444444444444,
2772
- "count": 54
2773
  }
2774
  }
2775
  },
2776
  "add_C1": {
2777
- "full_accuracy": 0.96,
2778
- "digit_accuracy": 0.9942857142857143,
2779
- "n_examples": 50,
2780
  "per_subtask": {
2781
  "SA": {
2782
- "accuracy": 0.996,
2783
- "count": 250
2784
  },
2785
  "SC": {
2786
  "accuracy": 1.0,
2787
- "count": 50
2788
  },
2789
  "UC": {
2790
- "accuracy": 0.98,
2791
- "count": 50
2792
  }
2793
  }
2794
  },
2795
  "add_C2": {
2796
- "full_accuracy": 0.72,
2797
  "digit_accuracy": 0.9571428571428572,
2798
- "n_examples": 50,
2799
  "per_subtask": {
2800
  "SA": {
2801
  "accuracy": 0.99,
2802
- "count": 200
2803
  },
2804
  "SC": {
2805
- "accuracy": 0.98,
2806
- "count": 50
2807
  },
2808
  "UC": {
2809
- "accuracy": 0.8554216867469879,
2810
- "count": 83
2811
  },
2812
  "US": {
2813
  "accuracy": 1.0,
2814
- "count": 17
2815
  }
2816
  }
2817
  },
2818
  "add_C3": {
2819
- "full_accuracy": 0.28,
2820
- "digit_accuracy": 0.8742857142857143,
2821
- "n_examples": 50,
2822
  "per_subtask": {
2823
  "SA": {
2824
- "accuracy": 1.0,
2825
- "count": 150
2826
  },
2827
  "SC": {
2828
  "accuracy": 1.0,
2829
- "count": 50
2830
  },
2831
  "UC": {
2832
- "accuracy": 0.66,
2833
- "count": 100
2834
  },
2835
  "US": {
2836
- "accuracy": 0.8,
2837
- "count": 50
2838
  }
2839
  }
2840
  },
2841
  "add_C4": {
2842
- "full_accuracy": 0.38,
2843
- "digit_accuracy": 0.8942857142857142,
2844
- "n_examples": 50,
2845
  "per_subtask": {
2846
  "SA": {
2847
  "accuracy": 1.0,
2848
- "count": 100
2849
  },
2850
  "SC": {
2851
  "accuracy": 1.0,
2852
- "count": 50
2853
  },
2854
  "UC": {
2855
- "accuracy": 0.7575757575757576,
2856
- "count": 132
2857
  },
2858
  "US": {
2859
- "accuracy": 0.9264705882352942,
2860
- "count": 68
2861
  }
2862
  }
2863
  },
2864
  "add_C5": {
2865
- "full_accuracy": 0.48,
2866
- "digit_accuracy": 0.9028571428571428,
2867
- "n_examples": 50,
2868
  "per_subtask": {
2869
  "SA": {
2870
  "accuracy": 1.0,
2871
- "count": 50
2872
  },
2873
  "SC": {
2874
- "accuracy": 0.98,
2875
- "count": 50
2876
  },
2877
  "UC": {
2878
- "accuracy": 0.821917808219178,
2879
- "count": 146
2880
  },
2881
  "US": {
2882
- "accuracy": 0.9326923076923077,
2883
- "count": 104
2884
  }
2885
  }
2886
  },
2887
  "add_C6": {
2888
- "full_accuracy": 0.32,
2889
- "digit_accuracy": 0.8542857142857143,
2890
- "n_examples": 50,
2891
  "per_subtask": {
2892
  "SC": {
2893
  "accuracy": 1.0,
2894
- "count": 50
2895
  },
2896
  "UC": {
2897
- "accuracy": 0.798941798941799,
2898
- "count": 189
2899
  },
2900
  "US": {
2901
- "accuracy": 0.8828828828828829,
2902
- "count": 111
2903
  }
2904
  }
2905
  },
2906
  "sub_M0": {
2907
  "full_accuracy": 0.92,
2908
  "digit_accuracy": 0.9885714285714285,
2909
- "n_examples": 50,
2910
  "per_subtask": {
2911
  "MD": {
2912
- "accuracy": 0.9867986798679867,
2913
- "count": 303
2914
  },
2915
  "ME": {
2916
  "accuracy": 1.0,
2917
- "count": 47
2918
  }
2919
  }
2920
  },
2921
  "sub_M1": {
2922
- "full_accuracy": 0.86,
2923
- "digit_accuracy": 0.9771428571428571,
2924
- "n_examples": 50,
2925
  "per_subtask": {
2926
  "MD": {
2927
- "accuracy": 1.0,
2928
- "count": 141
2929
  },
2930
  "MB": {
2931
- "accuracy": 0.9444444444444444,
2932
- "count": 72
2933
  },
2934
  "ME": {
2935
- "accuracy": 0.9444444444444444,
2936
- "count": 18
2937
  },
2938
  "UB": {
2939
- "accuracy": 0.9747899159663865,
2940
- "count": 119
2941
  }
2942
  }
2943
  },
2944
  "sub_M2": {
2945
- "full_accuracy": 0.18,
2946
- "digit_accuracy": 0.8628571428571429,
2947
- "n_examples": 50,
2948
  "per_subtask": {
2949
  "MD": {
2950
- "accuracy": 0.9732142857142857,
2951
- "count": 112
2952
  },
2953
  "MB": {
2954
- "accuracy": 0.9433962264150944,
2955
- "count": 53
2956
  },
2957
  "ME": {
2958
- "accuracy": 0.9574468085106383,
2959
- "count": 47
2960
  },
2961
  "UB": {
2962
- "accuracy": 0.5294117647058824,
2963
- "count": 85
2964
  },
2965
  "UD": {
2966
- "accuracy": 1.0,
2967
- "count": 53
2968
  }
2969
  }
2970
  },
2971
  "sub_M3": {
2972
- "full_accuracy": 0.1,
2973
- "digit_accuracy": 0.8028571428571428,
2974
- "n_examples": 50,
2975
  "per_subtask": {
2976
  "MD": {
2977
- "accuracy": 0.9896907216494846,
2978
- "count": 97
2979
  },
2980
  "MB": {
2981
  "accuracy": 1.0,
2982
- "count": 51
2983
  },
2984
  "ME": {
2985
  "accuracy": 1.0,
2986
- "count": 27
2987
  },
2988
  "UB": {
2989
- "accuracy": 0.5405405405405406,
2990
- "count": 74
2991
  },
2992
  "UD": {
2993
- "accuracy": 0.6633663366336634,
2994
- "count": 101
2995
  }
2996
  }
2997
  },
2998
  "sub_M4": {
2999
  "full_accuracy": 0.1,
3000
- "digit_accuracy": 0.7657142857142857,
3001
- "n_examples": 50,
3002
  "per_subtask": {
3003
  "MD": {
3004
  "accuracy": 1.0,
3005
- "count": 100
3006
  },
3007
  "MB": {
3008
- "accuracy": 1.0,
3009
- "count": 50
3010
  },
3011
  "UB": {
3012
- "accuracy": 0.46,
3013
- "count": 50
3014
  },
3015
  "UD": {
3016
- "accuracy": 0.6333333333333333,
3017
- "count": 150
3018
  }
3019
  }
3020
  },
3021
  "sub_M5": {
3022
- "full_accuracy": 0.14,
3023
- "digit_accuracy": 0.6857142857142857,
3024
- "n_examples": 50,
3025
  "per_subtask": {
3026
  "MD": {
3027
  "accuracy": 1.0,
3028
- "count": 50
3029
  },
3030
  "MB": {
3031
  "accuracy": 1.0,
3032
- "count": 50
3033
  },
3034
  "UB": {
3035
- "accuracy": 0.38,
3036
- "count": 50
3037
  },
3038
  "UD": {
3039
- "accuracy": 0.605,
3040
- "count": 200
3041
  }
3042
  }
3043
  },
3044
  "sub_random": {
3045
- "full_accuracy": 0.815,
3046
- "digit_accuracy": 0.9735714285714285,
3047
  "n_examples": 200,
3048
  "per_subtask": {
3049
  "MD": {
3050
- "accuracy": 0.9947368421052631,
3051
- "count": 570
3052
  },
3053
  "MB": {
3054
- "accuracy": 0.9638989169675091,
3055
- "count": 277
3056
  },
3057
  "ME": {
3058
- "accuracy": 1.0,
3059
  "count": 53
3060
  },
3061
  "UB": {
3062
- "accuracy": 0.9490445859872612,
3063
- "count": 471
3064
  },
3065
  "UD": {
3066
- "accuracy": 1.0,
3067
- "count": 29
3068
  }
3069
  }
3070
  },
3071
  "sub_B3": {
3072
- "full_accuracy": 0.58,
3073
- "digit_accuracy": 0.9171428571428571,
3074
- "n_examples": 50,
3075
  "per_subtask": {
3076
  "MD": {
3077
  "accuracy": 1.0,
3078
- "count": 150
3079
  },
3080
  "MB": {
3081
  "accuracy": 1.0,
3082
- "count": 50
3083
  },
3084
  "UB": {
3085
- "accuracy": 0.801980198019802,
3086
- "count": 101
3087
  },
3088
  "UD": {
3089
- "accuracy": 0.8163265306122449,
3090
- "count": 49
3091
  }
3092
  }
3093
  },
3094
  "sub_B4": {
3095
- "full_accuracy": 0.34,
3096
- "digit_accuracy": 0.8771428571428571,
3097
- "n_examples": 50,
3098
  "per_subtask": {
3099
  "MD": {
3100
  "accuracy": 1.0,
3101
- "count": 100
3102
  },
3103
  "MB": {
3104
  "accuracy": 1.0,
3105
- "count": 50
3106
  },
3107
  "UB": {
3108
- "accuracy": 0.7520661157024794,
3109
- "count": 121
3110
  },
3111
  "UD": {
3112
- "accuracy": 0.8354430379746836,
3113
- "count": 79
3114
  }
3115
  }
3116
  },
3117
  "sub_B5": {
3118
- "full_accuracy": 0.16,
3119
- "digit_accuracy": 0.8257142857142857,
3120
- "n_examples": 50,
3121
  "per_subtask": {
3122
  "MD": {
3123
  "accuracy": 1.0,
3124
- "count": 50
3125
  },
3126
  "MB": {
3127
  "accuracy": 1.0,
3128
- "count": 50
3129
  },
3130
  "UB": {
3131
- "accuracy": 0.7171052631578947,
3132
- "count": 152
3133
  },
3134
  "UD": {
3135
- "accuracy": 0.8163265306122449,
3136
- "count": 98
3137
  }
3138
  }
3139
  }
3140
  },
3141
  "summary": {
3142
- "overall_accuracy": 0.5626666666666666,
3143
- "digit_accuracy": 0.8972380952380953,
3144
- "total_examples": 1500,
3145
  "n_splits": 24
3146
  }
3147
  }
 
628
  31250
629
  ],
630
  "loss": [
631
+ 11.94859790802002,
632
+ 11.840438842773438,
633
+ 11.64094066619873,
634
+ 11.324628829956055,
635
+ 10.98892879486084,
636
+ 10.639241218566895,
637
+ 10.578450202941895,
638
+ 10.493820190429688,
639
+ 10.257356643676758,
640
+ 10.105127334594727,
641
+ 9.852198600769043,
642
+ 9.760133743286133,
643
+ 9.558384895324707,
644
+ 9.300349235534668,
645
+ 9.154654502868652,
646
+ 8.889933586120605,
647
+ 8.539278984069824,
648
+ 8.303180694580078,
649
+ 8.065629959106445,
650
+ 7.7729902267456055,
651
+ 7.503629207611084,
652
+ 7.239531517028809,
653
+ 6.897196292877197,
654
+ 6.684809684753418,
655
+ 6.3543596267700195,
656
+ 6.028420448303223,
657
+ 5.7321271896362305,
658
+ 5.48431396484375,
659
+ 5.272823333740234,
660
+ 4.938896656036377,
661
+ 4.6226396560668945,
662
+ 4.419801235198975,
663
+ 4.154916286468506,
664
+ 3.8767247200012207,
665
+ 3.7096211910247803,
666
+ 3.4114320278167725,
667
+ 3.246389389038086,
668
+ 3.0310475826263428,
669
+ 2.960927724838257,
670
+ 2.78560209274292,
671
+ 2.619802236557007,
672
+ 2.522150754928589,
673
+ 2.482307195663452,
674
+ 2.383479356765747,
675
+ 2.2891829013824463,
676
+ 2.306661367416382,
677
+ 2.2119381427764893,
678
+ 2.176996946334839,
679
+ 2.1396560668945312,
680
+ 2.1182291507720947,
681
+ 2.138927936553955,
682
+ 2.099632740020752,
683
+ 2.0461552143096924,
684
+ 2.0577521324157715,
685
+ 2.014800786972046,
686
+ 2.057321786880493,
687
+ 2.0552284717559814,
688
+ 1.9784787893295288,
689
+ 1.9504175186157227,
690
+ 1.9787120819091797,
691
+ 1.8843504190444946,
692
+ 1.936767816543579,
693
+ 1.9563958644866943,
694
+ 1.8449238538742065,
695
+ 1.8611806631088257,
696
+ 1.928608775138855,
697
+ 1.7879769802093506,
698
+ 1.8571434020996094,
699
+ 1.821972131729126,
700
+ 1.8500518798828125,
701
+ 1.861620306968689,
702
+ 1.8515946865081787,
703
+ 1.8433369398117065,
704
+ 1.8581030368804932,
705
+ 1.7452529668807983,
706
+ 1.7694547176361084,
707
+ 1.8078683614730835,
708
+ 1.7968032360076904,
709
+ 1.7500313520431519,
710
+ 1.6263803243637085,
711
+ 1.6820186376571655,
712
+ 1.6738622188568115,
713
+ 1.672284483909607,
714
+ 1.655076026916504,
715
+ 1.586304783821106,
716
+ 1.4643189907073975,
717
+ 1.5293394327163696,
718
+ 1.5075138807296753,
719
+ 1.5243085622787476,
720
+ 1.5082238912582397,
721
+ 1.4548567533493042,
722
+ 1.3837165832519531,
723
+ 1.41783607006073,
724
+ 1.2917324304580688,
725
+ 1.2966030836105347,
726
+ 1.2949161529541016,
727
+ 1.2839399576187134,
728
+ 1.2788225412368774,
729
+ 1.2647664546966553,
730
+ 1.2145267724990845,
731
+ 1.2092732191085815,
732
+ 1.182142972946167,
733
+ 1.1864356994628906,
734
+ 1.121931791305542,
735
+ 1.0740060806274414,
736
+ 1.1255488395690918,
737
+ 1.0717084407806396,
738
+ 1.0892133712768555,
739
+ 1.1152369976043701,
740
+ 1.050582766532898,
741
+ 1.0131772756576538,
742
+ 1.0159205198287964,
743
+ 0.9687409996986389,
744
+ 0.9926728010177612,
745
+ 0.9656931161880493,
746
+ 0.9558922052383423,
747
+ 0.9416300058364868,
748
+ 0.9168248176574707,
749
+ 0.9094190001487732,
750
+ 0.9148558378219604,
751
+ 0.8885840773582458,
752
+ 0.8573789596557617,
753
+ 0.8440743088722229,
754
+ 0.8346267342567444,
755
+ 0.8313559293746948,
756
+ 0.8086395263671875,
757
+ 0.8341150879859924,
758
+ 0.8089955449104309,
759
+ 0.8083120584487915,
760
+ 0.7901385426521301,
761
+ 0.8018090128898621,
762
+ 0.7361825108528137,
763
+ 0.7226907014846802,
764
+ 0.7151471972465515,
765
+ 0.6951335668563843,
766
+ 0.6982666850090027,
767
+ 0.6982697248458862,
768
+ 0.7394556403160095,
769
+ 0.7399061322212219,
770
+ 0.7149839401245117,
771
+ 0.6887863278388977,
772
+ 0.7093302607536316,
773
+ 0.6588761210441589,
774
+ 0.6807154417037964,
775
+ 0.6855794191360474,
776
+ 0.6251148581504822,
777
+ 0.6450304985046387,
778
+ 0.6763089299201965,
779
+ 0.6778842210769653,
780
+ 0.6132544875144958,
781
+ 0.6343541741371155,
782
+ 0.611568808555603,
783
+ 0.5893497467041016,
784
+ 0.5825234651565552,
785
+ 0.5738789439201355,
786
+ 0.5681091547012329,
787
+ 0.5831159949302673,
788
+ 0.550575852394104,
789
+ 0.5623425245285034,
790
+ 0.5403538942337036,
791
+ 0.5861159563064575,
792
+ 0.5384799242019653,
793
+ 0.5562480688095093,
794
+ 0.5091863870620728,
795
+ 0.5155102014541626,
796
+ 0.5429946184158325,
797
+ 0.527049720287323,
798
+ 0.49557358026504517,
799
+ 0.5270788073539734,
800
+ 0.48558399081230164,
801
+ 0.48906925320625305,
802
+ 0.49761268496513367,
803
+ 0.4692177176475525,
804
+ 0.478564590215683,
805
+ 0.4760856330394745,
806
+ 0.44141462445259094,
807
+ 0.4625226855278015,
808
+ 0.4593048095703125,
809
+ 0.44456854462623596,
810
+ 0.5015004277229309,
811
+ 0.45153093338012695,
812
+ 0.4653579294681549,
813
+ 0.4412159323692322,
814
+ 0.46345633268356323,
815
+ 0.4392021596431732,
816
+ 0.42899444699287415,
817
+ 0.4141101837158203,
818
+ 0.414285808801651,
819
+ 0.41810232400894165,
820
+ 0.42292097210884094,
821
+ 0.3962114751338959,
822
+ 0.4087282121181488,
823
+ 0.4289020001888275,
824
+ 0.42968955636024475,
825
+ 0.3943832814693451,
826
+ 0.41051462292671204,
827
+ 0.3950999677181244,
828
+ 0.40754038095474243,
829
+ 0.3994915783405304,
830
+ 0.41414302587509155,
831
+ 0.39105725288391113,
832
+ 0.37961503863334656,
833
+ 0.38364723324775696,
834
+ 0.4205988347530365,
835
+ 0.41498011350631714,
836
+ 0.3854501247406006,
837
+ 0.4055662751197815,
838
+ 0.3829682171344757,
839
+ 0.37448883056640625,
840
+ 0.36969661712646484,
841
+ 0.35262203216552734,
842
+ 0.4099288284778595,
843
+ 0.3834565579891205,
844
+ 0.3552636504173279,
845
+ 0.32924869656562805,
846
+ 0.36005476117134094,
847
+ 0.3339636027812958,
848
+ 0.3542087972164154,
849
+ 0.3503226339817047,
850
+ 0.36919230222702026,
851
+ 0.3485560715198517,
852
+ 0.3238471448421478,
853
+ 0.3353195786476135,
854
+ 0.3599640429019928,
855
+ 0.3437122702598572,
856
+ 0.3387219309806824,
857
+ 0.3057003617286682,
858
+ 0.2906756103038788,
859
+ 0.34256139397621155,
860
+ 0.31671828031539917,
861
+ 0.3027709126472473,
862
+ 0.3286406099796295,
863
+ 0.29700973629951477,
864
+ 0.33028173446655273,
865
+ 0.30202531814575195,
866
+ 0.32036495208740234,
867
+ 0.2988939583301544,
868
+ 0.27923664450645447,
869
+ 0.3316617012023926,
870
+ 0.3278091251850128,
871
+ 0.28900453448295593,
872
+ 0.3363553583621979,
873
+ 0.36397480964660645,
874
+ 0.30927157402038574,
875
+ 0.2704579532146454,
876
+ 0.3233926594257355,
877
+ 0.3281395137310028,
878
+ 0.2813015282154083,
879
+ 0.2918522357940674,
880
+ 0.310793936252594,
881
+ 0.26009702682495117,
882
+ 0.3331923484802246,
883
+ 0.28003939986228943,
884
+ 0.33480802178382874,
885
+ 0.28114771842956543,
886
+ 0.28258591890335083,
887
+ 0.2621360719203949,
888
+ 0.26446372270584106,
889
+ 0.25052106380462646,
890
+ 0.24290919303894043,
891
+ 0.28893521428108215,
892
+ 0.28211459517478943,
893
+ 0.26482027769088745,
894
+ 0.2912617027759552,
895
+ 0.2805217206478119,
896
+ 0.262124240398407,
897
+ 0.2579425275325775,
898
+ 0.248891219496727,
899
+ 0.293045312166214,
900
+ 0.2911718189716339,
901
+ 0.24228894710540771,
902
+ 0.2836480736732483,
903
+ 0.2810485363006592,
904
+ 0.2615375816822052,
905
+ 0.24184216558933258,
906
+ 0.2451353818178177,
907
+ 0.23175807297229767,
908
+ 0.23173300921916962,
909
+ 0.23882640898227692,
910
+ 0.2664676904678345,
911
+ 0.26882803440093994,
912
+ 0.2690429091453552,
913
+ 0.24577376246452332,
914
+ 0.23808881640434265,
915
+ 0.26410430669784546,
916
+ 0.23443324863910675,
917
+ 0.2398104965686798,
918
+ 0.22221653163433075,
919
+ 0.26233434677124023,
920
+ 0.23280136287212372,
921
+ 0.21146978437900543,
922
+ 0.2486180067062378,
923
+ 0.20887069404125214,
924
+ 0.2606666386127472,
925
+ 0.24783062934875488,
926
+ 0.25852853059768677,
927
+ 0.21136604249477386,
928
+ 0.19507789611816406,
929
+ 0.2310580462217331,
930
+ 0.19218036532402039,
931
+ 0.28741464018821716,
932
+ 0.2194812297821045,
933
+ 0.22402501106262207,
934
+ 0.2472635954618454,
935
+ 0.2330581098794937,
936
+ 0.18851839005947113,
937
+ 0.22764381766319275,
938
+ 0.2213810235261917,
939
+ 0.2095119208097458,
940
+ 0.20449183881282806,
941
+ 0.24300800263881683,
942
+ 0.21304859220981598,
943
+ 0.21145255863666534,
944
+ 0.22761793434619904,
945
+ 0.22742269933223724,
946
+ 0.2193547636270523,
947
+ 0.1930546760559082,
948
+ 0.21217192709445953,
949
+ 0.19071756303310394,
950
+ 0.24465782940387726,
951
+ 0.21515420079231262,
952
+ 0.1957842856645584,
953
+ 0.24550697207450867,
954
+ 0.20868192613124847,
955
+ 0.20368941128253937,
956
+ 0.19286994636058807,
957
+ 0.25735655426979065,
958
+ 0.19909431040287018,
959
+ 0.22133401036262512,
960
+ 0.19772383570671082,
961
+ 0.2013375461101532,
962
+ 0.23962707817554474,
963
+ 0.19507475197315216,
964
+ 0.22025993466377258,
965
+ 0.16754481196403503,
966
+ 0.21235434710979462,
967
+ 0.23731891810894012,
968
+ 0.20245544612407684,
969
+ 0.2115701586008072,
970
+ 0.18609514832496643,
971
+ 0.19329078495502472,
972
+ 0.191462442278862,
973
+ 0.21664702892303467,
974
+ 0.2282712459564209,
975
+ 0.1856536567211151,
976
+ 0.21686986088752747,
977
+ 0.21144740283489227,
978
+ 0.2090771198272705,
979
+ 0.18183530867099762,
980
+ 0.2151528149843216,
981
+ 0.21354100108146667,
982
+ 0.189223974943161,
983
+ 0.2282743752002716,
984
+ 0.18897734582424164,
985
+ 0.1972484588623047,
986
+ 0.18682360649108887,
987
+ 0.19328011572360992,
988
+ 0.22190427780151367,
989
+ 0.17040297389030457,
990
+ 0.2322014719247818,
991
+ 0.17299620807170868,
992
+ 0.19289156794548035,
993
+ 0.19416391849517822,
994
+ 0.2013746052980423,
995
+ 0.16254912316799164,
996
+ 0.19851411879062653,
997
+ 0.17761853337287903,
998
+ 0.19167092442512512,
999
+ 0.16775621473789215,
1000
+ 0.19005520641803741,
1001
+ 0.18174482882022858,
1002
+ 0.1820433884859085,
1003
+ 0.1686544120311737,
1004
+ 0.17813602089881897,
1005
+ 0.18956564366817474,
1006
+ 0.18593311309814453,
1007
+ 0.15203449130058289,
1008
+ 0.16920749843120575,
1009
+ 0.1750987023115158,
1010
+ 0.1903667151927948,
1011
+ 0.21415254473686218,
1012
+ 0.19226084649562836,
1013
+ 0.16631709039211273,
1014
+ 0.22381491959095,
1015
+ 0.1565556824207306,
1016
+ 0.178656667470932,
1017
+ 0.16794231534004211,
1018
+ 0.2035113424062729,
1019
+ 0.15502004325389862,
1020
+ 0.2080964893102646,
1021
+ 0.16120600700378418,
1022
+ 0.17409558594226837,
1023
+ 0.18227358162403107,
1024
+ 0.15712487697601318,
1025
+ 0.1848597526550293,
1026
+ 0.22843310236930847,
1027
+ 0.1544765681028366,
1028
+ 0.16624769568443298,
1029
+ 0.18198521435260773,
1030
+ 0.18578679859638214,
1031
+ 0.1581115424633026,
1032
+ 0.1675325185060501,
1033
+ 0.17194543778896332,
1034
+ 0.1624213606119156,
1035
+ 0.15694668889045715,
1036
+ 0.14574037492275238,
1037
+ 0.19108226895332336,
1038
+ 0.20138464868068695,
1039
+ 0.16426335275173187,
1040
+ 0.1591721922159195,
1041
+ 0.21045982837677002,
1042
+ 0.1457131952047348,
1043
+ 0.16802136600017548,
1044
+ 0.17403490841388702,
1045
+ 0.1713341623544693,
1046
+ 0.18586938083171844,
1047
+ 0.1811138540506363,
1048
+ 0.14901359379291534,
1049
+ 0.16701973974704742,
1050
+ 0.1604980081319809,
1051
+ 0.1447248011827469,
1052
+ 0.12645721435546875,
1053
+ 0.16988113522529602,
1054
+ 0.16805614531040192,
1055
+ 0.16320255398750305,
1056
+ 0.1912159025669098,
1057
+ 0.1462724506855011,
1058
+ 0.170498326420784,
1059
+ 0.15915271639823914,
1060
+ 0.16366423666477203,
1061
+ 0.13886980712413788,
1062
+ 0.1343265324831009,
1063
+ 0.17708076536655426,
1064
+ 0.14998695254325867,
1065
+ 0.17960231006145477,
1066
+ 0.156875342130661,
1067
+ 0.15048567950725555,
1068
+ 0.18893864750862122,
1069
+ 0.16481050848960876,
1070
+ 0.16897451877593994,
1071
+ 0.15260040760040283,
1072
+ 0.11548135429620743,
1073
+ 0.1384969800710678,
1074
+ 0.15838623046875,
1075
+ 0.15708640217781067,
1076
+ 0.1365736722946167,
1077
+ 0.18369968235492706,
1078
+ 0.16396315395832062,
1079
+ 0.1661636084318161,
1080
+ 0.17268292605876923,
1081
+ 0.19088900089263916,
1082
+ 0.14722244441509247,
1083
+ 0.19560489058494568,
1084
+ 0.14434412121772766,
1085
+ 0.16260702908039093,
1086
+ 0.1408696174621582,
1087
+ 0.168971449136734,
1088
+ 0.17031393945217133,
1089
+ 0.14803941547870636,
1090
+ 0.16470588743686676,
1091
+ 0.1566447913646698,
1092
+ 0.20542870461940765,
1093
+ 0.1710493564605713,
1094
+ 0.18512073159217834,
1095
+ 0.1473395824432373,
1096
+ 0.15322645008563995,
1097
+ 0.1436643898487091,
1098
+ 0.1595940738916397,
1099
+ 0.18959307670593262,
1100
+ 0.15076453983783722,
1101
+ 0.1550501435995102,
1102
+ 0.18462494015693665,
1103
+ 0.1573774814605713,
1104
+ 0.16171033680438995,
1105
+ 0.15045134723186493,
1106
+ 0.17703239619731903,
1107
+ 0.14783276617527008,
1108
+ 0.16923877596855164,
1109
+ 0.1926303207874298,
1110
+ 0.19895410537719727,
1111
+ 0.17747224867343903,
1112
+ 0.19405317306518555,
1113
+ 0.14278696477413177,
1114
+ 0.11125724017620087,
1115
+ 0.1512247622013092,
1116
+ 0.15484271943569183,
1117
+ 0.12213726341724396,
1118
+ 0.162554070353508,
1119
+ 0.16736207902431488,
1120
+ 0.13373737037181854,
1121
+ 0.17999958992004395,
1122
+ 0.15072117745876312,
1123
+ 0.1646808385848999,
1124
+ 0.14545665681362152,
1125
+ 0.16830526292324066,
1126
+ 0.12739090621471405,
1127
+ 0.10390200465917587,
1128
+ 0.17377625405788422,
1129
+ 0.16494156420230865,
1130
+ 0.13795830309391022,
1131
+ 0.14690710604190826,
1132
+ 0.12438012659549713,
1133
+ 0.14515355229377747,
1134
+ 0.14279648661613464,
1135
+ 0.12356720119714737,
1136
+ 0.13406822085380554,
1137
+ 0.13987518846988678,
1138
+ 0.1401202231645584,
1139
+ 0.1880432814359665,
1140
+ 0.18093450367450714,
1141
+ 0.1592968851327896,
1142
+ 0.17524102330207825,
1143
+ 0.14916424453258514,
1144
+ 0.15612027049064636,
1145
+ 0.1444508135318756,
1146
+ 0.1407918483018875,
1147
+ 0.12171785533428192,
1148
+ 0.13664177060127258,
1149
+ 0.17838147282600403,
1150
+ 0.15328529477119446,
1151
+ 0.12011374533176422,
1152
+ 0.1469172239303589,
1153
+ 0.15794073045253754,
1154
+ 0.16883553564548492,
1155
+ 0.14052900671958923,
1156
+ 0.18314428627490997,
1157
+ 0.16488347947597504,
1158
+ 0.11859999597072601,
1159
+ 0.14168976247310638,
1160
+ 0.14921541512012482,
1161
+ 0.17350149154663086,
1162
+ 0.17269587516784668,
1163
+ 0.13202032446861267,
1164
+ 0.15186575055122375,
1165
+ 0.13948580622673035,
1166
+ 0.1624227911233902,
1167
+ 0.14760947227478027,
1168
+ 0.1712327003479004,
1169
+ 0.13646575808525085,
1170
+ 0.14727208018302917,
1171
+ 0.1506548374891281,
1172
+ 0.15115901827812195,
1173
+ 0.12916085124015808,
1174
+ 0.15773268043994904,
1175
+ 0.14408628642559052,
1176
+ 0.12364902347326279,
1177
+ 0.11989416927099228,
1178
+ 0.15335413813591003,
1179
+ 0.1312076300382614,
1180
+ 0.1278875768184662,
1181
+ 0.1595352590084076,
1182
+ 0.17473769187927246,
1183
+ 0.139608696103096,
1184
+ 0.12069375813007355,
1185
+ 0.12792883813381195,
1186
+ 0.14324326813220978,
1187
+ 0.16077683866024017,
1188
+ 0.13171955943107605,
1189
+ 0.12155797332525253,
1190
+ 0.14841106534004211,
1191
+ 0.14754816889762878,
1192
+ 0.13991589844226837,
1193
+ 0.14981675148010254,
1194
+ 0.15216532349586487,
1195
+ 0.14560063183307648,
1196
+ 0.15437017381191254,
1197
+ 0.1558697521686554,
1198
+ 0.12956713140010834,
1199
+ 0.1606471836566925,
1200
+ 0.1396074891090393,
1201
+ 0.1678946316242218,
1202
+ 0.14537987112998962,
1203
+ 0.12508836388587952,
1204
+ 0.13816596567630768,
1205
+ 0.1560039520263672,
1206
+ 0.13455283641815186,
1207
+ 0.13224788010120392,
1208
+ 0.14797094464302063,
1209
+ 0.1326710730791092,
1210
+ 0.12267882376909256,
1211
+ 0.15669405460357666,
1212
+ 0.1376783549785614,
1213
+ 0.13550518453121185,
1214
+ 0.15633442997932434,
1215
+ 0.13264869153499603,
1216
+ 0.15317828953266144,
1217
+ 0.13146944344043732,
1218
+ 0.1285228580236435,
1219
+ 0.14777718484401703,
1220
+ 0.10374288260936737,
1221
+ 0.13567717373371124,
1222
+ 0.11807692050933838,
1223
+ 0.14930854737758636,
1224
+ 0.10957635939121246,
1225
+ 0.1414402723312378,
1226
+ 0.16026048362255096,
1227
+ 0.1368298977613449,
1228
+ 0.12276037782430649,
1229
+ 0.1520388275384903,
1230
+ 0.14423634111881256,
1231
+ 0.1343306601047516,
1232
+ 0.15340445935726166,
1233
+ 0.14033977687358856,
1234
+ 0.11754585802555084,
1235
+ 0.13860604166984558,
1236
+ 0.14536812901496887,
1237
+ 0.1477390080690384,
1238
+ 0.13101817667484283,
1239
+ 0.13425470888614655,
1240
+ 0.12961837649345398,
1241
+ 0.12532994151115417,
1242
+ 0.14014628529548645,
1243
+ 0.14592517912387848,
1244
+ 0.14245127141475677,
1245
+ 0.15898846089839935,
1246
+ 0.14187023043632507,
1247
+ 0.1525047868490219,
1248
+ 0.15915094316005707,
1249
+ 0.11151237040758133,
1250
+ 0.14539368450641632,
1251
+ 0.14672277867794037,
1252
+ 0.16712842881679535,
1253
+ 0.16369090974330902,
1254
+ 0.13350342214107513,
1255
+ 0.15044137835502625
1256
  ],
1257
  "base_loss": [
1258
+ 11.94859790802002,
1259
+ 11.840438842773438,
1260
+ 11.64094066619873,
1261
+ 11.324628829956055,
1262
+ 10.98892879486084,
1263
+ 10.639241218566895,
1264
+ 10.578450202941895,
1265
+ 10.493820190429688,
1266
+ 10.257356643676758,
1267
+ 10.105127334594727,
1268
+ 9.852198600769043,
1269
+ 9.760133743286133,
1270
+ 9.558384895324707,
1271
+ 9.300349235534668,
1272
+ 9.154654502868652,
1273
+ 8.889933586120605,
1274
+ 8.539278984069824,
1275
+ 8.303180694580078,
1276
+ 8.065629959106445,
1277
+ 7.7729902267456055,
1278
+ 7.503629207611084,
1279
+ 7.239531517028809,
1280
+ 6.897196292877197,
1281
+ 6.684809684753418,
1282
+ 6.3543596267700195,
1283
+ 6.028420448303223,
1284
+ 5.7321271896362305,
1285
+ 5.48431396484375,
1286
+ 5.272823333740234,
1287
+ 4.938896656036377,
1288
+ 4.6226396560668945,
1289
+ 4.419801235198975,
1290
+ 4.154916286468506,
1291
+ 3.8767247200012207,
1292
+ 3.7096211910247803,
1293
+ 3.4114320278167725,
1294
+ 3.246389389038086,
1295
+ 3.0310475826263428,
1296
+ 2.960927724838257,
1297
+ 2.78560209274292,
1298
+ 2.619802236557007,
1299
+ 2.522150754928589,
1300
+ 2.482307195663452,
1301
+ 2.383479356765747,
1302
+ 2.2891829013824463,
1303
+ 2.306661367416382,
1304
+ 2.2119381427764893,
1305
+ 2.176996946334839,
1306
+ 2.1396560668945312,
1307
+ 2.1182291507720947,
1308
+ 2.138927936553955,
1309
+ 2.099632740020752,
1310
+ 2.0461552143096924,
1311
+ 2.0577521324157715,
1312
+ 2.014800786972046,
1313
+ 2.057321786880493,
1314
+ 2.0552284717559814,
1315
+ 1.9784787893295288,
1316
+ 1.9504175186157227,
1317
+ 1.9787120819091797,
1318
+ 1.8843504190444946,
1319
+ 1.936767816543579,
1320
+ 1.9563958644866943,
1321
+ 1.8449238538742065,
1322
+ 1.8611806631088257,
1323
+ 1.928608775138855,
1324
+ 1.7879769802093506,
1325
+ 1.8571434020996094,
1326
+ 1.821972131729126,
1327
+ 1.8500518798828125,
1328
+ 1.861620306968689,
1329
+ 1.8515946865081787,
1330
+ 1.8433369398117065,
1331
+ 1.8581030368804932,
1332
+ 1.7452529668807983,
1333
+ 1.7694547176361084,
1334
+ 1.8078683614730835,
1335
+ 1.7968032360076904,
1336
+ 1.7500313520431519,
1337
+ 1.6263803243637085,
1338
+ 1.6820186376571655,
1339
+ 1.6738622188568115,
1340
+ 1.672284483909607,
1341
+ 1.655076026916504,
1342
+ 1.586304783821106,
1343
+ 1.4643189907073975,
1344
+ 1.5293394327163696,
1345
+ 1.5075138807296753,
1346
+ 1.5243085622787476,
1347
+ 1.5082238912582397,
1348
+ 1.4548567533493042,
1349
+ 1.3837165832519531,
1350
+ 1.41783607006073,
1351
+ 1.2917324304580688,
1352
+ 1.2966030836105347,
1353
+ 1.2949161529541016,
1354
+ 1.2839399576187134,
1355
+ 1.2788225412368774,
1356
+ 1.2647664546966553,
1357
+ 1.2145267724990845,
1358
+ 1.2092732191085815,
1359
+ 1.182142972946167,
1360
+ 1.1864356994628906,
1361
+ 1.121931791305542,
1362
+ 1.0740060806274414,
1363
+ 1.1255488395690918,
1364
+ 1.0717084407806396,
1365
+ 1.0892133712768555,
1366
+ 1.1152369976043701,
1367
+ 1.050582766532898,
1368
+ 1.0131772756576538,
1369
+ 1.0159205198287964,
1370
+ 0.9687409996986389,
1371
+ 0.9926728010177612,
1372
+ 0.9656931161880493,
1373
+ 0.9558922052383423,
1374
+ 0.9416300058364868,
1375
+ 0.9168248176574707,
1376
+ 0.9094190001487732,
1377
+ 0.9148558378219604,
1378
+ 0.8885840773582458,
1379
+ 0.8573789596557617,
1380
+ 0.8440743088722229,
1381
+ 0.8346267342567444,
1382
+ 0.8313559293746948,
1383
+ 0.8086395263671875,
1384
+ 0.8341150879859924,
1385
+ 0.8089955449104309,
1386
+ 0.8083120584487915,
1387
+ 0.7901385426521301,
1388
+ 0.8018090128898621,
1389
+ 0.7361825108528137,
1390
+ 0.7226907014846802,
1391
+ 0.7151471972465515,
1392
+ 0.6951335668563843,
1393
+ 0.6982666850090027,
1394
+ 0.6982697248458862,
1395
+ 0.7394556403160095,
1396
+ 0.7399061322212219,
1397
+ 0.7149839401245117,
1398
+ 0.6887863278388977,
1399
+ 0.7093302607536316,
1400
+ 0.6588761210441589,
1401
+ 0.6807154417037964,
1402
+ 0.6855794191360474,
1403
+ 0.6251148581504822,
1404
+ 0.6450304985046387,
1405
+ 0.6763089299201965,
1406
+ 0.6778842210769653,
1407
+ 0.6132544875144958,
1408
+ 0.6343541741371155,
1409
+ 0.611568808555603,
1410
+ 0.5893497467041016,
1411
+ 0.5825234651565552,
1412
+ 0.5738789439201355,
1413
+ 0.5681091547012329,
1414
+ 0.5831159949302673,
1415
+ 0.550575852394104,
1416
+ 0.5623425245285034,
1417
+ 0.5403538942337036,
1418
+ 0.5861159563064575,
1419
+ 0.5384799242019653,
1420
+ 0.5562480688095093,
1421
+ 0.5091863870620728,
1422
+ 0.5155102014541626,
1423
+ 0.5429946184158325,
1424
+ 0.527049720287323,
1425
+ 0.49557358026504517,
1426
+ 0.5270788073539734,
1427
+ 0.48558399081230164,
1428
+ 0.48906925320625305,
1429
+ 0.49761268496513367,
1430
+ 0.4692177176475525,
1431
+ 0.478564590215683,
1432
+ 0.4760856330394745,
1433
+ 0.44141462445259094,
1434
+ 0.4625226855278015,
1435
+ 0.4593048095703125,
1436
+ 0.44456854462623596,
1437
+ 0.5015004277229309,
1438
+ 0.45153093338012695,
1439
+ 0.4653579294681549,
1440
+ 0.4412159323692322,
1441
+ 0.46345633268356323,
1442
+ 0.4392021596431732,
1443
+ 0.42899444699287415,
1444
+ 0.4141101837158203,
1445
+ 0.414285808801651,
1446
+ 0.41810232400894165,
1447
+ 0.42292097210884094,
1448
+ 0.3962114751338959,
1449
+ 0.4087282121181488,
1450
+ 0.4289020001888275,
1451
+ 0.42968955636024475,
1452
+ 0.3943832814693451,
1453
+ 0.41051462292671204,
1454
+ 0.3950999677181244,
1455
+ 0.40754038095474243,
1456
+ 0.3994915783405304,
1457
+ 0.41414302587509155,
1458
+ 0.39105725288391113,
1459
+ 0.37961503863334656,
1460
+ 0.38364723324775696,
1461
+ 0.4205988347530365,
1462
+ 0.41498011350631714,
1463
+ 0.3854501247406006,
1464
+ 0.4055662751197815,
1465
+ 0.3829682171344757,
1466
+ 0.37448883056640625,
1467
+ 0.36969661712646484,
1468
+ 0.35262203216552734,
1469
+ 0.4099288284778595,
1470
+ 0.3834565579891205,
1471
+ 0.3552636504173279,
1472
+ 0.32924869656562805,
1473
+ 0.36005476117134094,
1474
+ 0.3339636027812958,
1475
+ 0.3542087972164154,
1476
+ 0.3503226339817047,
1477
+ 0.36919230222702026,
1478
+ 0.3485560715198517,
1479
+ 0.3238471448421478,
1480
+ 0.3353195786476135,
1481
+ 0.3599640429019928,
1482
+ 0.3437122702598572,
1483
+ 0.3387219309806824,
1484
+ 0.3057003617286682,
1485
+ 0.2906756103038788,
1486
+ 0.34256139397621155,
1487
+ 0.31671828031539917,
1488
+ 0.3027709126472473,
1489
+ 0.3286406099796295,
1490
+ 0.29700973629951477,
1491
+ 0.33028173446655273,
1492
+ 0.30202531814575195,
1493
+ 0.32036495208740234,
1494
+ 0.2988939583301544,
1495
+ 0.27923664450645447,
1496
+ 0.3316617012023926,
1497
+ 0.3278091251850128,
1498
+ 0.28900453448295593,
1499
+ 0.3363553583621979,
1500
+ 0.36397480964660645,
1501
+ 0.30927157402038574,
1502
+ 0.2704579532146454,
1503
+ 0.3233926594257355,
1504
+ 0.3281395137310028,
1505
+ 0.2813015282154083,
1506
+ 0.2918522357940674,
1507
+ 0.310793936252594,
1508
+ 0.26009702682495117,
1509
+ 0.3331923484802246,
1510
+ 0.28003939986228943,
1511
+ 0.33480802178382874,
1512
+ 0.28114771842956543,
1513
+ 0.28258591890335083,
1514
+ 0.2621360719203949,
1515
+ 0.26446372270584106,
1516
+ 0.25052106380462646,
1517
+ 0.24290919303894043,
1518
+ 0.28893521428108215,
1519
+ 0.28211459517478943,
1520
+ 0.26482027769088745,
1521
+ 0.2912617027759552,
1522
+ 0.2805217206478119,
1523
+ 0.262124240398407,
1524
+ 0.2579425275325775,
1525
+ 0.248891219496727,
1526
+ 0.293045312166214,
1527
+ 0.2911718189716339,
1528
+ 0.24228894710540771,
1529
+ 0.2836480736732483,
1530
+ 0.2810485363006592,
1531
+ 0.2615375816822052,
1532
+ 0.24184216558933258,
1533
+ 0.2451353818178177,
1534
+ 0.23175807297229767,
1535
+ 0.23173300921916962,
1536
+ 0.23882640898227692,
1537
+ 0.2664676904678345,
1538
+ 0.26882803440093994,
1539
+ 0.2690429091453552,
1540
+ 0.24577376246452332,
1541
+ 0.23808881640434265,
1542
+ 0.26410430669784546,
1543
+ 0.23443324863910675,
1544
+ 0.2398104965686798,
1545
+ 0.22221653163433075,
1546
+ 0.26233434677124023,
1547
+ 0.23280136287212372,
1548
+ 0.21146978437900543,
1549
+ 0.2486180067062378,
1550
+ 0.20887069404125214,
1551
+ 0.2606666386127472,
1552
+ 0.24783062934875488,
1553
+ 0.25852853059768677,
1554
+ 0.21136604249477386,
1555
+ 0.19507789611816406,
1556
+ 0.2310580462217331,
1557
+ 0.19218036532402039,
1558
+ 0.28741464018821716,
1559
+ 0.2194812297821045,
1560
+ 0.22402501106262207,
1561
+ 0.2472635954618454,
1562
+ 0.2330581098794937,
1563
+ 0.18851839005947113,
1564
+ 0.22764381766319275,
1565
+ 0.2213810235261917,
1566
+ 0.2095119208097458,
1567
+ 0.20449183881282806,
1568
+ 0.24300800263881683,
1569
+ 0.21304859220981598,
1570
+ 0.21145255863666534,
1571
+ 0.22761793434619904,
1572
+ 0.22742269933223724,
1573
+ 0.2193547636270523,
1574
+ 0.1930546760559082,
1575
+ 0.21217192709445953,
1576
+ 0.19071756303310394,
1577
+ 0.24465782940387726,
1578
+ 0.21515420079231262,
1579
+ 0.1957842856645584,
1580
+ 0.24550697207450867,
1581
+ 0.20868192613124847,
1582
+ 0.20368941128253937,
1583
+ 0.19286994636058807,
1584
+ 0.25735655426979065,
1585
+ 0.19909431040287018,
1586
+ 0.22133401036262512,
1587
+ 0.19772383570671082,
1588
+ 0.2013375461101532,
1589
+ 0.23962707817554474,
1590
+ 0.19507475197315216,
1591
+ 0.22025993466377258,
1592
+ 0.16754481196403503,
1593
+ 0.21235434710979462,
1594
+ 0.23731891810894012,
1595
+ 0.20245544612407684,
1596
+ 0.2115701586008072,
1597
+ 0.18609514832496643,
1598
+ 0.19329078495502472,
1599
+ 0.191462442278862,
1600
+ 0.21664702892303467,
1601
+ 0.2282712459564209,
1602
+ 0.1856536567211151,
1603
+ 0.21686986088752747,
1604
+ 0.21144740283489227,
1605
+ 0.2090771198272705,
1606
+ 0.18183530867099762,
1607
+ 0.2151528149843216,
1608
+ 0.21354100108146667,
1609
+ 0.189223974943161,
1610
+ 0.2282743752002716,
1611
+ 0.18897734582424164,
1612
+ 0.1972484588623047,
1613
+ 0.18682360649108887,
1614
+ 0.19328011572360992,
1615
+ 0.22190427780151367,
1616
+ 0.17040297389030457,
1617
+ 0.2322014719247818,
1618
+ 0.17299620807170868,
1619
+ 0.19289156794548035,
1620
+ 0.19416391849517822,
1621
+ 0.2013746052980423,
1622
+ 0.16254912316799164,
1623
+ 0.19851411879062653,
1624
+ 0.17761853337287903,
1625
+ 0.19167092442512512,
1626
+ 0.16775621473789215,
1627
+ 0.19005520641803741,
1628
+ 0.18174482882022858,
1629
+ 0.1820433884859085,
1630
+ 0.1686544120311737,
1631
+ 0.17813602089881897,
1632
+ 0.18956564366817474,
1633
+ 0.18593311309814453,
1634
+ 0.15203449130058289,
1635
+ 0.16920749843120575,
1636
+ 0.1750987023115158,
1637
+ 0.1903667151927948,
1638
+ 0.21415254473686218,
1639
+ 0.19226084649562836,
1640
+ 0.16631709039211273,
1641
+ 0.22381491959095,
1642
+ 0.1565556824207306,
1643
+ 0.178656667470932,
1644
+ 0.16794231534004211,
1645
+ 0.2035113424062729,
1646
+ 0.15502004325389862,
1647
+ 0.2080964893102646,
1648
+ 0.16120600700378418,
1649
+ 0.17409558594226837,
1650
+ 0.18227358162403107,
1651
+ 0.15712487697601318,
1652
+ 0.1848597526550293,
1653
+ 0.22843310236930847,
1654
+ 0.1544765681028366,
1655
+ 0.16624769568443298,
1656
+ 0.18198521435260773,
1657
+ 0.18578679859638214,
1658
+ 0.1581115424633026,
1659
+ 0.1675325185060501,
1660
+ 0.17194543778896332,
1661
+ 0.1624213606119156,
1662
+ 0.15694668889045715,
1663
+ 0.14574037492275238,
1664
+ 0.19108226895332336,
1665
+ 0.20138464868068695,
1666
+ 0.16426335275173187,
1667
+ 0.1591721922159195,
1668
+ 0.21045982837677002,
1669
+ 0.1457131952047348,
1670
+ 0.16802136600017548,
1671
+ 0.17403490841388702,
1672
+ 0.1713341623544693,
1673
+ 0.18586938083171844,
1674
+ 0.1811138540506363,
1675
+ 0.14901359379291534,
1676
+ 0.16701973974704742,
1677
+ 0.1604980081319809,
1678
+ 0.1447248011827469,
1679
+ 0.12645721435546875,
1680
+ 0.16988113522529602,
1681
+ 0.16805614531040192,
1682
+ 0.16320255398750305,
1683
+ 0.1912159025669098,
1684
+ 0.1462724506855011,
1685
+ 0.170498326420784,
1686
+ 0.15915271639823914,
1687
+ 0.16366423666477203,
1688
+ 0.13886980712413788,
1689
+ 0.1343265324831009,
1690
+ 0.17708076536655426,
1691
+ 0.14998695254325867,
1692
+ 0.17960231006145477,
1693
+ 0.156875342130661,
1694
+ 0.15048567950725555,
1695
+ 0.18893864750862122,
1696
+ 0.16481050848960876,
1697
+ 0.16897451877593994,
1698
+ 0.15260040760040283,
1699
+ 0.11548135429620743,
1700
+ 0.1384969800710678,
1701
+ 0.15838623046875,
1702
+ 0.15708640217781067,
1703
+ 0.1365736722946167,
1704
+ 0.18369968235492706,
1705
+ 0.16396315395832062,
1706
+ 0.1661636084318161,
1707
+ 0.17268292605876923,
1708
+ 0.19088900089263916,
1709
+ 0.14722244441509247,
1710
+ 0.19560489058494568,
1711
+ 0.14434412121772766,
1712
+ 0.16260702908039093,
1713
+ 0.1408696174621582,
1714
+ 0.168971449136734,
1715
+ 0.17031393945217133,
1716
+ 0.14803941547870636,
1717
+ 0.16470588743686676,
1718
+ 0.1566447913646698,
1719
+ 0.20542870461940765,
1720
+ 0.1710493564605713,
1721
+ 0.18512073159217834,
1722
+ 0.1473395824432373,
1723
+ 0.15322645008563995,
1724
+ 0.1436643898487091,
1725
+ 0.1595940738916397,
1726
+ 0.18959307670593262,
1727
+ 0.15076453983783722,
1728
+ 0.1550501435995102,
1729
+ 0.18462494015693665,
1730
+ 0.1573774814605713,
1731
+ 0.16171033680438995,
1732
+ 0.15045134723186493,
1733
+ 0.17703239619731903,
1734
+ 0.14783276617527008,
1735
+ 0.16923877596855164,
1736
+ 0.1926303207874298,
1737
+ 0.19895410537719727,
1738
+ 0.17747224867343903,
1739
+ 0.19405317306518555,
1740
+ 0.14278696477413177,
1741
+ 0.11125724017620087,
1742
+ 0.1512247622013092,
1743
+ 0.15484271943569183,
1744
+ 0.12213726341724396,
1745
+ 0.162554070353508,
1746
+ 0.16736207902431488,
1747
+ 0.13373737037181854,
1748
+ 0.17999958992004395,
1749
+ 0.15072117745876312,
1750
+ 0.1646808385848999,
1751
+ 0.14545665681362152,
1752
+ 0.16830526292324066,
1753
+ 0.12739090621471405,
1754
+ 0.10390200465917587,
1755
+ 0.17377625405788422,
1756
+ 0.16494156420230865,
1757
+ 0.13795830309391022,
1758
+ 0.14690710604190826,
1759
+ 0.12438012659549713,
1760
+ 0.14515355229377747,
1761
+ 0.14279648661613464,
1762
+ 0.12356720119714737,
1763
+ 0.13406822085380554,
1764
+ 0.13987518846988678,
1765
+ 0.1401202231645584,
1766
+ 0.1880432814359665,
1767
+ 0.18093450367450714,
1768
+ 0.1592968851327896,
1769
+ 0.17524102330207825,
1770
+ 0.14916424453258514,
1771
+ 0.15612027049064636,
1772
+ 0.1444508135318756,
1773
+ 0.1407918483018875,
1774
+ 0.12171785533428192,
1775
+ 0.13664177060127258,
1776
+ 0.17838147282600403,
1777
+ 0.15328529477119446,
1778
+ 0.12011374533176422,
1779
+ 0.1469172239303589,
1780
+ 0.15794073045253754,
1781
+ 0.16883553564548492,
1782
+ 0.14052900671958923,
1783
+ 0.18314428627490997,
1784
+ 0.16488347947597504,
1785
+ 0.11859999597072601,
1786
+ 0.14168976247310638,
1787
+ 0.14921541512012482,
1788
+ 0.17350149154663086,
1789
+ 0.17269587516784668,
1790
+ 0.13202032446861267,
1791
+ 0.15186575055122375,
1792
+ 0.13948580622673035,
1793
+ 0.1624227911233902,
1794
+ 0.14760947227478027,
1795
+ 0.1712327003479004,
1796
+ 0.13646575808525085,
1797
+ 0.14727208018302917,
1798
+ 0.1506548374891281,
1799
+ 0.15115901827812195,
1800
+ 0.12916085124015808,
1801
+ 0.15773268043994904,
1802
+ 0.14408628642559052,
1803
+ 0.12364902347326279,
1804
+ 0.11989416927099228,
1805
+ 0.15335413813591003,
1806
+ 0.1312076300382614,
1807
+ 0.1278875768184662,
1808
+ 0.1595352590084076,
1809
+ 0.17473769187927246,
1810
+ 0.139608696103096,
1811
+ 0.12069375813007355,
1812
+ 0.12792883813381195,
1813
+ 0.14324326813220978,
1814
+ 0.16077683866024017,
1815
+ 0.13171955943107605,
1816
+ 0.12155797332525253,
1817
+ 0.14841106534004211,
1818
+ 0.14754816889762878,
1819
+ 0.13991589844226837,
1820
+ 0.14981675148010254,
1821
+ 0.15216532349586487,
1822
+ 0.14560063183307648,
1823
+ 0.15437017381191254,
1824
+ 0.1558697521686554,
1825
+ 0.12956713140010834,
1826
+ 0.1606471836566925,
1827
+ 0.1396074891090393,
1828
+ 0.1678946316242218,
1829
+ 0.14537987112998962,
1830
+ 0.12508836388587952,
1831
+ 0.13816596567630768,
1832
+ 0.1560039520263672,
1833
+ 0.13455283641815186,
1834
+ 0.13224788010120392,
1835
+ 0.14797094464302063,
1836
+ 0.1326710730791092,
1837
+ 0.12267882376909256,
1838
+ 0.15669405460357666,
1839
+ 0.1376783549785614,
1840
+ 0.13550518453121185,
1841
+ 0.15633442997932434,
1842
+ 0.13264869153499603,
1843
+ 0.15317828953266144,
1844
+ 0.13146944344043732,
1845
+ 0.1285228580236435,
1846
+ 0.14777718484401703,
1847
+ 0.10374288260936737,
1848
+ 0.13567717373371124,
1849
+ 0.11807692050933838,
1850
+ 0.14930854737758636,
1851
+ 0.10957635939121246,
1852
+ 0.1414402723312378,
1853
+ 0.16026048362255096,
1854
+ 0.1368298977613449,
1855
+ 0.12276037782430649,
1856
+ 0.1520388275384903,
1857
+ 0.14423634111881256,
1858
+ 0.1343306601047516,
1859
+ 0.15340445935726166,
1860
+ 0.14033977687358856,
1861
+ 0.11754585802555084,
1862
+ 0.13860604166984558,
1863
+ 0.14536812901496887,
1864
+ 0.1477390080690384,
1865
+ 0.13101817667484283,
1866
+ 0.13425470888614655,
1867
+ 0.12961837649345398,
1868
+ 0.12532994151115417,
1869
+ 0.14014628529548645,
1870
+ 0.14592517912387848,
1871
+ 0.14245127141475677,
1872
+ 0.15898846089839935,
1873
+ 0.14187023043632507,
1874
+ 0.1525047868490219,
1875
+ 0.15915094316005707,
1876
+ 0.11151237040758133,
1877
+ 0.14539368450641632,
1878
+ 0.14672277867794037,
1879
+ 0.16712842881679535,
1880
+ 0.16369090974330902,
1881
+ 0.13350342214107513,
1882
+ 0.15044137835502625
1883
  ],
1884
  "lr": [
1885
  1.0458911419423694e-06,
 
2554
  ],
2555
  "eval_accuracy": [
2556
  0.0,
2557
+ 0.005263157894736842,
2558
+ 0.015789473684210527,
2559
+ 0.08105263157894736,
2560
+ 0.11578947368421053,
2561
+ 0.22842105263157894,
2562
+ 0.2768421052631579,
2563
+ 0.3231578947368421,
2564
+ 0.38842105263157894,
2565
+ 0.4652631578947368,
2566
+ 0.5305263157894737,
2567
+ 0.4968421052631579,
2568
+ 0.5389473684210526,
2569
+ 0.5621052631578948,
2570
+ 0.5831578947368421,
2571
+ 0.5631578947368421,
2572
+ 0.5894736842105263,
2573
+ 0.5768421052631579,
2574
+ 0.588421052631579,
2575
+ 0.5831578947368421
2576
  ]
2577
  },
2578
+ "final_accuracy": 0.515,
2579
  "sft_eval": {
2580
  "config": {
2581
  "ops": "add_sub",
2582
  "K": null,
2583
  "mode": "sft",
2584
  "n_digits": 6,
2585
+ "n_per_split": 100
2586
  },
2587
  "splits": {
2588
  "add_S0": {
2589
+ "full_accuracy": 0.89,
2590
+ "digit_accuracy": 0.9828571428571429,
2591
+ "n_examples": 100,
2592
  "per_subtask": {
2593
  "SA": {
2594
+ "accuracy": 0.9834710743801653,
2595
+ "count": 605
2596
  },
2597
  "SS": {
2598
+ "accuracy": 0.9789473684210527,
2599
+ "count": 95
2600
  }
2601
  }
2602
  },
2603
  "add_S1": {
2604
+ "full_accuracy": 0.88,
2605
+ "digit_accuracy": 0.9828571428571429,
2606
+ "n_examples": 100,
2607
  "per_subtask": {
2608
  "SA": {
2609
+ "accuracy": 0.9754901960784313,
2610
+ "count": 204
2611
  },
2612
  "SC": {
2613
+ "accuracy": 0.9881656804733728,
2614
+ "count": 169
2615
  },
2616
  "SS": {
2617
+ "accuracy": 1.0,
2618
+ "count": 31
2619
  },
2620
  "UC": {
2621
+ "accuracy": 0.9831081081081081,
2622
+ "count": 296
2623
  }
2624
  }
2625
  },
2626
  "add_S2": {
2627
+ "full_accuracy": 0.47,
2628
+ "digit_accuracy": 0.9171428571428571,
2629
+ "n_examples": 100,
2630
  "per_subtask": {
2631
  "SA": {
2632
+ "accuracy": 0.9693251533742331,
2633
+ "count": 163
2634
  },
2635
  "SC": {
2636
+ "accuracy": 0.9846153846153847,
2637
+ "count": 130
2638
  },
2639
  "SS": {
2640
+ "accuracy": 0.9655172413793104,
2641
+ "count": 87
2642
  },
2643
  "UC": {
2644
+ "accuracy": 0.7635467980295566,
2645
+ "count": 203
2646
  },
2647
  "US": {
2648
  "accuracy": 1.0,
2649
+ "count": 117
2650
  }
2651
  }
2652
  },
2653
  "add_S3": {
2654
+ "full_accuracy": 0.42,
2655
+ "digit_accuracy": 0.8785714285714286,
2656
+ "n_examples": 100,
2657
  "per_subtask": {
2658
  "SA": {
2659
+ "accuracy": 0.9669421487603306,
2660
+ "count": 121
2661
  },
2662
  "SC": {
2663
+ "accuracy": 0.9917355371900827,
2664
+ "count": 121
2665
  },
2666
  "SS": {
2667
+ "accuracy": 0.9795918367346939,
2668
+ "count": 49
2669
  },
2670
  "UC": {
2671
+ "accuracy": 0.7741935483870968,
2672
+ "count": 186
2673
  },
2674
  "US": {
2675
+ "accuracy": 0.8340807174887892,
2676
+ "count": 223
2677
  }
2678
  }
2679
  },
2680
  "add_S4": {
2681
+ "full_accuracy": 0.43,
2682
+ "digit_accuracy": 0.8085714285714286,
2683
+ "n_examples": 100,
2684
  "per_subtask": {
2685
  "SA": {
2686
  "accuracy": 1.0,
2687
+ "count": 104
2688
  },
2689
  "SC": {
2690
+ "accuracy": 0.9716981132075472,
2691
+ "count": 106
2692
  },
2693
  "SS": {
2694
  "accuracy": 1.0,
2695
+ "count": 23
2696
  },
2697
  "UC": {
2698
+ "accuracy": 0.7625,
2699
+ "count": 160
2700
  },
2701
  "US": {
2702
+ "accuracy": 0.6970684039087948,
2703
+ "count": 307
2704
  }
2705
  }
2706
  },
2707
  "add_S5": {
2708
+ "full_accuracy": 0.17,
2709
+ "digit_accuracy": 0.5985714285714285,
2710
+ "n_examples": 100,
2711
  "per_subtask": {
2712
  "SA": {
2713
  "accuracy": 1.0,
2714
+ "count": 100
2715
  },
2716
  "SC": {
2717
  "accuracy": 1.0,
2718
+ "count": 100
2719
  },
2720
  "UC": {
2721
+ "accuracy": 0.33,
2722
+ "count": 100
2723
  },
2724
  "US": {
2725
+ "accuracy": 0.465,
2726
+ "count": 400
2727
  }
2728
  }
2729
  },
2730
  "add_S6": {
2731
+ "full_accuracy": 0.52,
2732
+ "digit_accuracy": 0.72,
2733
+ "n_examples": 100,
2734
  "per_subtask": {
2735
  "SC": {
2736
  "accuracy": 1.0,
2737
+ "count": 100
2738
  },
2739
  "UC": {
2740
+ "accuracy": 0.64,
2741
+ "count": 100
2742
  },
2743
  "US": {
2744
+ "accuracy": 0.68,
2745
+ "count": 500
2746
  }
2747
  }
2748
  },
2749
  "add_random": {
2750
+ "full_accuracy": 0.76,
2751
+ "digit_accuracy": 0.9635714285714285,
2752
  "n_examples": 200,
2753
  "per_subtask": {
2754
  "SA": {
2755
+ "accuracy": 0.9731543624161074,
2756
+ "count": 447
2757
  },
2758
  "SC": {
2759
+ "accuracy": 0.975,
2760
+ "count": 320
2761
  },
2762
  "SS": {
2763
  "accuracy": 1.0,
2764
+ "count": 56
2765
  },
2766
  "UC": {
2767
+ "accuracy": 0.943289224952741,
2768
+ "count": 529
2769
  },
2770
  "US": {
2771
+ "accuracy": 0.9791666666666666,
2772
+ "count": 48
2773
  }
2774
  }
2775
  },
2776
  "add_C1": {
2777
+ "full_accuracy": 0.91,
2778
+ "digit_accuracy": 0.9857142857142858,
2779
+ "n_examples": 100,
2780
  "per_subtask": {
2781
  "SA": {
2782
+ "accuracy": 0.988,
2783
+ "count": 500
2784
  },
2785
  "SC": {
2786
  "accuracy": 1.0,
2787
+ "count": 100
2788
  },
2789
  "UC": {
2790
+ "accuracy": 0.96,
2791
+ "count": 100
2792
  }
2793
  }
2794
  },
2795
  "add_C2": {
2796
+ "full_accuracy": 0.71,
2797
  "digit_accuracy": 0.9571428571428572,
2798
+ "n_examples": 100,
2799
  "per_subtask": {
2800
  "SA": {
2801
  "accuracy": 0.99,
2802
+ "count": 400
2803
  },
2804
  "SC": {
2805
+ "accuracy": 1.0,
2806
+ "count": 100
2807
  },
2808
  "UC": {
2809
+ "accuracy": 0.8333333333333334,
2810
+ "count": 156
2811
  },
2812
  "US": {
2813
  "accuracy": 1.0,
2814
+ "count": 44
2815
  }
2816
  }
2817
  },
2818
  "add_C3": {
2819
+ "full_accuracy": 0.4,
2820
+ "digit_accuracy": 0.8942857142857142,
2821
+ "n_examples": 100,
2822
  "per_subtask": {
2823
  "SA": {
2824
+ "accuracy": 0.9966666666666667,
2825
+ "count": 300
2826
  },
2827
  "SC": {
2828
  "accuracy": 1.0,
2829
+ "count": 100
2830
  },
2831
  "UC": {
2832
+ "accuracy": 0.6984924623115578,
2833
+ "count": 199
2834
  },
2835
  "US": {
2836
+ "accuracy": 0.8712871287128713,
2837
+ "count": 101
2838
  }
2839
  }
2840
  },
2841
  "add_C4": {
2842
+ "full_accuracy": 0.41,
2843
+ "digit_accuracy": 0.8814285714285715,
2844
+ "n_examples": 100,
2845
  "per_subtask": {
2846
  "SA": {
2847
  "accuracy": 1.0,
2848
+ "count": 200
2849
  },
2850
  "SC": {
2851
  "accuracy": 1.0,
2852
+ "count": 100
2853
  },
2854
  "UC": {
2855
+ "accuracy": 0.7727272727272727,
2856
+ "count": 264
2857
  },
2858
  "US": {
2859
+ "accuracy": 0.8308823529411765,
2860
+ "count": 136
2861
  }
2862
  }
2863
  },
2864
  "add_C5": {
2865
+ "full_accuracy": 0.39,
2866
+ "digit_accuracy": 0.8728571428571429,
2867
+ "n_examples": 100,
2868
  "per_subtask": {
2869
  "SA": {
2870
  "accuracy": 1.0,
2871
+ "count": 100
2872
  },
2873
  "SC": {
2874
+ "accuracy": 1.0,
2875
+ "count": 100
2876
  },
2877
  "UC": {
2878
+ "accuracy": 0.7870967741935484,
2879
+ "count": 310
2880
  },
2881
  "US": {
2882
+ "accuracy": 0.8789473684210526,
2883
+ "count": 190
2884
  }
2885
  }
2886
  },
2887
  "add_C6": {
2888
+ "full_accuracy": 0.42,
2889
+ "digit_accuracy": 0.8742857142857143,
2890
+ "n_examples": 100,
2891
  "per_subtask": {
2892
  "SC": {
2893
  "accuracy": 1.0,
2894
+ "count": 100
2895
  },
2896
  "UC": {
2897
+ "accuracy": 0.845945945945946,
2898
+ "count": 370
2899
  },
2900
  "US": {
2901
+ "accuracy": 0.8652173913043478,
2902
+ "count": 230
2903
  }
2904
  }
2905
  },
2906
  "sub_M0": {
2907
  "full_accuracy": 0.92,
2908
  "digit_accuracy": 0.9885714285714285,
2909
+ "n_examples": 100,
2910
  "per_subtask": {
2911
  "MD": {
2912
+ "accuracy": 0.9869918699186991,
2913
+ "count": 615
2914
  },
2915
  "ME": {
2916
  "accuracy": 1.0,
2917
+ "count": 85
2918
  }
2919
  }
2920
  },
2921
  "sub_M1": {
2922
+ "full_accuracy": 0.87,
2923
+ "digit_accuracy": 0.9785714285714285,
2924
+ "n_examples": 100,
2925
  "per_subtask": {
2926
  "MD": {
2927
+ "accuracy": 0.9965753424657534,
2928
+ "count": 292
2929
  },
2930
  "MB": {
2931
+ "accuracy": 0.9583333333333334,
2932
+ "count": 144
2933
  },
2934
  "ME": {
2935
+ "accuracy": 1.0,
2936
+ "count": 25
2937
  },
2938
  "UB": {
2939
+ "accuracy": 0.9665271966527197,
2940
+ "count": 239
2941
  }
2942
  }
2943
  },
2944
  "sub_M2": {
2945
+ "full_accuracy": 0.25,
2946
+ "digit_accuracy": 0.8685714285714285,
2947
+ "n_examples": 100,
2948
  "per_subtask": {
2949
  "MD": {
2950
+ "accuracy": 0.976303317535545,
2951
+ "count": 211
2952
  },
2953
  "MB": {
2954
+ "accuracy": 0.9391304347826087,
2955
+ "count": 115
2956
  },
2957
  "ME": {
2958
+ "accuracy": 0.9764705882352941,
2959
+ "count": 85
2960
  },
2961
  "UB": {
2962
+ "accuracy": 0.574585635359116,
2963
+ "count": 181
2964
  },
2965
  "UD": {
2966
+ "accuracy": 0.9907407407407407,
2967
+ "count": 108
2968
  }
2969
  }
2970
  },
2971
  "sub_M3": {
2972
+ "full_accuracy": 0.08,
2973
+ "digit_accuracy": 0.7985714285714286,
2974
+ "n_examples": 100,
2975
  "per_subtask": {
2976
  "MD": {
2977
+ "accuracy": 1.0,
2978
+ "count": 179
2979
  },
2980
  "MB": {
2981
  "accuracy": 1.0,
2982
+ "count": 103
2983
  },
2984
  "ME": {
2985
  "accuracy": 1.0,
2986
+ "count": 56
2987
  },
2988
  "UB": {
2989
+ "accuracy": 0.4966442953020134,
2990
+ "count": 149
2991
  },
2992
  "UD": {
2993
+ "accuracy": 0.6901408450704225,
2994
+ "count": 213
2995
  }
2996
  }
2997
  },
2998
  "sub_M4": {
2999
  "full_accuracy": 0.1,
3000
+ "digit_accuracy": 0.7171428571428572,
3001
+ "n_examples": 100,
3002
  "per_subtask": {
3003
  "MD": {
3004
  "accuracy": 1.0,
3005
+ "count": 200
3006
  },
3007
  "MB": {
3008
+ "accuracy": 0.99,
3009
+ "count": 100
3010
  },
3011
  "UB": {
3012
+ "accuracy": 0.31,
3013
+ "count": 100
3014
  },
3015
  "UD": {
3016
+ "accuracy": 0.5733333333333334,
3017
+ "count": 300
3018
  }
3019
  }
3020
  },
3021
  "sub_M5": {
3022
+ "full_accuracy": 0.07,
3023
+ "digit_accuracy": 0.6271428571428571,
3024
+ "n_examples": 100,
3025
  "per_subtask": {
3026
  "MD": {
3027
  "accuracy": 1.0,
3028
+ "count": 100
3029
  },
3030
  "MB": {
3031
  "accuracy": 1.0,
3032
+ "count": 100
3033
  },
3034
  "UB": {
3035
+ "accuracy": 0.35,
3036
+ "count": 100
3037
  },
3038
  "UD": {
3039
+ "accuracy": 0.51,
3040
+ "count": 400
3041
  }
3042
  }
3043
  },
3044
  "sub_random": {
3045
+ "full_accuracy": 0.775,
3046
+ "digit_accuracy": 0.9642857142857143,
3047
  "n_examples": 200,
3048
  "per_subtask": {
3049
  "MD": {
3050
+ "accuracy": 0.9916666666666667,
3051
+ "count": 600
3052
  },
3053
  "MB": {
3054
+ "accuracy": 0.9737827715355806,
3055
+ "count": 267
3056
  },
3057
  "ME": {
3058
+ "accuracy": 0.9811320754716981,
3059
  "count": 53
3060
  },
3061
  "UB": {
3062
+ "accuracy": 0.9179954441913439,
3063
+ "count": 439
3064
  },
3065
  "UD": {
3066
+ "accuracy": 0.975609756097561,
3067
+ "count": 41
3068
  }
3069
  }
3070
  },
3071
  "sub_B3": {
3072
+ "full_accuracy": 0.44,
3073
+ "digit_accuracy": 0.9071428571428571,
3074
+ "n_examples": 100,
3075
  "per_subtask": {
3076
  "MD": {
3077
  "accuracy": 1.0,
3078
+ "count": 300
3079
  },
3080
  "MB": {
3081
  "accuracy": 1.0,
3082
+ "count": 100
3083
  },
3084
  "UB": {
3085
+ "accuracy": 0.7309644670050761,
3086
+ "count": 197
3087
  },
3088
  "UD": {
3089
+ "accuracy": 0.883495145631068,
3090
+ "count": 103
3091
  }
3092
  }
3093
  },
3094
  "sub_B4": {
3095
+ "full_accuracy": 0.31,
3096
+ "digit_accuracy": 0.85,
3097
+ "n_examples": 100,
3098
  "per_subtask": {
3099
  "MD": {
3100
  "accuracy": 1.0,
3101
+ "count": 200
3102
  },
3103
  "MB": {
3104
  "accuracy": 1.0,
3105
+ "count": 100
3106
  },
3107
  "UB": {
3108
+ "accuracy": 0.7408906882591093,
3109
+ "count": 247
3110
  },
3111
  "UD": {
3112
+ "accuracy": 0.7320261437908496,
3113
+ "count": 153
3114
  }
3115
  }
3116
  },
3117
  "sub_B5": {
3118
+ "full_accuracy": 0.26,
3119
+ "digit_accuracy": 0.82,
3120
+ "n_examples": 100,
3121
  "per_subtask": {
3122
  "MD": {
3123
  "accuracy": 1.0,
3124
+ "count": 100
3125
  },
3126
  "MB": {
3127
  "accuracy": 1.0,
3128
+ "count": 100
3129
  },
3130
  "UB": {
3131
+ "accuracy": 0.714765100671141,
3132
+ "count": 298
3133
  },
3134
  "UD": {
3135
+ "accuracy": 0.7970297029702971,
3136
+ "count": 202
3137
  }
3138
  }
3139
  }
3140
  },
3141
  "summary": {
3142
+ "overall_accuracy": 0.515,
3143
+ "digit_accuracy": 0.8754395604395604,
3144
+ "total_examples": 2600,
3145
  "n_splits": 24
3146
  }
3147
  }
add_sub_baseline_100K_2L1H128d/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90899c085fd806de1c96d1f47c079dfeca53ab362dd0abf3388a5cb6e783183b
3
  size 157692826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc9a0ac6c9a2be1c3135f9224fd4bb5af35b77bbf1db0e8323800020dd009d0d
3
  size 157692826
add_sub_baseline_100K_2L1H128d/train_config.json CHANGED
@@ -69,16 +69,20 @@
69
  "no_wandb": false,
70
  "n_params": 39346560,
71
  "run_name": "add_sub_baseline_100K_2L1H128d",
72
- "git_commit": "f447da529caceac8c7d256cbb2cd185cbc50feac",
73
- "timestamp": "2026-04-12T15:44:58.727569+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
 
77
  "model_repo": "thoughtworks/arithmetic-sorl",
78
  "trainer_version": "sft",
79
- "wandb_run_id": "w68hfkt2",
80
- "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/w68hfkt2",
81
- "final_accuracy": 0.5025,
82
- "sft_accuracy": 0.5025,
 
 
 
83
  "eval_method": "ArithmeticEvaluator"
84
  }
 
69
  "no_wandb": false,
70
  "n_params": 39346560,
71
  "run_name": "add_sub_baseline_100K_2L1H128d",
72
+ "git_commit": "1d5a160e16a5070d61b881494e832aa88149b15c",
73
+ "timestamp": "2026-04-15T03:15:39.062270+00:00",
74
  "tokenizer": "Qwen/Qwen3-0.6B",
75
  "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
  "dataset_config": "add_sub_6digit",
77
+ "train_dataset": "fixed_train/train_100K_seed42.pt",
78
  "model_repo": "thoughtworks/arithmetic-sorl",
79
  "trainer_version": "sft",
80
+ "wandb_run_id": "vb53usae",
81
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/vb53usae",
82
+ "eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
83
+ "eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
84
+ "eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
85
+ "final_accuracy": 0.515,
86
+ "sft_accuracy": 0.515,
87
  "eval_method": "ArithmeticEvaluator"
88
  }