amirali1985 commited on
Commit
295fd9c
·
verified ·
1 Parent(s): 0db8692

Upload add_sub_sorl_v1_abs1_K1_10K

Browse files
add_sub_sorl_v1_abs1_K1_10K/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SorlModelWrapper"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": null,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 510,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2040,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 128,
20
+ "max_window_layers": 28,
21
+ "model_type": "qwen3",
22
+ "num_attention_heads": 3,
23
+ "num_hidden_layers": 2,
24
+ "num_key_value_heads": 3,
25
+ "pad_token_id": null,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_parameters": {
28
+ "rope_theta": 10000.0,
29
+ "rope_type": "default"
30
+ },
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "transformers_version": "5.5.0",
34
+ "use_cache": true,
35
+ "use_sliding_window": false,
36
+ "vocab_size": 151645
37
+ }
add_sub_sorl_v1_abs1_K1_10K/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": false,
4
+ "output_hidden_states": false,
5
+ "transformers_version": "5.5.0",
6
+ "use_cache": true
7
+ }
add_sub_sorl_v1_abs1_K1_10K/metrics.json ADDED
@@ -0,0 +1,1687 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "step": [
4
+ 50,
5
+ 100,
6
+ 150,
7
+ 207,
8
+ 257,
9
+ 307,
10
+ 364,
11
+ 414,
12
+ 464,
13
+ 521,
14
+ 571,
15
+ 621,
16
+ 678,
17
+ 728,
18
+ 778,
19
+ 835,
20
+ 885,
21
+ 935,
22
+ 992,
23
+ 1042,
24
+ 1092,
25
+ 1149,
26
+ 1199,
27
+ 1249,
28
+ 1306,
29
+ 1356,
30
+ 1406,
31
+ 1463,
32
+ 1513,
33
+ 1563,
34
+ 1620,
35
+ 1670,
36
+ 1720,
37
+ 1777,
38
+ 1827,
39
+ 1877,
40
+ 1934,
41
+ 1984,
42
+ 2034,
43
+ 2091,
44
+ 2141,
45
+ 2191,
46
+ 2248,
47
+ 2298,
48
+ 2348,
49
+ 2405,
50
+ 2455,
51
+ 2505,
52
+ 2562,
53
+ 2612,
54
+ 2662,
55
+ 2719,
56
+ 2769,
57
+ 2819,
58
+ 2876,
59
+ 2926,
60
+ 2976,
61
+ 3033,
62
+ 3083,
63
+ 3133
64
+ ],
65
+ "loss": [
66
+ NaN,
67
+ NaN,
68
+ NaN,
69
+ NaN,
70
+ NaN,
71
+ NaN,
72
+ NaN,
73
+ NaN,
74
+ NaN,
75
+ NaN,
76
+ NaN,
77
+ NaN,
78
+ NaN,
79
+ NaN,
80
+ NaN,
81
+ NaN,
82
+ NaN,
83
+ NaN,
84
+ NaN,
85
+ NaN,
86
+ NaN,
87
+ NaN,
88
+ NaN,
89
+ NaN,
90
+ NaN,
91
+ NaN,
92
+ NaN,
93
+ NaN,
94
+ NaN,
95
+ NaN,
96
+ NaN,
97
+ NaN,
98
+ NaN,
99
+ NaN,
100
+ NaN,
101
+ NaN,
102
+ NaN,
103
+ NaN,
104
+ NaN,
105
+ NaN,
106
+ NaN,
107
+ NaN,
108
+ NaN,
109
+ NaN,
110
+ NaN,
111
+ NaN,
112
+ NaN,
113
+ NaN,
114
+ NaN,
115
+ NaN,
116
+ NaN,
117
+ NaN,
118
+ NaN,
119
+ NaN,
120
+ NaN,
121
+ NaN,
122
+ NaN,
123
+ NaN,
124
+ NaN,
125
+ NaN
126
+ ],
127
+ "base_loss": [
128
+ 7.768276691436768,
129
+ 4.110814094543457,
130
+ 2.00011944770813,
131
+ 1.8781509399414062,
132
+ 1.8382772207260132,
133
+ 1.812304139137268,
134
+ 1.7306851148605347,
135
+ 1.727476716041565,
136
+ 1.7197872400283813,
137
+ 1.6057711839675903,
138
+ 1.2709606885910034,
139
+ 1.1420778036117554,
140
+ 0.902216911315918,
141
+ 0.8765246272087097,
142
+ 0.771567165851593,
143
+ 0.7372206449508667,
144
+ 0.6973976492881775,
145
+ 0.702940046787262,
146
+ 0.6661996841430664,
147
+ 0.594870388507843,
148
+ 0.5054638981819153,
149
+ 0.5699015259742737,
150
+ 0.5439233183860779,
151
+ 0.5391989946365356,
152
+ 0.4917656481266022,
153
+ 0.5458092093467712,
154
+ 0.4743604362010956,
155
+ 0.4410257935523987,
156
+ 0.44940850138664246,
157
+ 0.4665364623069763,
158
+ 0.39998432993888855,
159
+ 0.3829936385154724,
160
+ 0.37692418694496155,
161
+ 0.43536049127578735,
162
+ 0.41529184579849243,
163
+ 0.4333970248699188,
164
+ 0.41430020332336426,
165
+ 0.3589570224285126,
166
+ 0.32946068048477173,
167
+ 0.2883765399456024,
168
+ 0.32201504707336426,
169
+ 0.31255021691322327,
170
+ 0.2970984876155853,
171
+ 0.3347127139568329,
172
+ 0.310171902179718,
173
+ 0.3680216372013092,
174
+ 0.2886590361595154,
175
+ 0.29018694162368774,
176
+ 0.26444265246391296,
177
+ 0.273633748292923,
178
+ 0.2644065022468567,
179
+ 0.26346877217292786,
180
+ 0.20881788432598114,
181
+ 0.25836777687072754,
182
+ 0.21619343757629395,
183
+ 0.2508084177970886,
184
+ 0.2066682130098343,
185
+ 0.18159745633602142,
186
+ 0.21431173384189606,
187
+ 0.19220055639743805
188
+ ],
189
+ "info_loss": [
190
+ -0.29820680618286133,
191
+ -0.020511627197265625,
192
+ -0.03582203388214111,
193
+ -0.046578407287597656,
194
+ -0.10162782669067383,
195
+ -0.11258316040039062,
196
+ -0.45531249046325684,
197
+ -0.804049015045166,
198
+ -0.9047041535377502,
199
+ -0.904850423336029,
200
+ -0.6043012142181396,
201
+ -0.5437737107276917,
202
+ -0.38619208335876465,
203
+ -0.3382325768470764,
204
+ -0.3524826765060425,
205
+ -0.36228618025779724,
206
+ -0.3772541284561157,
207
+ -0.3371272087097168,
208
+ -0.3112085163593292,
209
+ -0.2882551848888397,
210
+ -0.2722354233264923,
211
+ -0.3066837787628174,
212
+ -0.29322195053100586,
213
+ -0.34488701820373535,
214
+ -0.2819674611091614,
215
+ -0.3375912308692932,
216
+ -0.3093259334564209,
217
+ -0.2779213786125183,
218
+ -0.2858109176158905,
219
+ -0.31344014406204224,
220
+ -0.29580312967300415,
221
+ -0.24960872530937195,
222
+ -0.24778319895267487,
223
+ -0.2526150643825531,
224
+ -0.258733868598938,
225
+ -0.2963833212852478,
226
+ -0.28865474462509155,
227
+ -0.24197861552238464,
228
+ -0.24057714641094208,
229
+ -0.2397194355726242,
230
+ -0.23951296508312225,
231
+ -0.24607932567596436,
232
+ -0.2575826346874237,
233
+ -0.28247666358947754,
234
+ -0.2385159730911255,
235
+ -0.2948923110961914,
236
+ -0.22245699167251587,
237
+ -0.24565109610557556,
238
+ -0.2091638743877411,
239
+ -0.21068799495697021,
240
+ -0.22219160199165344,
241
+ -0.2074809968471527,
242
+ -0.17261289060115814,
243
+ -0.225511372089386,
244
+ -0.18519316613674164,
245
+ -0.2172529101371765,
246
+ -0.18351851403713226,
247
+ -0.16235585510730743,
248
+ -0.19041140377521515,
249
+ -0.17723910510540009
250
+ ],
251
+ "abs_loss": [
252
+ 0.0,
253
+ 0.0,
254
+ 0.0,
255
+ 0.0,
256
+ 0.0,
257
+ 0.0,
258
+ 0.0,
259
+ 0.0,
260
+ 0.0,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0,
266
+ 0.0,
267
+ 0.0,
268
+ 0.0,
269
+ 0.0,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0,
284
+ 0.0,
285
+ 0.0,
286
+ 0.0,
287
+ 0.0,
288
+ 0.0,
289
+ 0.0,
290
+ 0.0,
291
+ 0.0,
292
+ 0.0,
293
+ 0.0,
294
+ 0.0,
295
+ 0.0,
296
+ 0.0,
297
+ 0.0,
298
+ 0.0,
299
+ 0.0,
300
+ 0.0,
301
+ 0.0,
302
+ 0.0,
303
+ 0.0,
304
+ 0.0,
305
+ 0.0,
306
+ 0.0,
307
+ 0.0,
308
+ 0.0,
309
+ 0.0,
310
+ 0.0,
311
+ 0.0
312
+ ],
313
+ "zipf_loss": [
314
+ 11.741195678710938,
315
+ 0.9040621519088745,
316
+ 0.049326345324516296,
317
+ 0.014370954595506191,
318
+ 0.008595990017056465,
319
+ 0.00705260643735528,
320
+ 0.014052295126020908,
321
+ 0.03310779482126236,
322
+ 0.027424722909927368,
323
+ 0.02580416575074196,
324
+ 0.02558719925582409,
325
+ 0.02553989179432392,
326
+ 0.02478562295436859,
327
+ 0.01847648434340954,
328
+ 0.015505120158195496,
329
+ 0.017019525170326233,
330
+ 0.012050194665789604,
331
+ 0.013144981116056442,
332
+ 0.009881021454930305,
333
+ 0.010805479250848293,
334
+ 0.009166087955236435,
335
+ 0.009159133769571781,
336
+ 0.0075935060158371925,
337
+ 0.007109250873327255,
338
+ 0.006107652559876442,
339
+ 0.005024964455515146,
340
+ 0.004673627205193043,
341
+ 0.006059933453798294,
342
+ 0.0038989605382084846,
343
+ 0.004231364466249943,
344
+ 0.005021039396524429,
345
+ 0.002918974496424198,
346
+ 0.002982738194987178,
347
+ 0.003413921222090721,
348
+ 0.003438219428062439,
349
+ 0.0028688814491033554,
350
+ 0.0031602769158780575,
351
+ 0.003254554932937026,
352
+ 0.003583212150260806,
353
+ 0.0021136049181222916,
354
+ 0.002153565175831318,
355
+ 0.0020841355435550213,
356
+ 0.0022789854556322098,
357
+ 0.0021481704898178577,
358
+ 0.0018883869051933289,
359
+ 0.0018621227936819196,
360
+ 0.0018467966001480818,
361
+ 0.001444419613108039,
362
+ 0.0018124142661690712,
363
+ 0.00142430339474231,
364
+ 0.0017039082013070583,
365
+ 0.0014954449143260717,
366
+ 0.0015363717684522271,
367
+ 0.0015524711925536394,
368
+ 0.0016609512967988849,
369
+ 0.001424457412213087,
370
+ 0.0015847194008529186,
371
+ 0.0014708241214975715,
372
+ 0.0015044735046103597,
373
+ 0.0013292990624904633
374
+ ],
375
+ "denoise_loss": [],
376
+ "ortho_loss": [
377
+ NaN,
378
+ NaN,
379
+ NaN,
380
+ NaN,
381
+ NaN,
382
+ NaN,
383
+ NaN,
384
+ NaN,
385
+ NaN,
386
+ NaN,
387
+ NaN,
388
+ NaN,
389
+ NaN,
390
+ NaN,
391
+ NaN,
392
+ NaN,
393
+ NaN,
394
+ NaN,
395
+ NaN,
396
+ NaN,
397
+ NaN,
398
+ NaN,
399
+ NaN,
400
+ NaN,
401
+ NaN,
402
+ NaN,
403
+ NaN,
404
+ NaN,
405
+ NaN,
406
+ NaN,
407
+ NaN,
408
+ NaN,
409
+ NaN,
410
+ NaN,
411
+ NaN,
412
+ NaN,
413
+ NaN,
414
+ NaN,
415
+ NaN,
416
+ NaN,
417
+ NaN,
418
+ NaN,
419
+ NaN,
420
+ NaN,
421
+ NaN,
422
+ NaN,
423
+ NaN,
424
+ NaN,
425
+ NaN,
426
+ NaN,
427
+ NaN,
428
+ NaN,
429
+ NaN,
430
+ NaN,
431
+ NaN,
432
+ NaN,
433
+ NaN,
434
+ NaN,
435
+ NaN,
436
+ NaN
437
+ ],
438
+ "lr": [
439
+ 3.9200000000000004e-05,
440
+ 7.92e-05,
441
+ 8e-05,
442
+ 8e-05,
443
+ 8e-05,
444
+ 8e-05,
445
+ 8e-05,
446
+ 8e-05,
447
+ 8e-05,
448
+ 8e-05,
449
+ 8e-05,
450
+ 8e-05,
451
+ 8e-05,
452
+ 8e-05,
453
+ 8e-05,
454
+ 8e-05,
455
+ 8e-05,
456
+ 8e-05,
457
+ 8e-05,
458
+ 8e-05,
459
+ 8e-05,
460
+ 8e-05,
461
+ 8e-05,
462
+ 8e-05,
463
+ 8e-05,
464
+ 8e-05,
465
+ 8e-05,
466
+ 8e-05,
467
+ 8e-05,
468
+ 8e-05,
469
+ 8e-05,
470
+ 8e-05,
471
+ 8e-05,
472
+ 8e-05,
473
+ 8e-05,
474
+ 8e-05,
475
+ 7.946710526315791e-05,
476
+ 7.650657894736843e-05,
477
+ 7.354605263157895e-05,
478
+ 7.017105263157896e-05,
479
+ 6.721052631578948e-05,
480
+ 6.425e-05,
481
+ 6.0875e-05,
482
+ 5.791447368421054e-05,
483
+ 5.495394736842105e-05,
484
+ 5.157894736842105e-05,
485
+ 4.861842105263157e-05,
486
+ 4.565789473684212e-05,
487
+ 4.2282894736842104e-05,
488
+ 3.9322368421052625e-05,
489
+ 3.636184210526315e-05,
490
+ 3.2986842105263165e-05,
491
+ 3.0026315789473686e-05,
492
+ 2.7065789473684206e-05,
493
+ 2.3690789473684223e-05,
494
+ 2.0730263157894743e-05,
495
+ 1.7769736842105264e-05,
496
+ 1.4394736842105275e-05,
497
+ 1.1434210526315796e-05,
498
+ 8.473684210526318e-06
499
+ ],
500
+ "emb_lr": [],
501
+ "eval_step": [
502
+ 150,
503
+ 307,
504
+ 464,
505
+ 621,
506
+ 778,
507
+ 935,
508
+ 1092,
509
+ 1199,
510
+ 1356,
511
+ 1513,
512
+ 1670,
513
+ 1827,
514
+ 1984,
515
+ 2141,
516
+ 2298,
517
+ 2455,
518
+ 2612,
519
+ 2769,
520
+ 2926,
521
+ 3083
522
+ ],
523
+ "eval_accuracy": [
524
+ 0.02,
525
+ 0.01,
526
+ 0.0,
527
+ 0.0,
528
+ 0.0,
529
+ 0.0,
530
+ 0.0,
531
+ 0.0,
532
+ 0.0,
533
+ 0.0,
534
+ 0.0,
535
+ 0.0,
536
+ 0.0,
537
+ 0.0,
538
+ 0.0,
539
+ 0.0,
540
+ 0.0,
541
+ 0.0,
542
+ 0.0,
543
+ 0.0
544
+ ]
545
+ },
546
+ "final_accuracy": 0.845,
547
+ "sft_eval": {
548
+ "config": {
549
+ "ops": "add_sub",
550
+ "K": null,
551
+ "mode": "sft",
552
+ "n_digits": 6,
553
+ "n_per_split": 100
554
+ },
555
+ "splits": {
556
+ "add_S0": {
557
+ "full_accuracy": 0.36,
558
+ "digit_accuracy": 0.8785714285714286,
559
+ "n_examples": 100,
560
+ "per_subtask": {
561
+ "SA": {
562
+ "accuracy": 0.8661157024793389,
563
+ "count": 605
564
+ },
565
+ "SS": {
566
+ "accuracy": 0.9578947368421052,
567
+ "count": 95
568
+ }
569
+ }
570
+ },
571
+ "add_S1": {
572
+ "full_accuracy": 0.5,
573
+ "digit_accuracy": 0.9085714285714286,
574
+ "n_examples": 100,
575
+ "per_subtask": {
576
+ "SA": {
577
+ "accuracy": 0.8921568627450981,
578
+ "count": 204
579
+ },
580
+ "SC": {
581
+ "accuracy": 0.9585798816568047,
582
+ "count": 169
583
+ },
584
+ "SS": {
585
+ "accuracy": 0.9032258064516129,
586
+ "count": 31
587
+ },
588
+ "UC": {
589
+ "accuracy": 0.8918918918918919,
590
+ "count": 296
591
+ }
592
+ }
593
+ },
594
+ "add_S2": {
595
+ "full_accuracy": 0.37,
596
+ "digit_accuracy": 0.8814285714285715,
597
+ "n_examples": 100,
598
+ "per_subtask": {
599
+ "SA": {
600
+ "accuracy": 0.9386503067484663,
601
+ "count": 163
602
+ },
603
+ "SC": {
604
+ "accuracy": 0.9076923076923077,
605
+ "count": 130
606
+ },
607
+ "SS": {
608
+ "accuracy": 0.9080459770114943,
609
+ "count": 87
610
+ },
611
+ "UC": {
612
+ "accuracy": 0.7684729064039408,
613
+ "count": 203
614
+ },
615
+ "US": {
616
+ "accuracy": 0.9487179487179487,
617
+ "count": 117
618
+ }
619
+ }
620
+ },
621
+ "add_S3": {
622
+ "full_accuracy": 0.24,
623
+ "digit_accuracy": 0.7985714285714286,
624
+ "n_examples": 100,
625
+ "per_subtask": {
626
+ "SA": {
627
+ "accuracy": 0.9008264462809917,
628
+ "count": 121
629
+ },
630
+ "SC": {
631
+ "accuracy": 0.9421487603305785,
632
+ "count": 121
633
+ },
634
+ "SS": {
635
+ "accuracy": 0.8775510204081632,
636
+ "count": 49
637
+ },
638
+ "UC": {
639
+ "accuracy": 0.7043010752688172,
640
+ "count": 186
641
+ },
642
+ "US": {
643
+ "accuracy": 0.726457399103139,
644
+ "count": 223
645
+ }
646
+ }
647
+ },
648
+ "add_S4": {
649
+ "full_accuracy": 0.21,
650
+ "digit_accuracy": 0.7085714285714285,
651
+ "n_examples": 100,
652
+ "per_subtask": {
653
+ "SA": {
654
+ "accuracy": 0.9326923076923077,
655
+ "count": 104
656
+ },
657
+ "SC": {
658
+ "accuracy": 0.9339622641509434,
659
+ "count": 106
660
+ },
661
+ "SS": {
662
+ "accuracy": 0.9565217391304348,
663
+ "count": 23
664
+ },
665
+ "UC": {
666
+ "accuracy": 0.7,
667
+ "count": 160
668
+ },
669
+ "US": {
670
+ "accuracy": 0.5407166123778502,
671
+ "count": 307
672
+ }
673
+ }
674
+ },
675
+ "add_S5": {
676
+ "full_accuracy": 0.19,
677
+ "digit_accuracy": 0.5757142857142857,
678
+ "n_examples": 100,
679
+ "per_subtask": {
680
+ "SA": {
681
+ "accuracy": 1.0,
682
+ "count": 100
683
+ },
684
+ "SC": {
685
+ "accuracy": 0.97,
686
+ "count": 100
687
+ },
688
+ "UC": {
689
+ "accuracy": 0.46,
690
+ "count": 100
691
+ },
692
+ "US": {
693
+ "accuracy": 0.4,
694
+ "count": 400
695
+ }
696
+ }
697
+ },
698
+ "add_S6": {
699
+ "full_accuracy": 0.32,
700
+ "digit_accuracy": 0.58,
701
+ "n_examples": 100,
702
+ "per_subtask": {
703
+ "SC": {
704
+ "accuracy": 1.0,
705
+ "count": 100
706
+ },
707
+ "UC": {
708
+ "accuracy": 0.6,
709
+ "count": 100
710
+ },
711
+ "US": {
712
+ "accuracy": 0.492,
713
+ "count": 500
714
+ }
715
+ }
716
+ },
717
+ "add_random": {
718
+ "full_accuracy": 0.495,
719
+ "digit_accuracy": 0.91,
720
+ "n_examples": 200,
721
+ "per_subtask": {
722
+ "SA": {
723
+ "accuracy": 0.8926174496644296,
724
+ "count": 447
725
+ },
726
+ "SC": {
727
+ "accuracy": 0.95625,
728
+ "count": 320
729
+ },
730
+ "SS": {
731
+ "accuracy": 0.9464285714285714,
732
+ "count": 56
733
+ },
734
+ "UC": {
735
+ "accuracy": 0.8960302457466919,
736
+ "count": 529
737
+ },
738
+ "US": {
739
+ "accuracy": 0.875,
740
+ "count": 48
741
+ }
742
+ }
743
+ },
744
+ "add_C1": {
745
+ "full_accuracy": 0.39,
746
+ "digit_accuracy": 0.8728571428571429,
747
+ "n_examples": 100,
748
+ "per_subtask": {
749
+ "SA": {
750
+ "accuracy": 0.866,
751
+ "count": 500
752
+ },
753
+ "SC": {
754
+ "accuracy": 0.99,
755
+ "count": 100
756
+ },
757
+ "UC": {
758
+ "accuracy": 0.79,
759
+ "count": 100
760
+ }
761
+ }
762
+ },
763
+ "add_C2": {
764
+ "full_accuracy": 0.49,
765
+ "digit_accuracy": 0.89,
766
+ "n_examples": 100,
767
+ "per_subtask": {
768
+ "SA": {
769
+ "accuracy": 0.905,
770
+ "count": 400
771
+ },
772
+ "SC": {
773
+ "accuracy": 0.99,
774
+ "count": 100
775
+ },
776
+ "UC": {
777
+ "accuracy": 0.8205128205128205,
778
+ "count": 156
779
+ },
780
+ "US": {
781
+ "accuracy": 0.7727272727272727,
782
+ "count": 44
783
+ }
784
+ }
785
+ },
786
+ "add_C3": {
787
+ "full_accuracy": 0.25,
788
+ "digit_accuracy": 0.8357142857142857,
789
+ "n_examples": 100,
790
+ "per_subtask": {
791
+ "SA": {
792
+ "accuracy": 0.9333333333333333,
793
+ "count": 300
794
+ },
795
+ "SC": {
796
+ "accuracy": 0.99,
797
+ "count": 100
798
+ },
799
+ "UC": {
800
+ "accuracy": 0.6532663316582915,
801
+ "count": 199
802
+ },
803
+ "US": {
804
+ "accuracy": 0.7524752475247525,
805
+ "count": 101
806
+ }
807
+ }
808
+ },
809
+ "add_C4": {
810
+ "full_accuracy": 0.35,
811
+ "digit_accuracy": 0.8342857142857143,
812
+ "n_examples": 100,
813
+ "per_subtask": {
814
+ "SA": {
815
+ "accuracy": 0.945,
816
+ "count": 200
817
+ },
818
+ "SC": {
819
+ "accuracy": 0.97,
820
+ "count": 100
821
+ },
822
+ "UC": {
823
+ "accuracy": 0.75,
824
+ "count": 264
825
+ },
826
+ "US": {
827
+ "accuracy": 0.7352941176470589,
828
+ "count": 136
829
+ }
830
+ }
831
+ },
832
+ "add_C5": {
833
+ "full_accuracy": 0.19,
834
+ "digit_accuracy": 0.7742857142857142,
835
+ "n_examples": 100,
836
+ "per_subtask": {
837
+ "SA": {
838
+ "accuracy": 0.99,
839
+ "count": 100
840
+ },
841
+ "SC": {
842
+ "accuracy": 0.98,
843
+ "count": 100
844
+ },
845
+ "UC": {
846
+ "accuracy": 0.7,
847
+ "count": 310
848
+ },
849
+ "US": {
850
+ "accuracy": 0.6736842105263158,
851
+ "count": 190
852
+ }
853
+ }
854
+ },
855
+ "add_C6": {
856
+ "full_accuracy": 0.22,
857
+ "digit_accuracy": 0.8014285714285714,
858
+ "n_examples": 100,
859
+ "per_subtask": {
860
+ "SC": {
861
+ "accuracy": 1.0,
862
+ "count": 100
863
+ },
864
+ "UC": {
865
+ "accuracy": 0.7594594594594595,
866
+ "count": 370
867
+ },
868
+ "US": {
869
+ "accuracy": 0.782608695652174,
870
+ "count": 230
871
+ }
872
+ }
873
+ },
874
+ "sub_M0": {
875
+ "full_accuracy": 0.56,
876
+ "digit_accuracy": 0.9214285714285714,
877
+ "n_examples": 100,
878
+ "per_subtask": {
879
+ "MD": {
880
+ "accuracy": 0.9170731707317074,
881
+ "count": 615
882
+ },
883
+ "ME": {
884
+ "accuracy": 0.9529411764705882,
885
+ "count": 85
886
+ }
887
+ }
888
+ },
889
+ "sub_M1": {
890
+ "full_accuracy": 0.67,
891
+ "digit_accuracy": 0.9471428571428572,
892
+ "n_examples": 100,
893
+ "per_subtask": {
894
+ "MD": {
895
+ "accuracy": 0.9657534246575342,
896
+ "count": 292
897
+ },
898
+ "MB": {
899
+ "accuracy": 0.9583333333333334,
900
+ "count": 144
901
+ },
902
+ "ME": {
903
+ "accuracy": 0.88,
904
+ "count": 25
905
+ },
906
+ "UB": {
907
+ "accuracy": 0.9246861924686193,
908
+ "count": 239
909
+ }
910
+ }
911
+ },
912
+ "sub_M2": {
913
+ "full_accuracy": 0.35,
914
+ "digit_accuracy": 0.8714285714285714,
915
+ "n_examples": 100,
916
+ "per_subtask": {
917
+ "MD": {
918
+ "accuracy": 0.9620853080568721,
919
+ "count": 211
920
+ },
921
+ "MB": {
922
+ "accuracy": 0.9304347826086956,
923
+ "count": 115
924
+ },
925
+ "ME": {
926
+ "accuracy": 0.9764705882352941,
927
+ "count": 85
928
+ },
929
+ "UB": {
930
+ "accuracy": 0.6795580110497238,
931
+ "count": 181
932
+ },
933
+ "UD": {
934
+ "accuracy": 0.8703703703703703,
935
+ "count": 108
936
+ }
937
+ }
938
+ },
939
+ "sub_M3": {
940
+ "full_accuracy": 0.15,
941
+ "digit_accuracy": 0.8014285714285714,
942
+ "n_examples": 100,
943
+ "per_subtask": {
944
+ "MD": {
945
+ "accuracy": 0.9888268156424581,
946
+ "count": 179
947
+ },
948
+ "MB": {
949
+ "accuracy": 0.941747572815534,
950
+ "count": 103
951
+ },
952
+ "ME": {
953
+ "accuracy": 0.9821428571428571,
954
+ "count": 56
955
+ },
956
+ "UB": {
957
+ "accuracy": 0.6308724832214765,
958
+ "count": 149
959
+ },
960
+ "UD": {
961
+ "accuracy": 0.647887323943662,
962
+ "count": 213
963
+ }
964
+ }
965
+ },
966
+ "sub_M4": {
967
+ "full_accuracy": 0.27,
968
+ "digit_accuracy": 0.7414285714285714,
969
+ "n_examples": 100,
970
+ "per_subtask": {
971
+ "MD": {
972
+ "accuracy": 0.985,
973
+ "count": 200
974
+ },
975
+ "MB": {
976
+ "accuracy": 0.99,
977
+ "count": 100
978
+ },
979
+ "UB": {
980
+ "accuracy": 0.71,
981
+ "count": 100
982
+ },
983
+ "UD": {
984
+ "accuracy": 0.5066666666666667,
985
+ "count": 300
986
+ }
987
+ }
988
+ },
989
+ "sub_M5": {
990
+ "full_accuracy": 0.14,
991
+ "digit_accuracy": 0.5971428571428572,
992
+ "n_examples": 100,
993
+ "per_subtask": {
994
+ "MD": {
995
+ "accuracy": 1.0,
996
+ "count": 100
997
+ },
998
+ "MB": {
999
+ "accuracy": 1.0,
1000
+ "count": 100
1001
+ },
1002
+ "UB": {
1003
+ "accuracy": 0.62,
1004
+ "count": 100
1005
+ },
1006
+ "UD": {
1007
+ "accuracy": 0.39,
1008
+ "count": 400
1009
+ }
1010
+ }
1011
+ },
1012
+ "sub_random": {
1013
+ "full_accuracy": 0.57,
1014
+ "digit_accuracy": 0.92,
1015
+ "n_examples": 200,
1016
+ "per_subtask": {
1017
+ "MD": {
1018
+ "accuracy": 0.965,
1019
+ "count": 600
1020
+ },
1021
+ "MB": {
1022
+ "accuracy": 0.9438202247191011,
1023
+ "count": 267
1024
+ },
1025
+ "ME": {
1026
+ "accuracy": 0.9622641509433962,
1027
+ "count": 53
1028
+ },
1029
+ "UB": {
1030
+ "accuracy": 0.8428246013667426,
1031
+ "count": 439
1032
+ },
1033
+ "UD": {
1034
+ "accuracy": 0.8780487804878049,
1035
+ "count": 41
1036
+ }
1037
+ }
1038
+ },
1039
+ "sub_B3": {
1040
+ "full_accuracy": 0.32,
1041
+ "digit_accuracy": 0.8657142857142858,
1042
+ "n_examples": 100,
1043
+ "per_subtask": {
1044
+ "MD": {
1045
+ "accuracy": 0.9633333333333334,
1046
+ "count": 300
1047
+ },
1048
+ "MB": {
1049
+ "accuracy": 0.97,
1050
+ "count": 100
1051
+ },
1052
+ "UB": {
1053
+ "accuracy": 0.7157360406091371,
1054
+ "count": 197
1055
+ },
1056
+ "UD": {
1057
+ "accuracy": 0.7669902912621359,
1058
+ "count": 103
1059
+ }
1060
+ }
1061
+ },
1062
+ "sub_B4": {
1063
+ "full_accuracy": 0.27,
1064
+ "digit_accuracy": 0.8185714285714286,
1065
+ "n_examples": 100,
1066
+ "per_subtask": {
1067
+ "MD": {
1068
+ "accuracy": 0.98,
1069
+ "count": 200
1070
+ },
1071
+ "MB": {
1072
+ "accuracy": 0.98,
1073
+ "count": 100
1074
+ },
1075
+ "UB": {
1076
+ "accuracy": 0.7125506072874493,
1077
+ "count": 247
1078
+ },
1079
+ "UD": {
1080
+ "accuracy": 0.673202614379085,
1081
+ "count": 153
1082
+ }
1083
+ }
1084
+ },
1085
+ "sub_B5": {
1086
+ "full_accuracy": 0.18,
1087
+ "digit_accuracy": 0.7742857142857142,
1088
+ "n_examples": 100,
1089
+ "per_subtask": {
1090
+ "MD": {
1091
+ "accuracy": 1.0,
1092
+ "count": 100
1093
+ },
1094
+ "MB": {
1095
+ "accuracy": 0.99,
1096
+ "count": 100
1097
+ },
1098
+ "UB": {
1099
+ "accuracy": 0.7046979865771812,
1100
+ "count": 298
1101
+ },
1102
+ "UD": {
1103
+ "accuracy": 0.6584158415841584,
1104
+ "count": 202
1105
+ }
1106
+ }
1107
+ }
1108
+ },
1109
+ "summary": {
1110
+ "overall_accuracy": 0.3503846153846154,
1111
+ "digit_accuracy": 0.8206043956043956,
1112
+ "total_examples": 2600,
1113
+ "n_splits": 24
1114
+ }
1115
+ },
1116
+ "sorl_eval": {
1117
+ "config": {
1118
+ "ops": "add_sub",
1119
+ "K": 1,
1120
+ "mode": "sorl",
1121
+ "n_digits": 6,
1122
+ "n_per_split": 100
1123
+ },
1124
+ "splits": {
1125
+ "add_S0": {
1126
+ "full_accuracy": 0.99,
1127
+ "digit_accuracy": 0.9985714285714286,
1128
+ "n_examples": 100,
1129
+ "per_subtask": {
1130
+ "SA": {
1131
+ "accuracy": 0.9983471074380166,
1132
+ "count": 605
1133
+ },
1134
+ "SS": {
1135
+ "accuracy": 1.0,
1136
+ "count": 95
1137
+ }
1138
+ }
1139
+ },
1140
+ "add_S1": {
1141
+ "full_accuracy": 0.99,
1142
+ "digit_accuracy": 0.9985714285714286,
1143
+ "n_examples": 100,
1144
+ "per_subtask": {
1145
+ "SA": {
1146
+ "accuracy": 1.0,
1147
+ "count": 204
1148
+ },
1149
+ "SC": {
1150
+ "accuracy": 0.9940828402366864,
1151
+ "count": 169
1152
+ },
1153
+ "SS": {
1154
+ "accuracy": 1.0,
1155
+ "count": 31
1156
+ },
1157
+ "UC": {
1158
+ "accuracy": 1.0,
1159
+ "count": 296
1160
+ }
1161
+ }
1162
+ },
1163
+ "add_S2": {
1164
+ "full_accuracy": 0.94,
1165
+ "digit_accuracy": 0.9914285714285714,
1166
+ "n_examples": 100,
1167
+ "per_subtask": {
1168
+ "SA": {
1169
+ "accuracy": 1.0,
1170
+ "count": 163
1171
+ },
1172
+ "SC": {
1173
+ "accuracy": 0.9923076923076923,
1174
+ "count": 130
1175
+ },
1176
+ "SS": {
1177
+ "accuracy": 0.9885057471264368,
1178
+ "count": 87
1179
+ },
1180
+ "UC": {
1181
+ "accuracy": 0.9852216748768473,
1182
+ "count": 203
1183
+ },
1184
+ "US": {
1185
+ "accuracy": 0.9914529914529915,
1186
+ "count": 117
1187
+ }
1188
+ }
1189
+ },
1190
+ "add_S3": {
1191
+ "full_accuracy": 0.91,
1192
+ "digit_accuracy": 0.9871428571428571,
1193
+ "n_examples": 100,
1194
+ "per_subtask": {
1195
+ "SA": {
1196
+ "accuracy": 1.0,
1197
+ "count": 121
1198
+ },
1199
+ "SC": {
1200
+ "accuracy": 1.0,
1201
+ "count": 121
1202
+ },
1203
+ "SS": {
1204
+ "accuracy": 1.0,
1205
+ "count": 49
1206
+ },
1207
+ "UC": {
1208
+ "accuracy": 0.9516129032258065,
1209
+ "count": 186
1210
+ },
1211
+ "US": {
1212
+ "accuracy": 1.0,
1213
+ "count": 223
1214
+ }
1215
+ }
1216
+ },
1217
+ "add_S4": {
1218
+ "full_accuracy": 0.81,
1219
+ "digit_accuracy": 0.97,
1220
+ "n_examples": 100,
1221
+ "per_subtask": {
1222
+ "SA": {
1223
+ "accuracy": 1.0,
1224
+ "count": 104
1225
+ },
1226
+ "SC": {
1227
+ "accuracy": 1.0,
1228
+ "count": 106
1229
+ },
1230
+ "SS": {
1231
+ "accuracy": 1.0,
1232
+ "count": 23
1233
+ },
1234
+ "UC": {
1235
+ "accuracy": 0.88125,
1236
+ "count": 160
1237
+ },
1238
+ "US": {
1239
+ "accuracy": 0.993485342019544,
1240
+ "count": 307
1241
+ }
1242
+ }
1243
+ },
1244
+ "add_S5": {
1245
+ "full_accuracy": 0.54,
1246
+ "digit_accuracy": 0.8842857142857142,
1247
+ "n_examples": 100,
1248
+ "per_subtask": {
1249
+ "SA": {
1250
+ "accuracy": 1.0,
1251
+ "count": 100
1252
+ },
1253
+ "SC": {
1254
+ "accuracy": 1.0,
1255
+ "count": 100
1256
+ },
1257
+ "UC": {
1258
+ "accuracy": 0.63,
1259
+ "count": 100
1260
+ },
1261
+ "US": {
1262
+ "accuracy": 0.89,
1263
+ "count": 400
1264
+ }
1265
+ }
1266
+ },
1267
+ "add_S6": {
1268
+ "full_accuracy": 0.62,
1269
+ "digit_accuracy": 0.8571428571428571,
1270
+ "n_examples": 100,
1271
+ "per_subtask": {
1272
+ "SC": {
1273
+ "accuracy": 1.0,
1274
+ "count": 100
1275
+ },
1276
+ "UC": {
1277
+ "accuracy": 0.72,
1278
+ "count": 100
1279
+ },
1280
+ "US": {
1281
+ "accuracy": 0.856,
1282
+ "count": 500
1283
+ }
1284
+ }
1285
+ },
1286
+ "add_random": {
1287
+ "full_accuracy": 0.975,
1288
+ "digit_accuracy": 0.9964285714285714,
1289
+ "n_examples": 200,
1290
+ "per_subtask": {
1291
+ "SA": {
1292
+ "accuracy": 1.0,
1293
+ "count": 447
1294
+ },
1295
+ "SC": {
1296
+ "accuracy": 1.0,
1297
+ "count": 320
1298
+ },
1299
+ "SS": {
1300
+ "accuracy": 1.0,
1301
+ "count": 56
1302
+ },
1303
+ "UC": {
1304
+ "accuracy": 0.9905482041587902,
1305
+ "count": 529
1306
+ },
1307
+ "US": {
1308
+ "accuracy": 1.0,
1309
+ "count": 48
1310
+ }
1311
+ }
1312
+ },
1313
+ "add_C1": {
1314
+ "full_accuracy": 0.98,
1315
+ "digit_accuracy": 0.9971428571428571,
1316
+ "n_examples": 100,
1317
+ "per_subtask": {
1318
+ "SA": {
1319
+ "accuracy": 1.0,
1320
+ "count": 500
1321
+ },
1322
+ "SC": {
1323
+ "accuracy": 1.0,
1324
+ "count": 100
1325
+ },
1326
+ "UC": {
1327
+ "accuracy": 0.98,
1328
+ "count": 100
1329
+ }
1330
+ }
1331
+ },
1332
+ "add_C2": {
1333
+ "full_accuracy": 0.96,
1334
+ "digit_accuracy": 0.9942857142857143,
1335
+ "n_examples": 100,
1336
+ "per_subtask": {
1337
+ "SA": {
1338
+ "accuracy": 1.0,
1339
+ "count": 400
1340
+ },
1341
+ "SC": {
1342
+ "accuracy": 0.99,
1343
+ "count": 100
1344
+ },
1345
+ "UC": {
1346
+ "accuracy": 0.9807692307692307,
1347
+ "count": 156
1348
+ },
1349
+ "US": {
1350
+ "accuracy": 1.0,
1351
+ "count": 44
1352
+ }
1353
+ }
1354
+ },
1355
+ "add_C3": {
1356
+ "full_accuracy": 0.91,
1357
+ "digit_accuracy": 0.9842857142857143,
1358
+ "n_examples": 100,
1359
+ "per_subtask": {
1360
+ "SA": {
1361
+ "accuracy": 1.0,
1362
+ "count": 300
1363
+ },
1364
+ "SC": {
1365
+ "accuracy": 1.0,
1366
+ "count": 100
1367
+ },
1368
+ "UC": {
1369
+ "accuracy": 0.9547738693467337,
1370
+ "count": 199
1371
+ },
1372
+ "US": {
1373
+ "accuracy": 0.9801980198019802,
1374
+ "count": 101
1375
+ }
1376
+ }
1377
+ },
1378
+ "add_C4": {
1379
+ "full_accuracy": 0.94,
1380
+ "digit_accuracy": 0.99,
1381
+ "n_examples": 100,
1382
+ "per_subtask": {
1383
+ "SA": {
1384
+ "accuracy": 1.0,
1385
+ "count": 200
1386
+ },
1387
+ "SC": {
1388
+ "accuracy": 1.0,
1389
+ "count": 100
1390
+ },
1391
+ "UC": {
1392
+ "accuracy": 0.9810606060606061,
1393
+ "count": 264
1394
+ },
1395
+ "US": {
1396
+ "accuracy": 0.9852941176470589,
1397
+ "count": 136
1398
+ }
1399
+ }
1400
+ },
1401
+ "add_C5": {
1402
+ "full_accuracy": 0.94,
1403
+ "digit_accuracy": 0.99,
1404
+ "n_examples": 100,
1405
+ "per_subtask": {
1406
+ "SA": {
1407
+ "accuracy": 1.0,
1408
+ "count": 100
1409
+ },
1410
+ "SC": {
1411
+ "accuracy": 1.0,
1412
+ "count": 100
1413
+ },
1414
+ "UC": {
1415
+ "accuracy": 0.9806451612903225,
1416
+ "count": 310
1417
+ },
1418
+ "US": {
1419
+ "accuracy": 0.9947368421052631,
1420
+ "count": 190
1421
+ }
1422
+ }
1423
+ },
1424
+ "add_C6": {
1425
+ "full_accuracy": 0.86,
1426
+ "digit_accuracy": 0.97,
1427
+ "n_examples": 100,
1428
+ "per_subtask": {
1429
+ "SC": {
1430
+ "accuracy": 1.0,
1431
+ "count": 100
1432
+ },
1433
+ "UC": {
1434
+ "accuracy": 0.9621621621621622,
1435
+ "count": 370
1436
+ },
1437
+ "US": {
1438
+ "accuracy": 0.9695652173913043,
1439
+ "count": 230
1440
+ }
1441
+ }
1442
+ },
1443
+ "sub_M0": {
1444
+ "full_accuracy": 0.98,
1445
+ "digit_accuracy": 0.9971428571428571,
1446
+ "n_examples": 100,
1447
+ "per_subtask": {
1448
+ "MD": {
1449
+ "accuracy": 0.9967479674796748,
1450
+ "count": 615
1451
+ },
1452
+ "ME": {
1453
+ "accuracy": 1.0,
1454
+ "count": 85
1455
+ }
1456
+ }
1457
+ },
1458
+ "sub_M1": {
1459
+ "full_accuracy": 0.97,
1460
+ "digit_accuracy": 0.9957142857142857,
1461
+ "n_examples": 100,
1462
+ "per_subtask": {
1463
+ "MD": {
1464
+ "accuracy": 0.9965753424657534,
1465
+ "count": 292
1466
+ },
1467
+ "MB": {
1468
+ "accuracy": 0.9861111111111112,
1469
+ "count": 144
1470
+ },
1471
+ "ME": {
1472
+ "accuracy": 1.0,
1473
+ "count": 25
1474
+ },
1475
+ "UB": {
1476
+ "accuracy": 1.0,
1477
+ "count": 239
1478
+ }
1479
+ }
1480
+ },
1481
+ "sub_M2": {
1482
+ "full_accuracy": 0.98,
1483
+ "digit_accuracy": 0.9971428571428571,
1484
+ "n_examples": 100,
1485
+ "per_subtask": {
1486
+ "MD": {
1487
+ "accuracy": 1.0,
1488
+ "count": 211
1489
+ },
1490
+ "MB": {
1491
+ "accuracy": 1.0,
1492
+ "count": 115
1493
+ },
1494
+ "ME": {
1495
+ "accuracy": 1.0,
1496
+ "count": 85
1497
+ },
1498
+ "UB": {
1499
+ "accuracy": 0.994475138121547,
1500
+ "count": 181
1501
+ },
1502
+ "UD": {
1503
+ "accuracy": 0.9907407407407407,
1504
+ "count": 108
1505
+ }
1506
+ }
1507
+ },
1508
+ "sub_M3": {
1509
+ "full_accuracy": 0.7,
1510
+ "digit_accuracy": 0.95,
1511
+ "n_examples": 100,
1512
+ "per_subtask": {
1513
+ "MD": {
1514
+ "accuracy": 1.0,
1515
+ "count": 179
1516
+ },
1517
+ "MB": {
1518
+ "accuracy": 1.0,
1519
+ "count": 103
1520
+ },
1521
+ "ME": {
1522
+ "accuracy": 1.0,
1523
+ "count": 56
1524
+ },
1525
+ "UB": {
1526
+ "accuracy": 0.8187919463087249,
1527
+ "count": 149
1528
+ },
1529
+ "UD": {
1530
+ "accuracy": 0.9624413145539906,
1531
+ "count": 213
1532
+ }
1533
+ }
1534
+ },
1535
+ "sub_M4": {
1536
+ "full_accuracy": 0.38,
1537
+ "digit_accuracy": 0.8828571428571429,
1538
+ "n_examples": 100,
1539
+ "per_subtask": {
1540
+ "MD": {
1541
+ "accuracy": 1.0,
1542
+ "count": 200
1543
+ },
1544
+ "MB": {
1545
+ "accuracy": 1.0,
1546
+ "count": 100
1547
+ },
1548
+ "UB": {
1549
+ "accuracy": 0.5,
1550
+ "count": 100
1551
+ },
1552
+ "UD": {
1553
+ "accuracy": 0.8933333333333333,
1554
+ "count": 300
1555
+ }
1556
+ }
1557
+ },
1558
+ "sub_M5": {
1559
+ "full_accuracy": 0.18,
1560
+ "digit_accuracy": 0.7628571428571429,
1561
+ "n_examples": 100,
1562
+ "per_subtask": {
1563
+ "MD": {
1564
+ "accuracy": 1.0,
1565
+ "count": 100
1566
+ },
1567
+ "MB": {
1568
+ "accuracy": 1.0,
1569
+ "count": 100
1570
+ },
1571
+ "UB": {
1572
+ "accuracy": 0.36,
1573
+ "count": 100
1574
+ },
1575
+ "UD": {
1576
+ "accuracy": 0.745,
1577
+ "count": 400
1578
+ }
1579
+ }
1580
+ },
1581
+ "sub_random": {
1582
+ "full_accuracy": 0.95,
1583
+ "digit_accuracy": 0.9928571428571429,
1584
+ "n_examples": 200,
1585
+ "per_subtask": {
1586
+ "MD": {
1587
+ "accuracy": 0.9933333333333333,
1588
+ "count": 600
1589
+ },
1590
+ "MB": {
1591
+ "accuracy": 0.9887640449438202,
1592
+ "count": 267
1593
+ },
1594
+ "ME": {
1595
+ "accuracy": 1.0,
1596
+ "count": 53
1597
+ },
1598
+ "UB": {
1599
+ "accuracy": 0.9931662870159453,
1600
+ "count": 439
1601
+ },
1602
+ "UD": {
1603
+ "accuracy": 1.0,
1604
+ "count": 41
1605
+ }
1606
+ }
1607
+ },
1608
+ "sub_B3": {
1609
+ "full_accuracy": 0.95,
1610
+ "digit_accuracy": 0.9928571428571429,
1611
+ "n_examples": 100,
1612
+ "per_subtask": {
1613
+ "MD": {
1614
+ "accuracy": 1.0,
1615
+ "count": 300
1616
+ },
1617
+ "MB": {
1618
+ "accuracy": 1.0,
1619
+ "count": 100
1620
+ },
1621
+ "UB": {
1622
+ "accuracy": 0.9796954314720813,
1623
+ "count": 197
1624
+ },
1625
+ "UD": {
1626
+ "accuracy": 0.9902912621359223,
1627
+ "count": 103
1628
+ }
1629
+ }
1630
+ },
1631
+ "sub_B4": {
1632
+ "full_accuracy": 0.78,
1633
+ "digit_accuracy": 0.96,
1634
+ "n_examples": 100,
1635
+ "per_subtask": {
1636
+ "MD": {
1637
+ "accuracy": 0.985,
1638
+ "count": 200
1639
+ },
1640
+ "MB": {
1641
+ "accuracy": 1.0,
1642
+ "count": 100
1643
+ },
1644
+ "UB": {
1645
+ "accuracy": 0.9311740890688259,
1646
+ "count": 247
1647
+ },
1648
+ "UD": {
1649
+ "accuracy": 0.9477124183006536,
1650
+ "count": 153
1651
+ }
1652
+ }
1653
+ },
1654
+ "sub_B5": {
1655
+ "full_accuracy": 0.81,
1656
+ "digit_accuracy": 0.9642857142857143,
1657
+ "n_examples": 100,
1658
+ "per_subtask": {
1659
+ "MD": {
1660
+ "accuracy": 1.0,
1661
+ "count": 100
1662
+ },
1663
+ "MB": {
1664
+ "accuracy": 1.0,
1665
+ "count": 100
1666
+ },
1667
+ "UB": {
1668
+ "accuracy": 0.9362416107382551,
1669
+ "count": 298
1670
+ },
1671
+ "UD": {
1672
+ "accuracy": 0.9702970297029703,
1673
+ "count": 202
1674
+ }
1675
+ }
1676
+ }
1677
+ },
1678
+ "summary": {
1679
+ "overall_accuracy": 0.845,
1680
+ "digit_accuracy": 0.9651098901098901,
1681
+ "total_examples": 2600,
1682
+ "n_splits": 24
1683
+ }
1684
+ },
1685
+ "sorl_overall_accuracy": 0.845,
1686
+ "sft_overall_accuracy": 0.3503846153846154
1687
+ }
add_sub_sorl_v1_abs1_K1_10K/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b657f0d2d656ff6596e9436a64636c48f9bd79a42360a8f96f74689d1ad83f91
3
+ size 650266922
add_sub_sorl_v1_abs1_K1_10K/train_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_rollouts": 4,
3
+ "K": 1,
4
+ "max_iterations": 2,
5
+ "memory_span_abs": 1792,
6
+ "memory_span_traj": 1792,
7
+ "temperature": 1.0,
8
+ "ar_search": false,
9
+ "response_only_abs": false,
10
+ "alpha_info_gain": 10.0,
11
+ "alpha_abs": 0.1,
12
+ "alpha_soft_zipf": 1.0,
13
+ "alpha_ortho": 0.0,
14
+ "alpha_anchor": 0.0,
15
+ "alpha_jacobi": 0.0,
16
+ "decay": 0.8,
17
+ "target_vocab_util": 0.8,
18
+ "min_abs_ppl": 0.0,
19
+ "zipf_alpha": 1.0,
20
+ "lr": 8e-05,
21
+ "emb_lr_mult": 1.0,
22
+ "weight_decay": 0.01,
23
+ "warmup_steps": 100,
24
+ "cooldown_frac": 0.4,
25
+ "max_grad_norm": 1.0,
26
+ "vq_abs_pretrain_steps": 0,
27
+ "vq_abs_pretrain_lr": 0.001,
28
+ "vq_abs_pretrain_layer": -1,
29
+ "vq_abs_pretrain_batch_size": 256,
30
+ "vq_abs_pretrain_target_vectors": 20000,
31
+ "batch_size": 64,
32
+ "gradient_accumulation_steps": 1,
33
+ "num_epochs": 20,
34
+ "emb_warmup_steps": 0,
35
+ "log_every": 50,
36
+ "eval_every": 156,
37
+ "save_every": 999999,
38
+ "eval_samples": 100,
39
+ "output_dir": "ckpt/sweep/as_sorl_abs1_K1_10K_2L3H510d",
40
+ "eval_K": 4,
41
+ "alpha_traj": 0.0,
42
+ "corrupt_method": "shuffle",
43
+ "corrupt_ratio": 0.3,
44
+ "alpha_contrastive": 1.0,
45
+ "gamma_contrastive": 0.5,
46
+ "alpha_masked_traj": 0.0,
47
+ "mask_nl_ratio": 0.3,
48
+ "mask_nl_mode": "fixed",
49
+ "mask_nl_fixed_id": 0,
50
+ "use_ste": true,
51
+ "n_inner": 1,
52
+ "random_K": null,
53
+ "strip_suffix": null,
54
+ "compress_prefix": null,
55
+ "random_mem_span": null,
56
+ "warmup_ratio": 0.03,
57
+ "beta2": 0.999,
58
+ "seed": 42,
59
+ "n_digits": 6,
60
+ "n_layer": 2,
61
+ "n_head": 3,
62
+ "n_embd": 510,
63
+ "ops": "add_sub",
64
+ "abs_vocab": 1,
65
+ "dataset_size": 10000,
66
+ "mode": "sorl",
67
+ "device": "cuda",
68
+ "push_to_hub": true,
69
+ "no_wandb": false,
70
+ "n_params": 162490082,
71
+ "run_name": "add_sub_sorl_v1_abs1_K1_10K",
72
+ "git_commit": "f835493c19eb98267697007042c9d440cad2afbb",
73
+ "timestamp": "2026-04-16T04:12:31.461525+00:00",
74
+ "tokenizer": "Qwen/Qwen3-0.6B",
75
+ "dataset_repo": "thoughtworks/arithmetic-sorl-data",
76
+ "dataset_config": "add_sub_6digit",
77
+ "train_dataset": "fixed_train/train_10K_seed42.pt",
78
+ "model_repo": "thoughtworks/arithmetic-sorl",
79
+ "trainer_version": "v1",
80
+ "wandb_run_id": "oqawpqvh",
81
+ "wandb_url": "https://wandb.ai/nlp_and_interpretability/sorl-arithmetic/runs/oqawpqvh",
82
+ "eval_final_dataset": "eval_sets/eval_add_sub_6d_N100_seed42.json",
83
+ "eval_epoch_dataset": "eval_sets/eval_add_sub_6d_N25_seed42.json",
84
+ "eval_hf_repo": "thoughtworks/arithmetic-sorl-data",
85
+ "config_hash": "0a607564f712",
86
+ "final_accuracy": 0.845,
87
+ "sft_accuracy": 0.3503846153846154,
88
+ "eval_method": "ArithmeticEvaluator"
89
+ }