bdellabe committed on
Commit
620829d
·
verified ·
1 Parent(s): 37c4dbf

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -68,7 +68,7 @@
68
  "dynamic": false,
69
  "group_size": null,
70
  "num_bits": 8,
71
- "observer": "minmax",
72
  "observer_kwargs": {},
73
  "scale_dtype": null,
74
  "strategy": "block",
 
68
  "dynamic": false,
69
  "group_size": null,
70
  "num_bits": 8,
71
+ "observer": "memoryless_minmax",
72
  "observer_kwargs": {},
73
  "scale_dtype": null,
74
  "strategy": "block",
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b496e9fc3c141af9803beecbf3984f164f0482fed9ed607a7a0836e93935978
3
- size 4998513864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69093fb4b3dab0e5e21cd61bf0294417406f74dbfd915a6fa5ae9a5c3dcfc7fb
3
+ size 5000218408
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b72a878a3a95718a8a8bd0eb3503ae538d3ccb272a0c88b02dd5e114c295265
3
- size 4998892464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc05f7a7f1105612716b93ed8546fcd9aa1ef248c81525569af8e6454ae4369
3
+ size 4997712816
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f9d29ecd5296f619d966b323f5546fe276c72dbe0125f5c2da35dc926f8cfaf
3
- size 4999146688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b58c2678e2c9b85f466e2042fd3705b2fa535c1323d8770e4436befa7c7eb62
3
+ size 4998098120
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a646d15af273fbf82c44cb6d78c6854153fa076074a1ce14b04fcb233e21890e
3
- size 1206834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527ad81efaa289cf16dfa80b06e8c7390a529050dba46bdf7e37b0bd2b280ff8
3
+ size 1203819144
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 15710952112,
4
- "total_size": 16202085728
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -3575,8 +3575,8 @@
3575
  "model.layers.17.mlp.experts.59.up_proj.weight_scale": "model-00003-of-00004.safetensors",
3576
  "model.layers.17.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors",
3577
  "model.layers.17.mlp.experts.6.down_proj.weight_scale": "model-00003-of-00004.safetensors",
3578
- "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00003-of-00004.safetensors",
3579
- "model.layers.17.mlp.experts.6.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
3580
  "model.layers.17.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors",
3581
  "model.layers.17.mlp.experts.6.up_proj.weight_scale": "model-00003-of-00004.safetensors",
3582
  "model.layers.17.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors",
@@ -7077,8 +7077,8 @@
7077
  "model.layers.25.mlp.experts.41.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
7078
  "model.layers.25.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors",
7079
  "model.layers.25.mlp.experts.41.up_proj.weight_scale": "model-00003-of-00004.safetensors",
7080
- "model.layers.25.mlp.experts.42.down_proj.weight": "model-00004-of-00004.safetensors",
7081
- "model.layers.25.mlp.experts.42.down_proj.weight_scale": "model-00004-of-00004.safetensors",
7082
  "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors",
7083
  "model.layers.25.mlp.experts.42.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
7084
  "model.layers.25.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors",
@@ -9857,8 +9857,8 @@
9857
  "model.layers.8.mlp.experts.36.up_proj.weight_scale": "model-00001-of-00004.safetensors",
9858
  "model.layers.8.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors",
9859
  "model.layers.8.mlp.experts.37.down_proj.weight_scale": "model-00002-of-00004.safetensors",
9860
- "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00002-of-00004.safetensors",
9861
- "model.layers.8.mlp.experts.37.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
9862
  "model.layers.8.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors",
9863
  "model.layers.8.mlp.experts.37.up_proj.weight_scale": "model-00002-of-00004.safetensors",
9864
  "model.layers.8.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 15707413168,
4
+ "total_size": 16198546784
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
3575
  "model.layers.17.mlp.experts.59.up_proj.weight_scale": "model-00003-of-00004.safetensors",
3576
  "model.layers.17.mlp.experts.6.down_proj.weight": "model-00003-of-00004.safetensors",
3577
  "model.layers.17.mlp.experts.6.down_proj.weight_scale": "model-00003-of-00004.safetensors",
3578
+ "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00002-of-00004.safetensors",
3579
+ "model.layers.17.mlp.experts.6.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
3580
  "model.layers.17.mlp.experts.6.up_proj.weight": "model-00003-of-00004.safetensors",
3581
  "model.layers.17.mlp.experts.6.up_proj.weight_scale": "model-00003-of-00004.safetensors",
3582
  "model.layers.17.mlp.experts.60.down_proj.weight": "model-00003-of-00004.safetensors",
 
7077
  "model.layers.25.mlp.experts.41.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
7078
  "model.layers.25.mlp.experts.41.up_proj.weight": "model-00003-of-00004.safetensors",
7079
  "model.layers.25.mlp.experts.41.up_proj.weight_scale": "model-00003-of-00004.safetensors",
7080
+ "model.layers.25.mlp.experts.42.down_proj.weight": "model-00003-of-00004.safetensors",
7081
+ "model.layers.25.mlp.experts.42.down_proj.weight_scale": "model-00003-of-00004.safetensors",
7082
  "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00003-of-00004.safetensors",
7083
  "model.layers.25.mlp.experts.42.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
7084
  "model.layers.25.mlp.experts.42.up_proj.weight": "model-00003-of-00004.safetensors",
 
9857
  "model.layers.8.mlp.experts.36.up_proj.weight_scale": "model-00001-of-00004.safetensors",
9858
  "model.layers.8.mlp.experts.37.down_proj.weight": "model-00002-of-00004.safetensors",
9859
  "model.layers.8.mlp.experts.37.down_proj.weight_scale": "model-00002-of-00004.safetensors",
9860
+ "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00001-of-00004.safetensors",
9861
+ "model.layers.8.mlp.experts.37.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
9862
  "model.layers.8.mlp.experts.37.up_proj.weight": "model-00002-of-00004.safetensors",
9863
  "model.layers.8.mlp.experts.37.up_proj.weight_scale": "model-00002-of-00004.safetensors",
9864
  "model.layers.8.mlp.experts.38.down_proj.weight": "model-00002-of-00004.safetensors",