JunHowie committed on
Commit
c9f2192
·
verified ·
1 Parent(s): 2e1ee8d

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -35,3 +35,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
  figures/agent_teams.gif filter=lfs diff=lfs merge=lfs -text
 
 
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
37
  figures/agent_teams.gif filter=lfs diff=lfs merge=lfs -text
38
+ figures/banner.png filter=lfs diff=lfs merge=lfs -text
39
+ figures/mle_bench.png filter=lfs diff=lfs merge=lfs -text
40
+ figures/agent_harness.png filter=lfs diff=lfs merge=lfs -text
.mdl ADDED
Binary file (48 Bytes). View file
 
chat_template.jinja ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ----------‑‑‑ special token variables ‑‑‑---------- #}
2
+ {%- set toolcall_begin_token = '<minimax:tool_call>' -%}
3
+ {%- set toolcall_end_token = '</minimax:tool_call>' -%}
4
+ {#- Tool Rendering Functions ============================================== -#}
5
+ {%- macro render_tool_namespace(namespace_name, tool_list) -%}
6
+ {%- for tool in tool_list -%}
7
+ <tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
8
+ {% endfor -%}
9
+ {%- endmacro -%}
10
+ {%- macro visible_text(content) -%}
11
+ {%- if content is string -%}
12
+ {{ content }}
13
+ {%- elif content is iterable and content is not mapping -%}
14
+ {%- for item in content -%}
15
+ {%- if item is mapping and item.type == 'text' -%}
16
+ {{- item.text }}
17
+ {%- elif item is string -%}
18
+ {{- item }}
19
+ {%- endif -%}
20
+ {%- endfor -%}
21
+ {%- else -%}
22
+ {{- content }}
23
+ {%- endif -%}
24
+ {%- endmacro -%}
25
+ {#- System Message Construction ============================================ -#}
26
+ {%- macro build_system_message(system_message) -%}
27
+ {%- if system_message and system_message.content -%}
28
+ {{- visible_text(system_message.content) }}
29
+ {%- else -%}
30
+ {%- if model_identity is not defined -%}
31
+ {%- set model_identity = "You are a helpful assistant. Your name is MiniMax-M2.7 and is built by MiniMax." -%}
32
+ {%- endif -%}
33
+ {{- model_identity }}
34
+ {%- endif -%}
35
+
36
+ {#- Handle current_date -#}
37
+ {%- if system_message and system_message.current_date -%}
38
+ {{- '\n' ~ 'Current date: ' + system_message.current_date }}
39
+ {%- endif -%}
40
+ {#- Handle current_location -#}
41
+ {%- if system_message and system_message.current_location -%}
42
+ {{- '\n' ~ 'Current location: ' + system_message.current_location }}
43
+ {%- endif -%}
44
+ {%- endmacro -%}
45
+ {#- Main Template Logic ================================================= -#}
46
+ {#- Extract system message (only first message if it's system) -#}
47
+ {%- set system_message = none -%}
48
+ {%- set conversation_messages = messages -%}
49
+ {%- if messages and messages[0].role == "system" -%}
50
+ {%- set system_message = messages[0] -%}
51
+ {%- set conversation_messages = messages[1:] -%}
52
+ {%- endif -%}
53
+ {#- Get the last user message turn, for interleaved thinking -#}
54
+ {%- set ns = namespace(last_user_index=-1) %}
55
+ {% for m in conversation_messages %}
56
+ {%- if m.role == 'user' %}
57
+ {% set ns.last_user_index = loop.index0 -%}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {#- Render system message -#}
61
+ {{- ']~!b[' ~ ']~b]system' ~ '\n' }}
62
+ {{- build_system_message(system_message) }}
63
+ {#- Render tools if available -#}
64
+ {%- if tools -%}
65
+ {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
66
+ {{- '\n' ~ '<tools>' ~ '\n' }}
67
+ {{- render_tool_namespace("functions", tools) }}
68
+ {{- '</tools>' ~ '\n\n' }}
69
+ {{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
70
+ {{- '\n' ~ toolcall_begin_token }}
71
+ <invoke name="tool-name-1">
72
+ <parameter name="param-key-1">param-value-1</parameter>
73
+ <parameter name="param-key-2">param-value-2</parameter>
74
+ ...
75
+ </invoke>
76
+ {{- '\n' ~ toolcall_end_token }}
77
+ {%- endif -%}
78
+ {{- '[e~[\n' }}
79
+
80
+ {#- Render messages -#}
81
+ {%- set last_tool_call = namespace(name=none) -%}
82
+ {%- for message in conversation_messages -%}
83
+ {%- if message.role == 'assistant' -%}
84
+ {#- Only render reasoning_content if no user message follows -#}
85
+ {{- ']~b]ai' ~ '\n' }}
86
+
87
+ {%- set reasoning_content = '' %}
88
+ {%- set content = visible_text(message.content) %}
89
+ {%- if message.reasoning_content is string %}
90
+ {%- set reasoning_content = message.reasoning_content %}
91
+ {%- else %}
92
+ {%- if '</think>' in content %}
93
+ {%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
94
+ {%- set content = content.split('</think>')[-1].strip('\n') %}
95
+ {%- endif %}
96
+ {%- endif %}
97
+ {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
98
+ {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
99
+ {%- endif -%}
100
+ {%- if content -%}
101
+ {{- content }}
102
+ {%- endif -%}
103
+ {%- if message.tool_calls -%}
104
+ {{- '\n' ~ toolcall_begin_token ~ '\n' }}
105
+
106
+ {%- for tool_call in message.tool_calls -%}
107
+ {%- if tool_call.function %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {{- '<invoke name="' + tool_call.name + '">' }}
111
+ {% set _args = tool_call.arguments %}
112
+ {%- for k, v in _args.items() %}
113
+ {{- '<parameter name="' + k + '">' }}
114
+ {{- v | tojson(ensure_ascii=False) if v is not string else v }}
115
+ {{- '</parameter>' }}
116
+ {% endfor %}
117
+ {{- '</invoke>' ~ '\n' }}
118
+ {%- endfor -%}
119
+
120
+ {{- toolcall_end_token}}
121
+ {%- set last_tool_call.name = message.tool_calls[-1].name -%}
122
+ {%- else -%}
123
+ {%- set last_tool_call.name = none -%}
124
+ {%- endif -%}
125
+ {{- '[e~[' ~ '\n' }}
126
+
127
+ {%- elif message.role == 'tool' -%}
128
+ {%- if last_tool_call.name is none -%}
129
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
130
+ {%- endif -%}
131
+ {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
132
+ {{- ']~b]tool' }}
133
+ {%- endif -%}
134
+ {%- if message.content is string -%}
135
+ {{- '\n<response>' }}
136
+ {{- message.content }}
137
+ {{- '</response>' }}
138
+ {%- else -%}
139
+ {%- for tr in message.content -%}
140
+ {{- '\n<response>' }}
141
+ {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
142
+ {{- '\n</response>' }}
143
+ {%- endfor -%}
144
+ {%- endif -%}
145
+ {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
146
+ {{- '[e~[\n' -}}
147
+ {%- endif -%}
148
+
149
+ {%- elif message.role == 'user' -%}
150
+ {{- ']~b]user' ~ '\n' }}
151
+ {{- visible_text(message.content) }}
152
+ {{- '[e~[' ~ '\n' }}
153
+ {%- endif -%}
154
+ {%- endfor -%}
155
+
156
+ {#- Generation prompt -#}
157
+ {%- if add_generation_prompt -%}
158
+ {{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
159
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name_or_path": "tclf90/MiniMax-M2.7-AWQ",
3
+ "architectures": [
4
+ "MiniMaxM2ForCausalLM"
5
+ ],
6
+ "attn_type_list": [
7
+ 1,
8
+ 1,
9
+ 1,
10
+ 1,
11
+ 1,
12
+ 1,
13
+ 1,
14
+ 1,
15
+ 1,
16
+ 1,
17
+ 1,
18
+ 1,
19
+ 1,
20
+ 1,
21
+ 1,
22
+ 1,
23
+ 1,
24
+ 1,
25
+ 1,
26
+ 1,
27
+ 1,
28
+ 1,
29
+ 1,
30
+ 1,
31
+ 1,
32
+ 1,
33
+ 1,
34
+ 1,
35
+ 1,
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 1,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 1
69
+ ],
70
+ "auto_map": {
71
+ "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
72
+ "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
73
+ },
74
+ "dtype": "float16",
75
+ "head_dim": 128,
76
+ "hidden_act": "silu",
77
+ "hidden_size": 3072,
78
+ "intermediate_size": 1536,
79
+ "max_position_embeddings": 196608,
80
+ "model_type": "minimax_m2",
81
+ "mtp_transformer_layers": 1,
82
+ "num_attention_heads": 48,
83
+ "num_experts_per_tok": 8,
84
+ "num_hidden_layers": 62,
85
+ "num_key_value_heads": 8,
86
+ "num_local_experts": 256,
87
+ "num_mtp_modules": 3,
88
+ "qk_norm_type": "per_layer",
89
+ "rms_norm_eps": 1e-06,
90
+ "rope_theta": 5000000,
91
+ "rotary_dim": 64,
92
+ "scoring_func": "sigmoid",
93
+ "shared_intermediate_size": 0,
94
+ "tie_word_embeddings": false,
95
+ "transformers_version": "4.46.1",
96
+ "use_cache": true,
97
+ "use_mtp": true,
98
+ "use_qk_norm": true,
99
+ "use_routing_bias": true,
100
+ "vocab_size": 200064,
101
+ "quantization_config": {
102
+ "quant_method": "awq",
103
+ "bits": 4,
104
+ "group_size": 128,
105
+ "version": "gemm",
106
+ "zero_point": true,
107
+ "modules_to_not_convert": [
108
+ "self_attn",
109
+ "block_sparse_moe.gate",
110
+ "model.layers.0."
111
+ ]
112
+ }
113
+ }
docs/sglang_deploy_guide.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiniMax M2.7 Model SGLang Deployment Guide
2
+
3
+ [English Version](./sglang_deploy_guide.md) | [Chinese Version](./sglang_deploy_guide_cn.md)
4
+
5
+ We recommend using [SGLang](https://github.com/sgl-project/sglang) to deploy the [MiniMax-M2.7](https://huggingface.co/MiniMaxAI/MiniMax-M2.7) model. SGLang is a high-performance inference engine with excellent serving throughput, efficient and intelligent memory management, powerful batch request processing capabilities, and deeply optimized underlying performance. We recommend reviewing SGLang's official documentation to check hardware compatibility before deployment.
6
+
7
+ ## Applicable Models
8
+
9
+ This document applies to the following models. You only need to change the model name during deployment.
10
+
11
+ - [MiniMaxAI/MiniMax-M2.7](https://huggingface.co/MiniMaxAI/MiniMax-M2.7)
12
+ - [MiniMaxAI/MiniMax-M2.5](https://huggingface.co/MiniMaxAI/MiniMax-M2.5)
13
+ - [MiniMaxAI/MiniMax-M2.1](https://huggingface.co/MiniMaxAI/MiniMax-M2.1)
14
+ - [MiniMaxAI/MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2)
15
+
16
+ The deployment process is illustrated below using MiniMax-M2.7 as an example.
17
+
18
+ ## System Requirements
19
+
20
+ - OS: Linux
21
+
22
+ - Python: 3.9 - 3.12
23
+
24
+ - GPU:
25
+
26
+ - compute capability 7.0 or higher
27
+
28
+ - Memory requirements: 220 GB for weights, 240 GB per 1M context tokens
29
+
30
+ The following are recommended configurations; actual requirements should be adjusted based on your use case:
31
+
32
+ - **96G x4** GPU: Supports a total KV Cache capacity of 400K tokens.
33
+
34
+ - **144G x8** GPU: Supports a total KV Cache capacity of up to 3M tokens.
35
+
36
+ > **Note**: The values above represent the total aggregate hardware KV Cache capacity. The maximum context length per individual sequence remains **196K** tokens.
37
+
38
+ ## Deployment with Python
39
+
40
+ It is recommended to use a virtual environment (such as **venv**, **conda**, or **uv**) to avoid dependency conflicts.
41
+
42
+ We recommend installing SGLang in a fresh Python environment:
43
+
44
+ ```bash
45
+ uv venv
46
+ source .venv/bin/activate
47
+ uv pip install sglang
48
+ ```
49
+
50
+ Run the following command to start the SGLang server. SGLang will automatically download and cache the MiniMax-M2.7 model from Hugging Face.
51
+
52
+ 4-GPU deployment command:
53
+
54
+ ```bash
55
+ python -m sglang.launch_server \
56
+ --model-path MiniMaxAI/MiniMax-M2.7 \
57
+ --tp-size 4 \
58
+ --tool-call-parser minimax-m2 \
59
+ --reasoning-parser minimax-append-think \
60
+ --host 0.0.0.0 \
61
+ --trust-remote-code \
62
+ --port 8000 \
63
+ --mem-fraction-static 0.85
64
+ ```
65
+
66
+ 8-GPU deployment command:
67
+
68
+ ```bash
69
+ python -m sglang.launch_server \
70
+ --model-path MiniMaxAI/MiniMax-M2.7 \
71
+ --tp-size 8 \
72
+ --ep-size 8 \
73
+ --tool-call-parser minimax-m2 \
74
+ --trust-remote-code \
75
+ --host 0.0.0.0 \
76
+ --reasoning-parser minimax-append-think \
77
+ --port 8000 \
78
+ --mem-fraction-static 0.85
79
+ ```
80
+
81
+ ## Testing Deployment
82
+
83
+ After startup, you can test the SGLang OpenAI-compatible API with the following command:
84
+
85
+ ```bash
86
+ curl http://localhost:8000/v1/chat/completions \
87
+ -H "Content-Type: application/json" \
88
+ -d '{
89
+ "model": "MiniMaxAI/MiniMax-M2.7",
90
+ "messages": [
91
+ {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
92
+ {"role": "user", "content": [{"type": "text", "text": "Who won the world series in 2020?"}]}
93
+ ]
94
+ }'
95
+ ```
96
+
97
+ ## Common Issues
98
+
99
+ ### MiniMax-M2 model is not currently supported
100
+
101
+ Please upgrade SGLang to the latest stable version (>= v0.5.4.post1).
102
+
103
+ ## Getting Support
104
+
105
+ If you encounter any issues while deploying the MiniMax model:
106
+
107
+ - Contact our technical support team through official channels such as email at [model@minimax.io](mailto:model@minimax.io)
108
+
109
+ - Submit an issue on our [GitHub](https://github.com/MiniMax-AI) repository
110
+
111
+ We continuously optimize the deployment experience for our models. Feedback is welcome!
112
+
docs/tool_calling_guide.md ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiniMax-M2.7 Tool Calling Guide
2
+
3
+ [English Version](./tool_calling_guide.md) | [Chinese Version](./tool_calling_guide_cn.md)
4
+
5
+ MiniMax-M2.7 supports the same tool call syntax as MiniMax-M2.
6
+
7
+ ## Introduction
8
+
9
+ The MiniMax-M2.7 model supports tool calling capabilities, enabling the model to identify when external tools need to be called and output tool call parameters in a structured format. This document provides detailed instructions on how to use the tool calling features of MiniMax-M2.7.
10
+
11
+ ## Basic Example
12
+
13
+ The following Python script implements a weather query tool call example based on the OpenAI SDK:
14
+
15
+ ```python
16
+ from openai import OpenAI
17
+ import json
18
+
19
+ client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
20
+
21
+ def get_weather(location: str, unit: str):
22
+ return f"Getting the weather for {location} in {unit}..."
23
+
24
+ tool_functions = {"get_weather": get_weather}
25
+
26
+ tools = [{
27
+ "type": "function",
28
+ "function": {
29
+ "name": "get_weather",
30
+ "description": "Get the current weather in a given location",
31
+ "parameters": {
32
+ "type": "object",
33
+ "properties": {
34
+ "location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"},
35
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
36
+ },
37
+ "required": ["location", "unit"]
38
+ }
39
+ }
40
+ }]
41
+
42
+ response = client.chat.completions.create(
43
+ model=client.models.list().data[0].id,
44
+ messages=[{"role": "user", "content": "What's the weather like in San Francisco? use celsius."}],
45
+ tools=tools,
46
+ tool_choice="auto"
47
+ )
48
+
49
+ print(response)
50
+
51
+ tool_call = response.choices[0].message.tool_calls[0].function
52
+ print(f"Function called: {tool_call.name}")
53
+ print(f"Arguments: {tool_call.arguments}")
54
+ print(f"Result: {get_weather(**json.loads(tool_call.arguments))}")
55
+ ```
56
+
57
+ **Output Example:**
58
+ ```
59
+ Function called: get_weather
60
+ Arguments: {"location": "San Francisco, CA", "unit": "celsius"}
61
+ Result: Getting the weather for San Francisco, CA in celsius...
62
+ ```
63
+
64
+ ## Manually Parsing Model Output
65
+
66
+ **We strongly recommend using vLLM or SGLang for parsing tool calls.** If you cannot use the built-in parser of inference engines (e.g., vLLM and SGLang) that support MiniMax-M2.7, or need to use other inference frameworks (such as transformers, TGI, etc.), you can manually parse the model's raw output using the following method. This approach requires you to parse the XML tag format of the model output yourself.
67
+
68
+ ### Example Using Transformers
69
+
70
+ Here is a complete example using the transformers library:
71
+
72
+ ```python
73
+ from transformers import AutoTokenizer
74
+
75
+ def get_default_tools():
76
+ return [
77
+ {
78
+ "name": "get_current_weather",
79
+ "description": "Get the latest weather for a location",
80
+ "parameters": {
81
+ "type": "object",
82
+ "properties": {
83
+ "location": {
84
+ "type": "string",
85
+ "description": "A certain city, such as Beijing, Shanghai"
86
+ }
87
+ },
88
+ }
89
+ "required": ["location"],
90
+ "type": "object"
91
+ }
92
+ ]
93
+
94
# Load the tokenizer for the model being served.
# `model_id` is defined once so the tokenizer and the request payload agree.
model_id = "MiniMaxAI/MiniMax-M2.7"
tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = "What's the weather like in Shanghai today?"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Enable function calling tools
tools = get_default_tools()

# Apply chat template and include tool definitions
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    tools=tools
)

# Send request (using any inference service)
import requests
payload = {
    "model": model_id,
    "prompt": text,
    "max_tokens": 4096
}
response = requests.post(
    "http://localhost:8000/v1/completions",
    headers={"Content-Type": "application/json"},
    json=payload,
    stream=False,
)

# Model output needs manual parsing: the completions endpoint returns raw text
raw_output = response.json()["choices"][0]["text"]
print("Raw output:", raw_output)

# Use the parsing function below to process the output
tool_calls = parse_tool_calls(raw_output, tools)
133
+ ```
134
+
135
+ ## 🛠️ Tool Call Definition
136
+
137
+ ### Tool Structure
138
+
139
+ To enable tool calling, define the `tools` field in the request body. Each tool consists of the following parts:
140
+
141
+ ```json
142
+ {
143
+ "tools": [
144
+ {
145
+ "name": "search_web",
146
+ "description": "Search function.",
147
+ "parameters": {
148
+ "properties": {
149
+ "query_list": {
150
+ "description": "Keywords for search, list should contain 1 element.",
151
+ "items": { "type": "string" },
152
+ "type": "array"
153
+ },
154
+ "query_tag": {
155
+ "description": "Category of query",
156
+ "items": { "type": "string" },
157
+ "type": "array"
158
+ }
159
+ },
160
+ "required": [ "query_list", "query_tag" ],
161
+ "type": "object"
162
+ }
163
+ }
164
+ ]
165
+ }
166
+ ```
167
+
168
+ **Field Descriptions:**
169
+ - `name`: Function name
170
+ - `description`: Function description
171
+ - `parameters`: Function parameter definition
172
+ - `properties`: Parameter property definition, where key is the parameter name and value contains detailed parameter description
173
+ - `required`: List of required parameters
174
+ - `type`: Parameter type (usually "object")
175
+
176
+ ### Internal Processing Format
177
+
178
+ When processing within the MiniMax-M2.7 model, tool definitions are converted to a special format and concatenated to the input text. Here is a complete example:
179
+
180
+ ```
181
+ ]~!b[]~b]system
182
+ You are a helpful assistant.
183
+
184
+ # Tools
185
+ You may call one or more tools to assist with the user query.
186
+ Here are the tools available in JSONSchema format:
187
+
188
+ <tools>
189
+ <tool>{"name": "search_web", "description": "Search function.", "parameters": {"type": "object", "properties": {"query_list": {"type": "array", "items": {"type": "string"}, "description": "Keywords for search, list should contain 1 element."}, "query_tag": {"type": "array", "items": {"type": "string"}, "description": "Category of query"}}, "required": ["query_list", "query_tag"]}}</tool>
190
+ </tools>
191
+
192
+ When making tool calls, use XML format to invoke tools and pass parameters:
193
+
194
+ <minimax:tool_call>
195
+ <invoke name="tool-name-1">
196
+ <parameter name="param-key-1">param-value-1</parameter>
197
+ <parameter name="param-key-2">param-value-2</parameter>
198
+ ...
199
+ </invoke>
200
+ </minimax:tool_call>[e~[
201
+ ]~b]user
202
+ When were the latest announcements from OpenAI and Gemini?[e~[
203
+ ]~b]ai
204
+ <think>
205
+ ```
206
+
207
+ **Format Description:**
208
+
209
+ - `]~!b[]~b]system`: System message start marker
210
+ - `[e~[`: Message end marker
211
+ - `]~b]user`: User message start marker
212
+ - `]~b]ai`: Assistant message start marker
213
+ - `]~b]tool`: Tool result message start marker
214
+ - `<tools>...</tools>`: Tool definition area; each tool is wrapped in a `<tool>` tag whose content is a JSON Schema
215
+ - `<minimax:tool_call>...</minimax:tool_call>`: Tool call area
216
+ - `<think>...</think>`: Thinking process marker during generation
217
+
218
+ ### Model Output Format
219
+
220
+ MiniMax-M2.7 uses structured XML tag format:
221
+
222
+ ```xml
223
+ <minimax:tool_call>
224
+ <invoke name="search_web">
225
+ <parameter name="query_tag">["technology", "events"]</parameter>
226
+ <parameter name="query_list">["\"OpenAI\" \"latest\" \"release\""]</parameter>
227
+ </invoke>
228
+ <invoke name="search_web">
229
+ <parameter name="query_tag">["technology", "events"]</parameter>
230
+ <parameter name="query_list">["\"Gemini\" \"latest\" \"release\""]</parameter>
231
+ </invoke>
232
+ </minimax:tool_call>
233
+ ```
234
+
235
+ Each tool call uses the `<invoke name="function_name">` tag, and parameters use the `<parameter name="parameter_name">` tag wrapper.
236
+
237
+ ## Manually Parsing Tool Call Results
238
+
239
+ ### Parsing Tool Calls
240
+
241
+ MiniMax-M2.7 uses structured XML tags, which require a different parsing approach. The core function is as follows:
242
+
243
+ ```python
244
+ import re
245
+ import json
246
+ from typing import Any, Optional, List, Dict
247
+
248
+
249
def extract_name(name_str: str) -> str:
    """Return ``name_str`` trimmed of whitespace and of one pair of matching quotes.

    The value is unquoted only when it both starts and ends with the same
    quote character (double quotes are checked before single quotes);
    otherwise the trimmed text is returned unchanged.
    """
    trimmed = name_str.strip()
    for quote in ('"', "'"):
        if trimmed.startswith(quote) and trimmed.endswith(quote):
            return trimmed[1:-1]
    return trimmed
257
+
258
+
259
def convert_param_value(value: str, param_type: str) -> Any:
    """Convert a raw parameter string according to its declared schema type.

    Args:
        value: Text taken from inside a ``<parameter>`` tag.
        param_type: Declared type name; matched case-insensitively.

    Returns:
        ``None`` when ``value`` is ``"null"`` (any casing), regardless of type.
        Otherwise the converted value; whenever a conversion fails the
        original string is returned unchanged instead of raising.
    """
    if value.lower() == "null":
        return None

    param_type = param_type.lower()

    if param_type in ["string", "str", "text"]:
        return value
    elif param_type in ["integer", "int"]:
        try:
            return int(value)
        except (ValueError, TypeError):
            return value
    elif param_type in ["number", "float"]:
        try:
            val = float(value)
            # Collapse whole-number floats ("3.0") to int.
            return val if val != int(val) else int(val)
        except (ValueError, TypeError, OverflowError):
            # int() raises OverflowError for +/-inf and ValueError for nan;
            # treat both like any other failed conversion.
            return value
    elif param_type in ["boolean", "bool"]:
        return value.lower() in ["true", "1"]
    elif param_type in ["object", "array"]:
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value
    else:
        # Unknown type: try JSON parsing, return string if failed
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value
292
+
293
+
294
def _lookup_param_schema(tools, function_name):
    """Return the ``properties`` mapping of the first tool named ``function_name``, else ``{}``."""
    for tool in tools or []:
        # Accept both the bare schema and the {"type": "function", "function": {...}} wrapper.
        declared_name = tool.get("name") or tool.get("function", {}).get("name")
        if declared_name != function_name:
            continue
        schema = tool.get("parameters") or tool.get("function", {}).get("parameters")
        if isinstance(schema, dict) and "properties" in schema:
            return schema["properties"]
        # Only the first tool with a matching name is consulted.
        return {}
    return {}


def parse_tool_calls(model_output: str, tools: Optional[List[Dict]] = None) -> List[Dict]:
    """Collect every ``<invoke>`` found inside ``<minimax:tool_call>`` blocks.

    Args:
        model_output: Full text produced by the model.
        tools: Optional tool definitions used to look up declared parameter
            types. Each entry may be a bare schema
            (``{"name": ..., "parameters": {...}}``) or the wrapped form
            (``{"type": "function", "function": {...}}``).

    Returns:
        A list of ``{"name": <function name>, "arguments": <dict>}`` entries in
        order of appearance. Parameters without a declared type are kept as
        strings. Returns ``[]`` when no tool-call marker is present, and also
        when any exception occurs while parsing (the error is printed).
    """
    if "<minimax:tool_call>" not in model_output:
        return []

    parsed_calls = []

    try:
        block_pattern = re.compile(r"<minimax:tool_call>(.*?)</minimax:tool_call>", re.DOTALL)
        invoke_pattern = re.compile(r"<invoke name=(.*?)</invoke>", re.DOTALL)
        param_pattern = re.compile(r"<parameter name=(.*?)</parameter>", re.DOTALL)

        for block in block_pattern.finditer(model_output):
            for invoke in invoke_pattern.finditer(block.group(1)):
                invoke_text = invoke.group(1)

                # Everything before the first '>' is the (quoted) function name.
                header = re.search(r'^([^>]+)', invoke_text)
                if header is None:
                    continue
                function_name = extract_name(header.group(1))

                properties = _lookup_param_schema(tools, function_name)

                arguments = {}
                for param in param_pattern.finditer(invoke_text):
                    pieces = re.search(r'^([^>]+)>(.*)', param.group(1), re.DOTALL)
                    if pieces is None:
                        continue

                    key = extract_name(pieces.group(1))
                    # strip() already removes leading/trailing newlines.
                    raw = pieces.group(2).strip()

                    spec = properties[key] if key in properties else None
                    if isinstance(spec, dict) and "type" in spec:
                        declared = spec["type"]
                    else:
                        declared = "string"

                    arguments[key] = convert_param_value(raw, declared)

                parsed_calls.append({
                    "name": function_name,
                    "arguments": arguments
                })

    except Exception as e:
        print(f"Failed to parse tool calls: {e}")
        return []

    return parsed_calls
394
+ ```
395
+
396
+ **Usage Example:**
397
+
398
+ ```python
399
+ # Define tools
400
+ tools = [
401
+ {
402
+ "name": "get_weather",
403
+ "parameters": {
404
+ "type": "object",
405
+ "properties": {
406
+ "location": {"type": "string"},
407
+ "unit": {"type": "string"}
408
+ },
409
+ "required": ["location", "unit"]
410
+ }
411
+ }
412
+ ]
413
+
414
+ # Model output
415
+ model_output = """Let me help you query the weather.
416
+ <minimax:tool_call>
417
+ <invoke name="get_weather">
418
+ <parameter name="location">San Francisco</parameter>
419
+ <parameter name="unit">celsius</parameter>
420
+ </invoke>
421
+ </minimax:tool_call>"""
422
+
423
+ # Parse tool calls
424
+ tool_calls = parse_tool_calls(model_output, tools)
425
+
426
+ # Output results
427
+ for call in tool_calls:
428
+ print(f"Function called: {call['name']}")
429
+ print(f"Arguments: {call['arguments']}")
430
+ # Output: Function called: get_weather
431
+ # Arguments: {'location': 'San Francisco', 'unit': 'celsius'}
432
+ ```
433
+
434
+ ### Executing Tool Calls
435
+
436
+ After parsing is complete, you can execute the corresponding tool and construct the return result:
437
+
438
+ ```python
439
def execute_function_call(function_name: str, arguments: dict):
    """Run the mock implementation of a tool and wrap its result as a tool message.

    Args:
        function_name: Name of the tool to run; ``"get_weather"`` and
            ``"search_web"`` are handled.
        arguments: Parsed arguments for the call.

    Returns:
        A ``{"role": "tool", "content": [...]}`` message with a single text
        item, or ``None`` for any other function name.
    """
    def _tool_message(text):
        # Every handled tool returns the same envelope; only the text differs.
        return {
            "role": "tool",
            "content": [
                {
                    "name": function_name,
                    "type": "text",
                    "text": text
                }
            ]
        }

    if function_name == "get_weather":
        # Build function execution result (fixed sample values)
        report = {
            "location": arguments.get("location", "Unknown location"),
            "temperature": "25",
            "unit": arguments.get("unit", "celsius"),
            "weather": "Sunny"
        }
        return _tool_message(json.dumps(report, ensure_ascii=False))

    if function_name == "search_web":
        query_list = arguments.get("query_list", [])
        query_tag = arguments.get("query_tag", [])
        # Simulate search results
        return _tool_message(
            f"Search keywords: {query_list}, Category: {query_tag}\nSearch results: Relevant information found"
        )

    return None
476
+ ```
477
+
478
+ ### Returning Tool Execution Results to the Model
479
+
480
+ After successfully parsing tool calls, you should add the tool execution results to the conversation history so that the model can access and utilize this information in subsequent interactions. Refer to [chat_template.jinja](https://huggingface.co/MiniMaxAI/MiniMax-M2.7/blob/main/chat_template.jinja) for concatenation format.
481
+
482
+ ## References
483
+
484
+ - [MiniMax-M2.7 Model Repository](https://github.com/MiniMax-AI/MiniMax-M2.7)
485
+ - [vLLM Project Homepage](https://github.com/vllm-project/vllm)
486
+ - [SGLang Project Homepage](https://github.com/sgl-project/sglang)
487
+ - [OpenAI Python SDK](https://github.com/openai/openai-python)
docs/vllm_deploy_guide_cn.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiniMax M2.7 模型 vLLM 部署指南
2
+
3
+ [英文版](./vllm_deploy_guide.md) | [中文版](./vllm_deploy_guide_cn.md)
4
+
5
+ 我们推荐使用 [vLLM](https://docs.vllm.ai/en/stable/) 来部署 [MiniMax-M2.7](https://huggingface.co/MiniMaxAI/MiniMax-M2.7) 模型。vLLM 是一个高性能的推理引擎,其具有卓越的服务吞吐、高效智能的内存管理机制、强大的批量请求处理能力、深度优化的底层性能等特性。我们建议在部署之前查看 vLLM 的官方文档以检查硬件兼容性。
6
+
7
+ ## 本文档适用模型
8
+
9
+ 本文档适用以下模型,只需在部署时修改模型名称即可。
10
+
11
+ - [MiniMaxAI/MiniMax-M2.7](https://huggingface.co/MiniMaxAI/MiniMax-M2.7)
12
+ - [MiniMaxAI/MiniMax-M2.5](https://huggingface.co/MiniMaxAI/MiniMax-M2.5)
13
+ - [MiniMaxAI/MiniMax-M2.1](https://huggingface.co/MiniMaxAI/MiniMax-M2.1)
14
+ - [MiniMaxAI/MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2)
15
+
16
+ 以下以 MiniMax-M2.7 为例说明部署流程。
17
+
18
+ ## 环境要求
19
+
20
+ - OS:Linux
21
+
22
+ - Python:3.9 - 3.12
23
+
24
+ - GPU:
25
+
26
+ - 计算能力(compute capability)7.0 或更高
27
+
28
+ - 显存需求:权重需要 220 GB,每 1M 上下文 token 需要 240 GB
29
+
30
+ 以下为推荐配置,实际需求请根据业务场景调整:
31
+
32
+ - **96G x4 GPU**:总 KV Cache 容量支持 40 万 token。
33
+
34
+ - **144G x8 GPU**:总 KV Cache 容量支持高达 300 万 token。
35
+
36
+ > **注**:以上数值为硬件支持的最大并发缓存总量,模型单序列(Single Sequence)长度上限仍为 196k。
37
+
38
+ ## 使用 Python 部署
39
+
40
+ 建议使用虚拟环境(如 **venv**、**conda**、**uv**)以避免依赖冲突。
41
+
42
+ 建议在全新的 Python 环境中安装 vLLM:
43
+
44
+ ```bash
45
+ uv venv
46
+ source .venv/bin/activate
47
+ uv pip install vllm --torch-backend=auto
48
+ ```
49
+
50
+ 运行如下命令启动 vLLM 服务器,vLLM 会自动从 Huggingface 下载并缓存 MiniMax-M2.7 模型。
51
+
52
+ 4 卡部署命令:
53
+
54
+ ```bash
55
+ SAFETENSORS_FAST_GPU=1 vllm serve \
56
+ MiniMaxAI/MiniMax-M2.7 --trust-remote-code \
57
+ --tensor-parallel-size 4 \
58
+ --enable-auto-tool-choice --tool-call-parser minimax_m2 \
59
+ --reasoning-parser minimax_m2_append_think
60
+ ```
61
+
62
+ 8 卡部署命令:
63
+
64
+ ```bash
65
+ SAFETENSORS_FAST_GPU=1 vllm serve \
66
+ MiniMaxAI/MiniMax-M2.7 --trust-remote-code \
67
+ --enable_expert_parallel --tensor-parallel-size 8 \
68
+ --enable-auto-tool-choice --tool-call-parser minimax_m2 \
69
+ --reasoning-parser minimax_m2_append_think
70
+ ```
71
+
72
+ ## 测试部署
73
+
74
+ 启动后,可以通过如下命令测试 vLLM OpenAI 兼容接口:
75
+
76
+ ```bash
77
+ curl http://localhost:8000/v1/chat/completions \
78
+ -H "Content-Type: application/json" \
79
+ -d '{
80
+ "model": "MiniMaxAI/MiniMax-M2.7",
81
+ "messages": [
82
+ {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
83
+ {"role": "user", "content": [{"type": "text", "text": "Who won the world series in 2020?"}]}
84
+ ]
85
+ }'
86
+ ```
87
+
88
+ ## 常见问题
89
+
90
+ ### Huggingface 网络问题
91
+
92
+ 如果遇到网络问题,可以设置镜像站点后再进行拉取。
93
+
94
+ ```bash
95
+ export HF_ENDPOINT=https://hf-mirror.com
96
+ ```
97
+
98
+ ### MiniMax-M2 model is not currently supported
99
+
100
+ 出现此报错说明当前安装的 vLLM 版本过旧,请升级到最新版本。
101
+
102
+ ### torch.AcceleratorError: CUDA error: an illegal memory access was encountered
103
+ 在启动参数中添加 `--compilation-config "{\"cudagraph_mode\": \"PIECEWISE\"}"` 可以解决该问题。例如:
104
+
105
+ ```bash
106
+ SAFETENSORS_FAST_GPU=1 vllm serve \
107
+ MiniMaxAI/MiniMax-M2.7 --trust-remote-code \
108
+ --enable_expert_parallel --tensor-parallel-size 8 \
109
+ --enable-auto-tool-choice --tool-call-parser minimax_m2 \
110
+ --reasoning-parser minimax_m2_append_think \
111
+ --compilation-config "{\"cudagraph_mode\": \"PIECEWISE\"}"
112
+ ```
113
+
114
+ ### 模型输出乱码
115
+
116
+ 如果您在使用 vLLM 运行这些模型时遇到输出乱码,可以升级到最新版本(请至少确保版本在提交 [cf3eacfe58fa9e745c2854782ada884a9f992cf7](https://github.com/vllm-project/vllm/commit/cf3eacfe58fa9e745c2854782ada884a9f992cf7) 之后)。
117
+
118
+ ## 获取支持
119
+
120
+ 如果在部署 MiniMax 模型过程中遇到任何问题:
121
+
122
+ - 通过邮箱 [model@minimax.io](mailto:model@minimax.io) 等官方渠道联系我们的技术支持团队
123
+
124
+ - 在我们的 [GitHub](https://github.com/MiniMax-AI) 仓库提交 Issue
125
+
126
+ - 通过我们的 [官方企业微信交流群](https://github.com/MiniMax-AI/MiniMax-AI.github.io/blob/main/images/wechat-qrcode.jpeg) 反馈
127
+
128
+ 我们会持续优化模型的部署体验,欢迎反馈!
figures/agent_harness.png ADDED

Git LFS Details

  • SHA256: 7c661c39ff84fcc10a000f0dbc3b648e22bda4c8ebb43ce30cee1bc3d21c6c01
  • Pointer size: 131 Bytes
  • Size of remote file: 312 kB
figures/banner.png ADDED

Git LFS Details

  • SHA256: d524f03ea8db52076ed29a070526c170ecf5f20db789d4e79cf92521234758d8
  • Pointer size: 131 Bytes
  • Size of remote file: 120 kB
figures/mle_bench.png ADDED

Git LFS Details

  • SHA256: 6bdbff9fc7f90735f75bd51ecd1eb1a1db8b4804392744c4e8544988bdc7978c
  • Pointer size: 131 Bytes
  • Size of remote file: 123 kB
model-00039-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f22a75ed40f37ef5d3ab3b3c003e735c72b088f3834022795856cb9d5d9481
3
+ size 2999141944
model-00042-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4e7feb101da477ae04dc237b5d47b52c748886206c8867ed7ba68bd4b9d16c2
3
+ size 2999679088
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "200000": {
4
+ "content": "]!p~[",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "200001": {
12
+ "content": "<fim_prefix>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "200002": {
20
+ "content": "<fim_middle>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "200003": {
28
+ "content": "<fim_suffix>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "200004": {
36
+ "content": "<fim_pad>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "200005": {
44
+ "content": "<reponame>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "200006": {
52
+ "content": "<filename>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "200007": {
60
+ "content": "<gh_stars>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "200008": {
68
+ "content": "<issue_start>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "200009": {
76
+ "content": "<issue_comment>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "200010": {
84
+ "content": "<issue_closed>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "200011": {
92
+ "content": "<jupyter_start>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "200012": {
100
+ "content": "<jupyter_text>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "200013": {
108
+ "content": "<jupyter_code>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "200014": {
116
+ "content": "<jupyter_output>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "200015": {
124
+ "content": "<empty_output>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "200016": {
132
+ "content": "<commit_before>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "200017": {
140
+ "content": "<commit_msg>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "200018": {
148
+ "content": "<commit_after>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "200019": {
156
+ "content": "]~b]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "200020": {
164
+ "content": "[e~[",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "200021": {
172
+ "content": "]!d~[",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "200022": {
180
+ "content": "<function_call>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "200023": {
188
+ "content": "<code_interpreter>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "200024": {
196
+ "content": "]<]speech[>[",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "200025": {
204
+ "content": "]<]image[>[",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "200026": {
212
+ "content": "]<]video[>[",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "200027": {
220
+ "content": "]<]start of speech[>[",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "200028": {
228
+ "content": "]<]end of speech[>[",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "200029": {
236
+ "content": "]<]start of image[>[",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "200030": {
244
+ "content": "]<]end of image[>[",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "200031": {
252
+ "content": "]<]start of video[>[",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "200032": {
260
+ "content": "]<]end of video[>[",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "200033": {
268
+ "content": "]<]vision pad[>[",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "200034": {
276
+ "content": "]~!b[",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "200035": {
284
+ "content": "<jupyter_error>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "200036": {
292
+ "content": "<add_file>",
293
+ "single_word": false,
294
+ "lstrip": false,
295
+ "rstrip": false,
296
+ "normalized": false,
297
+ "special": true
298
+ },
299
+ "200037": {
300
+ "content": "<delete_file>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "200038": {
308
+ "content": "<rename_file>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "200039": {
316
+ "content": "<edit_file>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "200040": {
324
+ "content": "<commit_message>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "200041": {
332
+ "content": "<empty_source_file>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "200042": {
340
+ "content": "<repo_struct>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "200043": {
348
+ "content": "<code_context>",
349
+ "single_word": false,
350
+ "lstrip": false,
351
+ "rstrip": false,
352
+ "normalized": false,
353
+ "special": true
354
+ },
355
+ "200044": {
356
+ "content": "<file_content>",
357
+ "single_word": false,
358
+ "lstrip": false,
359
+ "rstrip": false,
360
+ "normalized": false,
361
+ "special": true
362
+ },
363
+ "200045": {
364
+ "content": "<source_files>",
365
+ "single_word": false,
366
+ "lstrip": false,
367
+ "rstrip": false,
368
+ "normalized": false,
369
+ "special": true
370
+ },
371
+ "200046": {
372
+ "content": "<pr_start>",
373
+ "single_word": false,
374
+ "lstrip": false,
375
+ "rstrip": false,
376
+ "normalized": false,
377
+ "special": true
378
+ },
379
+ "200047": {
380
+ "content": "<review_comment>",
381
+ "single_word": false,
382
+ "lstrip": false,
383
+ "rstrip": false,
384
+ "normalized": false,
385
+ "special": true
386
+ },
387
+ "200048": {
388
+ "content": "<filepath>",
389
+ "single_word": false,
390
+ "lstrip": false,
391
+ "rstrip": false,
392
+ "normalized": false,
393
+ "special": true
394
+ },
395
+ "200049": {
396
+ "content": "<file_sep>",
397
+ "single_word": false,
398
+ "lstrip": false,
399
+ "rstrip": false,
400
+ "normalized": false,
401
+ "special": true
402
+ },
403
+ "200050": {
404
+ "content": "<think>",
405
+ "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
+ "normalized": false,
409
+ "special": false
410
+ },
411
+ "200051": {
412
+ "content": "</think>",
413
+ "single_word": false,
414
+ "lstrip": false,
415
+ "rstrip": false,
416
+ "normalized": false,
417
+ "special": false
418
+ },
419
+ "200052": {
420
+ "content": "<minimax:tool_call>",
421
+ "single_word": false,
422
+ "lstrip": false,
423
+ "rstrip": false,
424
+ "normalized": false,
425
+ "special": false
426
+ },
427
+ "200053": {
428
+ "content": "</minimax:tool_call>",
429
+ "single_word": false,
430
+ "lstrip": false,
431
+ "rstrip": false,
432
+ "normalized": false,
433
+ "special": false
434
+ }
435
+ },
436
+ "additional_special_tokens": [
437
+ "<code_interpreter>",
438
+ "<commit_after>",
439
+ "<commit_before>",
440
+ "<commit_msg>",
441
+ "<empty_output>",
442
+ "<filename>",
443
+ "<fim_middle>",
444
+ "<fim_pad>",
445
+ "<fim_prefix>",
446
+ "<fim_suffix>",
447
+ "<function_call>",
448
+ "<gh_stars>",
449
+ "]<]speech[>[",
450
+ "]<]image[>[",
451
+ "]<]video[>[",
452
+ "]<]start of speech[>[",
453
+ "]<]end of speech[>[",
454
+ "]<]start of image[>[",
455
+ "]<]end of image[>[",
456
+ "]<]start of video[>[",
457
+ "]<]end of video[>[",
458
+ "]<]vision pad[>[",
459
+ "]~!b[",
460
+ "<issue_closed>",
461
+ "<issue_comment>",
462
+ "<issue_start>",
463
+ "<jupyter_code>",
464
+ "<jupyter_output>",
465
+ "<jupyter_start>",
466
+ "<jupyter_text>",
467
+ "<reponame>",
468
+ "[e~[",
469
+ "]!d~[",
470
+ "]!p~[",
471
+ "]~b]",
472
+ "<jupyter_error>",
473
+ "<add_file>",
474
+ "<delete_file>",
475
+ "<rename_file>",
476
+ "<edit_file>",
477
+ "<commit_message>",
478
+ "<empty_source_file>",
479
+ "<repo_struct>",
480
+ "<code_context>",
481
+ "<file_content>",
482
+ "<source_files>",
483
+ "<pr_start>",
484
+ "<review_comment>",
485
+ "<filepath>",
486
+ "<file_sep>"
487
+ ],
488
+ "add_prefix_space": false,
489
+ "bos_token": "]~!b[",
490
+ "clean_up_tokenization_spaces": false,
491
+ "eos_token": "[e~[",
492
+ "model_max_length": 40960000,
493
+ "tokenizer_class": "GPT2Tokenizer",
494
+ "unk_token": "]!d~["
495
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff