sanbo110 committed on
Commit
af5d696
·
1 Parent(s): 8780af9

update sth at 2025-10-15 15:46:55

Browse files
.replit ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
modules = ["python-3.11"]
# Key change: force Replit to build a complete Python 3.11 environment.
[nix]
channel = "stable-23.11"
packages = ["cargo", "libiconv", "libxcrypt", "python311", "rustc"]

# Second key change: start the server with uvicorn, as officially
# recommended for FastAPI apps. This avoids the compatibility problems
# seen with other launch methods.
run = "uvicorn main:app --host 0.0.0.0 --port 8080"

# This line is intentionally left unchanged.
entrypoint = "main.py"

[[ports]]
localPort = 8080
externalPort = 8080
Dockerfile ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.12 slim image for better performance and smaller size
FROM python:3.12-slim

# Set environment variables:
#  - unbuffered stdout/stderr so logs appear immediately
#  - no .pyc files, no pip cache, no pip version check (smaller image)
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Set work directory
WORKDIR /app

# Install system dependencies (curl is required by the HEALTHCHECK below)
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt .

# Install Python dependencies
# NOTE(review): "Brotli support" presumably comes from a package listed in
# requirements.txt — confirm it is actually declared there.
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user for security
RUN useradd --create-home --shell /bin/bash app && \
    chown -R app:app /app

# Create tokens directory and set permissions
RUN mkdir -p /app/data && \
    chown -R app:app /app/data

USER app

# Expose port
EXPOSE 8080

# Health check: probe the root endpoint; mark the container unhealthy after
# 3 consecutive failures (60s grace period for startup)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8080/ || exit 1

# Run the application
# NOTE(review): assumes main.py binds 0.0.0.0:8080 — confirm, since the
# health check and EXPOSE both target that port.
CMD ["python", "main.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
app/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""
Application package initialization.

Eagerly imports the subpackages so that ``import app`` exposes
``app.core``, ``app.models`` and ``app.utils`` directly.
"""

from app import core, models, utils

# Public API of the package.
__all__ = ["core", "models", "utils"]
app/api/admin.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Admin API endpoints for token management.

All routes in this module are mounted under ``/admin`` and require a
Bearer token validated against ``settings.AUTH_TOKEN``.
"""

from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import HTTPBearer
from fastapi.security.http import HTTPAuthorizationCredentials
from typing import Dict, Any

from app.core.config import settings
from app.core.token_manager import token_manager

# Router for all admin endpoints; grouped under the "admin" tag in the docs.
router = APIRouter(prefix="/admin", tags=["admin"])
# HTTPBearer extracts (and requires) an "Authorization: Bearer ..." header.
security = HTTPBearer()
15
+
16
+
17
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
    """Verify the admin Bearer token.

    Args:
        credentials: Parsed ``Authorization: Bearer ...`` header, supplied
            by the ``HTTPBearer`` dependency (which itself rejects requests
            without the header).

    Returns:
        The presented credential string on success.

    Raises:
        HTTPException: 401 when the credential does not match
            ``settings.AUTH_TOKEN``.
    """
    import hmac  # stdlib; local import keeps the helper self-contained

    if settings.SKIP_AUTH_TOKEN:
        # Authentication explicitly disabled via configuration: accept
        # whatever credential was presented.
        return credentials.credentials

    # Use a constant-time comparison instead of `!=` so the check does not
    # leak information about the expected token through response timing.
    if not hmac.compare_digest(credentials.credentials, settings.AUTH_TOKEN):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return credentials.credentials
29
+
30
+
31
@router.get("/token-stats")
async def get_token_stats(token: str = Depends(verify_admin_token)) -> Dict[str, Any]:
    """Return statistics about the token pool (admin-only)."""
    stats = token_manager.get_token_stats()
    return stats
35
+
36
+
37
@router.post("/reload-tokens")
async def reload_tokens(token: str = Depends(verify_admin_token)) -> Dict[str, str]:
    """Force a reload of the token pool from its backing file (admin-only).

    Raises:
        HTTPException: 500 when the reload fails; the underlying error
            message is forwarded in the detail.
    """
    try:
        token_manager.reload_tokens()
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"重新加载失败: {str(e)}",
        )
    return {"message": "Token池已重新加载"}
48
+
49
+
50
@router.post("/reset-tokens")
async def reset_tokens(token: str = Depends(verify_admin_token)) -> Dict[str, str]:
    """Reset all tokens, clearing their failure counters (admin-only).

    Raises:
        HTTPException: 500 when the reset fails; the underlying error
            message is forwarded in the detail.
    """
    try:
        token_manager.reset_all_tokens()
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"重置失败: {str(e)}",
        )
    return {"message": "所有token状态已重置"}
app/core/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""
Core module initialization.

Re-exports the core submodules for convenient access via ``app.core``.
"""

from app.core import config, response_handlers, openai

# Public API of the subpackage.
__all__ = ["config", "response_handlers", "openai"]
app/core/config.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
FastAPI application configuration module.
"""

import os
from typing import Dict, Optional
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application settings.

    Field defaults are resolved from process environment variables via
    ``os.getenv`` at import time; pydantic-settings may additionally load
    values from the ``.env`` file declared in ``Config`` when the instance
    is created.
    """

    # API Configuration
    API_ENDPOINT: str = os.getenv("API_ENDPOINT", "https://chat.z.ai/api/chat/completions")
    AUTH_TOKEN: str = os.getenv("AUTH_TOKEN", "sk-your-api-key")
    # NOTE(review): hardcoded fallback JWT committed to source — treat it as
    # public and rotate it; prefer supplying BACKUP_TOKEN via the environment.
    BACKUP_TOKEN: str = os.getenv("BACKUP_TOKEN", "eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxNmJjYjQ4LWZmMmYtNGExNS04NTNkLWYyYTI5YjY3ZmYwZiIsImVtYWlsIjoiR3Vlc3QtMTc1NTg0ODU4ODc4OEBndWVzdC5jb20ifQ.PktllDySS3trlyuFpTeIZf-7hl8Qu1qYF3BxjgIul0BrNux2nX9hVzIjthLXKMWAf9V0qM8Vm_iyDqkjPGsaiQ")

    # Model Configuration
    PRIMARY_MODEL: str = os.getenv("PRIMARY_MODEL", "GLM-4.5")
    THINKING_MODEL: str = os.getenv("THINKING_MODEL", "GLM-4.5-Thinking")
    SEARCH_MODEL: str = os.getenv("SEARCH_MODEL", "GLM-4.5-Search")
    AIR_MODEL: str = os.getenv("AIR_MODEL", "GLM-4.5-Air")
    GLM_46_MODEL: str = os.getenv("GLM_46_MODEL", "GLM-4.6")
    GLM_46_THINKING_MODEL: str = os.getenv("GLM_46_THINKING_MODEL", "GLM-4.6-Thinking")

    # Server Configuration
    LISTEN_PORT: int = int(os.getenv("LISTEN_PORT", "8080"))
    DEBUG_LOGGING: bool = os.getenv("DEBUG_LOGGING", "true").lower() == "true"

    # Feature Configuration
    THINKING_PROCESSING: str = os.getenv("THINKING_PROCESSING", "think")  # strip: remove <details> tags; think: convert to <span> tags; raw: keep as-is
    ANONYMOUS_MODE: bool = os.getenv("ANONYMOUS_MODE", "true").lower() == "true"
    TOOL_SUPPORT: bool = os.getenv("TOOL_SUPPORT", "true").lower() == "true"
    SCAN_LIMIT: int = int(os.getenv("SCAN_LIMIT", "200000"))
    SKIP_AUTH_TOKEN: bool = os.getenv("SKIP_AUTH_TOKEN", "false").lower() == "true"

    # Signature Configuration - forcibly disabled; environment variables are ignored
    ENABLE_SIGNATURE: bool = False  # signature verification forcibly disabled
    SIGNATURE_SECRET_KEY: str = "disabled"  # disabled
    SIGNATURE_ALGORITHM: str = "disabled"  # disabled

    # Token Pool Configuration
    TOKEN_FILE_PATH: str = os.getenv("TOKEN_FILE_PATH", "./tokens.txt")
    TOKEN_MAX_FAILURES: int = int(os.getenv("TOKEN_MAX_FAILURES", "3"))
    TOKEN_RELOAD_INTERVAL: int = int(os.getenv("TOKEN_RELOAD_INTERVAL", "60"))

    # Request Configuration (timeouts in seconds)
    REQUEST_TIMEOUT: int = int(os.getenv("REQUEST_TIMEOUT", "120"))
    CONNECTION_TIMEOUT: int = int(os.getenv("CONNECTION_TIMEOUT", "30"))
    MAX_RETRIES: int = int(os.getenv("MAX_RETRIES", "3"))

    # Proxy Configuration (None when the variables are unset)
    HTTP_PROXY: Optional[str] = os.getenv("HTTP_PROXY")
    HTTPS_PROXY: Optional[str] = os.getenv("HTTPS_PROXY")

    # Browser headers - mirror real browser traffic (captured via F12 devtools)
    CLIENT_HEADERS: Dict[str, str] = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0",
        "Sec-Ch-Ua": '"Chromium";v="140", "Not=A?Brand";v="24", "Microsoft Edge";v="140"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "X-Fe-Version": "prod-fe-1.0.83",  # version observed in the devtools capture
        "Origin": "https://chat.z.ai",
        "Connection": "keep-alive",
    }

    class Config:
        # Fallback environment file read by pydantic-settings.
        env_file = ".env"


# Module-level singleton used throughout the application.
settings = Settings()
app/core/openai.py ADDED
@@ -0,0 +1,651 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
OpenAI API endpoints.

Exposes an OpenAI-compatible ``/v1/models`` and ``/v1/chat/completions``
surface that proxies requests to the Z.AI upstream.
"""

import time
import json
import asyncio
from datetime import datetime
from typing import List, Dict, Any
from fastapi import APIRouter, Header, HTTPException
from fastapi.responses import StreamingResponse
import httpx

from app.core.config import settings
from app.models.schemas import OpenAIRequest, Message, ModelsResponse, Model
from app.utils.helpers import debug_log
from app.core.zai_transformer import ZAITransformer, generate_uuid
from app.utils.sse_tool_handler import SSEToolHandler

router = APIRouter()

# Global transformer instance, shared across all requests.
transformer = ZAITransformer()
24
+
25
+
26
@router.get("/v1/models")
async def list_models():
    """Return the list of models exposed by this proxy.

    Every entry shares the same creation timestamp (now) and owner
    ("z.ai"); only the model id differs.
    """
    now = int(time.time())
    model_ids = [
        settings.PRIMARY_MODEL,
        settings.THINKING_MODEL,
        settings.SEARCH_MODEL,
        settings.AIR_MODEL,
        settings.GLM_46_MODEL,
        settings.GLM_46_THINKING_MODEL,
    ]
    return ModelsResponse(
        data=[Model(id=mid, created=now, owned_by="z.ai") for mid in model_ids]
    )
41
+
42
+
43
+ @router.post("/v1/chat/completions")
44
+ async def chat_completions(request: OpenAIRequest, authorization: str = Header(...)):
45
+ """Handle chat completion requests with ZAI transformer"""
46
+ role = request.messages[0].role if request.messages else "unknown"
47
+ debug_log(f"😶‍🌫️ 收到 客户端 请求 - 模型: {request.model}, 流式: {request.stream}, 消息数: {len(request.messages)}, 角色: {role}, 工具数: {len(request.tools) if request.tools else 0}")
48
+
49
+ try:
50
+ # Validate API key (skip if SKIP_AUTH_TOKEN is enabled)
51
+ if not settings.SKIP_AUTH_TOKEN:
52
+ if not authorization.startswith("Bearer "):
53
+ raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
54
+
55
+ api_key = authorization[7:]
56
+ if api_key != settings.AUTH_TOKEN:
57
+ raise HTTPException(status_code=401, detail="Invalid API key")
58
+
59
+ # 使用新的转换器转换请求
60
+ request_dict = request.model_dump()
61
+ debug_log("🔄 开始转换请求格式: OpenAI -> Z.AI")
62
+
63
+ transformed = await transformer.transform_request_in(request_dict)
64
+
65
+ # 调用上游API
66
+ async def stream_response():
67
+ """流式响应生成器(包含重试机制)"""
68
+ retry_count = 0
69
+ last_error = None
70
+ current_token = transformed.get("token", "") # 获取当前使用的token
71
+
72
+ while retry_count <= settings.MAX_RETRIES:
73
+ try:
74
+ # 如果是重试,重新获取令牌并更新请求
75
+ if retry_count > 0:
76
+ delay = 2.0
77
+ debug_log(f"重试请求 ({retry_count}/{settings.MAX_RETRIES}) - 等待 {delay:.1f}s")
78
+ await asyncio.sleep(delay)
79
+
80
+ # 标记前一个token失败
81
+ if current_token:
82
+ transformer.mark_token_failure(current_token, Exception(f"Retry {retry_count}: {last_error}"))
83
+
84
+ # 重新获取令牌
85
+ debug_log("🔑 重新获取令牌用于重试...")
86
+ new_token = await transformer.get_token()
87
+ if not new_token:
88
+ debug_log("❌ 重试时无法获取有效的认证令牌")
89
+ raise Exception("重试时无法获取有效的认证令牌")
90
+ transformed["config"]["headers"]["Authorization"] = f"Bearer {new_token}"
91
+ current_token = new_token
92
+
93
+ async with httpx.AsyncClient(timeout=60.0) as client:
94
+ # 发送请求到上游
95
+ # debug_log(f"🎯 发送请求到 Z.AI: {transformed['config']['url']}")
96
+ async with client.stream(
97
+ "POST",
98
+ transformed["config"]["url"],
99
+ json=transformed["body"],
100
+ headers=transformed["config"]["headers"],
101
+ ) as response:
102
+ # 检查响应状态码
103
+ if response.status_code == 400:
104
+ # 400 错误,触发重试
105
+ error_text = await response.aread()
106
+ error_msg = error_text.decode('utf-8', errors='ignore')
107
+ debug_log(f"❌ 上游返回 400 错误 (尝试 {retry_count + 1}/{settings.MAX_RETRIES + 1})")
108
+ debug_log(f"上游错误响应: {error_msg}")
109
+
110
+ retry_count += 1
111
+ last_error = f"400 Bad Request: {error_msg}"
112
+
113
+ # 如果还有重试机会,继续循环
114
+ if retry_count <= settings.MAX_RETRIES:
115
+ continue
116
+ else:
117
+ # 达到最大重试次数,抛出错误
118
+ debug_log(f"❌ 达到最大重试次数 ({settings.MAX_RETRIES}),请求失败")
119
+ error_response = {
120
+ "error": {
121
+ "message": f"Request failed after {settings.MAX_RETRIES} retries: {last_error}",
122
+ "type": "upstream_error",
123
+ "code": 400
124
+ }
125
+ }
126
+ yield f"data: {json.dumps(error_response)}\n\n"
127
+ yield "data: [DONE]\n\n"
128
+ return
129
+
130
+ elif response.status_code == 401:
131
+ # 认证错误,可能需要重新获取token
132
+ debug_log(f"❌ 认证失败 (401),标记token失效")
133
+ if current_token:
134
+ transformer.mark_token_failure(current_token, Exception("401 Unauthorized"))
135
+
136
+ retry_count += 1
137
+ last_error = "401 Unauthorized - Token may be invalid"
138
+
139
+ if retry_count <= settings.MAX_RETRIES:
140
+ continue
141
+ else:
142
+ error_response = {
143
+ "error": {
144
+ "message": "Authentication failed after retries",
145
+ "type": "auth_error",
146
+ "code": 401
147
+ }
148
+ }
149
+ yield f"data: {json.dumps(error_response)}\n\n"
150
+ yield "data: [DONE]\n\n"
151
+ return
152
+
153
+ elif response.status_code == 429:
154
+ # 速率限制,延长等待时间重试
155
+ debug_log(f"❌ 速率限制 (429),将延长等待时间重试")
156
+ retry_count += 1
157
+ last_error = "429 Rate Limited"
158
+
159
+ if retry_count <= settings.MAX_RETRIES:
160
+ continue
161
+ else:
162
+ error_response = {
163
+ "error": {
164
+ "message": "Rate limit exceeded",
165
+ "type": "rate_limit_error",
166
+ "code": 429
167
+ }
168
+ }
169
+ yield f"data: {json.dumps(error_response)}\n\n"
170
+ yield "data: [DONE]\n\n"
171
+ return
172
+
173
+ elif response.status_code != 200:
174
+ # 其他错误,检查是否需要重试
175
+ error_text = await response.aread()
176
+ error_msg = error_text.decode('utf-8', errors='ignore')
177
+ debug_log(f"❌ 上游返回错误: {response.status_code}, 详情: {error_msg}")
178
+
179
+ # 某些错误可以重试
180
+ retryable_codes = [502, 503, 504]
181
+ if response.status_code in retryable_codes and retry_count < settings.MAX_RETRIES:
182
+ retry_count += 1
183
+ last_error = f"{response.status_code}: {error_msg}"
184
+ debug_log(f"⚠️ 服务器错误 {response.status_code},准备重试")
185
+ continue
186
+
187
+ # 不可重试的错误或已达到重试上限
188
+ error_response = {
189
+ "error": {
190
+ "message": f"Upstream error: {response.status_code}",
191
+ "type": "upstream_error",
192
+ "code": response.status_code,
193
+ "details": error_msg[:500] # 限制错误详情长度
194
+ }
195
+ }
196
+ yield f"data: {json.dumps(error_response)}\n\n"
197
+ yield "data: [DONE]\n\n"
198
+ return
199
+
200
+ # 200 成功,处理响应
201
+ debug_log(f"✅ Z.AI 响应成功,开始处理 SSE 流")
202
+ if retry_count > 0:
203
+ debug_log(f"✨ 第 {retry_count} 次重试成功")
204
+
205
+ # 标记token使用成功
206
+ if current_token:
207
+ transformer.mark_token_success(current_token)
208
+
209
+ # 初始化工具处理器(如果需要)
210
+ has_tools = transformed["body"].get("tools") is not None
211
+ has_mcp_servers = bool(transformed["body"].get("mcp_servers"))
212
+ tool_handler = None
213
+
214
+ # 如果有工具定义或MCP服务器,都需要工具处理器
215
+ if has_tools or has_mcp_servers:
216
+ chat_id = transformed["body"]["chat_id"]
217
+ model = request.model
218
+ tool_handler = SSEToolHandler(chat_id, model)
219
+
220
+ if has_tools and has_mcp_servers:
221
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('tools', []))} 个OpenAI工具 + {len(transformed['body'].get('mcp_servers', []))} 个MCP服务器")
222
+ elif has_tools:
223
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('tools', []))} 个OpenAI工具")
224
+ elif has_mcp_servers:
225
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('mcp_servers', []))} 个MCP服务器")
226
+
227
+ # 处理状态
228
+ has_thinking = False
229
+ thinking_signature = None
230
+ first_thinking_chunk = True
231
+
232
+ # 处理SSE流 - 优化的buffer处理
233
+ buffer = bytearray()
234
+ incomplete_line = ""
235
+ line_count = 0
236
+ chunk_count = 0
237
+ last_activity = time.time()
238
+ debug_log("📡 开始接收 SSE 流数据...")
239
+
240
+ async for chunk in response.aiter_bytes():
241
+ chunk_count += 1
242
+ last_activity = time.time()
243
+
244
+ if not chunk:
245
+ continue
246
+
247
+ # 将新数据添加到buffer
248
+ buffer.extend(chunk)
249
+
250
+ # 尝试解码并处理完整的行
251
+ try:
252
+ # 解码为字符串并处理
253
+ text_data = buffer.decode('utf-8')
254
+
255
+ # 分割为行
256
+ lines = text_data.split('\n')
257
+
258
+ # 最后一行可能不完整,保存到incomplete_line
259
+ if not text_data.endswith('\n'):
260
+ incomplete_line = lines[-1]
261
+ lines = lines[:-1]
262
+ else:
263
+ # 如果有未完成的行,将其与第一行合并
264
+ if incomplete_line:
265
+ lines[0] = incomplete_line + lines[0]
266
+ incomplete_line = ""
267
+
268
+ # 清空buffer,开始处理新的数据
269
+ buffer = bytearray()
270
+ if incomplete_line:
271
+ buffer.extend(incomplete_line.encode('utf-8'))
272
+
273
+ # 处理完整的行
274
+ for current_line in lines:
275
+ line_count += 1
276
+ if not current_line.strip():
277
+ continue
278
+
279
+ if current_line.startswith("data:"):
280
+ chunk_str = current_line[5:].strip()
281
+ if not chunk_str or chunk_str == "[DONE]":
282
+ if chunk_str == "[DONE]":
283
+ debug_log("📡 收到 [DONE] 信号")
284
+ yield "data: [DONE]\n\n"
285
+ continue
286
+
287
+ # debug_log(f"📦 解析数据块: {chunk_str[:200]}..." if len(chunk_str) > 200 else f"📦 解析数据块: {chunk_str}")
288
+
289
+ try:
290
+ chunk = json.loads(chunk_str)
291
+
292
+ if chunk.get("type") == "chat:completion":
293
+ data = chunk.get("data", {})
294
+ phase = data.get("phase")
295
+
296
+ # 记录每个阶段(只在阶段变化时记录)
297
+ if phase and phase != getattr(stream_response, '_last_phase', None):
298
+ debug_log(f"📈 SSE 阶段: {phase}")
299
+ stream_response._last_phase = phase
300
+
301
+ # 处理工具调用
302
+ if phase == "tool_call" and tool_handler:
303
+ for output in tool_handler.process_tool_call_phase(data, True):
304
+ yield output
305
+
306
+ # 处理其他阶段(工具结束)
307
+ elif phase == "other" and tool_handler:
308
+ for output in tool_handler.process_other_phase(data, True):
309
+ yield output
310
+
311
+ # 处理思考内容
312
+ elif phase == "thinking":
313
+ if not has_thinking:
314
+ has_thinking = True
315
+ # 发送初始角色
316
+ role_chunk = {
317
+ "choices": [
318
+ {
319
+ "delta": {"role": "assistant"},
320
+ "finish_reason": None,
321
+ "index": 0,
322
+ "logprobs": None,
323
+ }
324
+ ],
325
+ "created": int(time.time()),
326
+ "id": transformed["body"]["chat_id"],
327
+ "model": request.model,
328
+ "object": "chat.completion.chunk",
329
+ "system_fingerprint": "fp_zai_001",
330
+ }
331
+ yield f"data: {json.dumps(role_chunk)}\n\n"
332
+
333
+ delta_content = data.get("delta_content", "")
334
+ if delta_content:
335
+ # 处理思考内容格式
336
+ if delta_content.startswith("<details"):
337
+ content = (
338
+ delta_content.split("</summary>\n>")[-1].strip()
339
+ if "</summary>\n>" in delta_content
340
+ else delta_content
341
+ )
342
+ else:
343
+ content = delta_content
344
+
345
+ # 第一个思考块添加<think>开始标签,其他块保持纯内容
346
+ if first_thinking_chunk:
347
+ formatted_content = f"<think>{content}"
348
+ first_thinking_chunk = False
349
+ else:
350
+ formatted_content = content
351
+
352
+ thinking_chunk = {
353
+ "choices": [
354
+ {
355
+ "delta": {
356
+ "role": "assistant",
357
+ "content": formatted_content,
358
+ },
359
+ "finish_reason": None,
360
+ "index": 0,
361
+ "logprobs": None,
362
+ }
363
+ ],
364
+ "created": int(time.time()),
365
+ "id": transformed["body"]["chat_id"],
366
+ "model": request.model,
367
+ "object": "chat.completion.chunk",
368
+ "system_fingerprint": "fp_zai_001",
369
+ }
370
+ yield f"data: {json.dumps(thinking_chunk)}\n\n"
371
+
372
+ # 处理答案内容
373
+ elif phase == "answer":
374
+ edit_content = data.get("edit_content", "")
375
+ delta_content = data.get("delta_content", "")
376
+
377
+ # 如果还没有发送角色,先发送角色chunk
378
+ if not has_thinking:
379
+ has_thinking = True # 设置标志避免重复发送
380
+ role_chunk = {
381
+ "choices": [
382
+ {
383
+ "delta": {"role": "assistant"},
384
+ "finish_reason": None,
385
+ "index": 0,
386
+ "logprobs": None,
387
+ }
388
+ ],
389
+ "created": int(time.time()),
390
+ "id": transformed["body"]["chat_id"],
391
+ "model": request.model,
392
+ "object": "chat.completion.chunk",
393
+ "system_fingerprint": "fp_zai_001",
394
+ }
395
+ debug_log("➡️ 发送初始角色chunk")
396
+ yield f"data: {json.dumps(role_chunk)}\n\n"
397
+
398
+ # 处理思考结束和答案开始
399
+ if edit_content and "</details>\n" in edit_content:
400
+ if has_thinking and not first_thinking_chunk:
401
+ # 发送思考结束标记</think>
402
+ thinking_signature = str(int(time.time() * 1000))
403
+ sig_chunk = {
404
+ "choices": [
405
+ {
406
+ "delta": {
407
+ "role": "assistant",
408
+ "content": "</think>",
409
+ },
410
+ "finish_reason": None,
411
+ "index": 0,
412
+ "logprobs": None,
413
+ }
414
+ ],
415
+ "created": int(time.time()),
416
+ "id": transformed["body"]["chat_id"],
417
+ "model": request.model,
418
+ "object": "chat.completion.chunk",
419
+ "system_fingerprint": "fp_zai_001",
420
+ }
421
+ yield f"data: {json.dumps(sig_chunk)}\n\n"
422
+
423
+ # 提取答案内容
424
+ content_after = edit_content.split("</details>\n")[-1]
425
+ if content_after:
426
+ content_chunk = {
427
+ "choices": [
428
+ {
429
+ "delta": {
430
+ "role": "assistant",
431
+ "content": content_after,
432
+ },
433
+ "finish_reason": None,
434
+ "index": 0,
435
+ "logprobs": None,
436
+ }
437
+ ],
438
+ "created": int(time.time()),
439
+ "id": transformed["body"]["chat_id"],
440
+ "model": request.model,
441
+ "object": "chat.completion.chunk",
442
+ "system_fingerprint": "fp_zai_001",
443
+ }
444
+ yield f"data: {json.dumps(content_chunk)}\n\n"
445
+
446
+ # 处理增量内容
447
+ elif delta_content:
448
+ # 如果还没有发送角色
449
+ if not has_thinking:
450
+ has_thinking = True # 避免重复发送
451
+ role_chunk = {
452
+ "choices": [
453
+ {
454
+ "delta": {"role": "assistant"},
455
+ "finish_reason": None,
456
+ "index": 0,
457
+ "logprobs": None,
458
+ }
459
+ ],
460
+ "created": int(time.time()),
461
+ "id": transformed["body"]["chat_id"],
462
+ "model": request.model,
463
+ "object": "chat.completion.chunk",
464
+ "system_fingerprint": "fp_zai_001",
465
+ }
466
+ debug_log("➡️ 发送初始角色chunk")
467
+ yield f"data: {json.dumps(role_chunk)}\n\n"
468
+
469
+ content_chunk = {
470
+ "choices": [
471
+ {
472
+ "delta": {
473
+ "content": delta_content,
474
+ },
475
+ "finish_reason": None,
476
+ "index": 0,
477
+ "logprobs": None,
478
+ }
479
+ ],
480
+ "created": int(time.time()),
481
+ "id": transformed["body"]["chat_id"],
482
+ "model": request.model,
483
+ "object": "chat.completion.chunk",
484
+ "system_fingerprint": "fp_zai_001",
485
+ }
486
+ output_data = f"data: {json.dumps(content_chunk)}\n\n"
487
+ # debug_log(f"➡️ 输出内容块到客户端: {delta_content[:50]}...")
488
+ yield output_data
489
+
490
+ # 处理完成 - 当收到usage信息时
491
+ if data.get("usage"):
492
+ debug_log(f"📦 完成响应 - 使用统计: {json.dumps(data['usage'])}")
493
+
494
+ # 只有在非工具调用模式下才发送普通完成信号
495
+ if not tool_handler or not tool_handler.has_tool_call:
496
+ finish_chunk = {
497
+ "choices": [
498
+ {
499
+ "delta": {}, # 空的delta表示结束
500
+ "finish_reason": "stop",
501
+ "index": 0,
502
+ "logprobs": None,
503
+ }
504
+ ],
505
+ "usage": data["usage"],
506
+ "created": int(time.time()),
507
+ "id": transformed["body"]["chat_id"],
508
+ "model": request.model,
509
+ "object": "chat.completion.chunk",
510
+ "system_fingerprint": "fp_zai_001",
511
+ }
512
+ finish_output = f"data: {json.dumps(finish_chunk)}\n\n"
513
+ debug_log("➡️ 发送完成信号")
514
+ yield finish_output
515
+ debug_log("➡️ 发送 [DONE]")
516
+ yield "data: [DONE]\n\n"
517
+
518
+ except json.JSONDecodeError as e:
519
+ debug_log(f"❌ JSON解析错误: {e}, 内容: {chunk_str[:200]}")
520
+ except Exception as e:
521
+ debug_log(f"❌ 处理chunk错误: {e}")
522
+
523
+ except UnicodeDecodeError:
524
+ # 如果解码失败,可能是数据不完整,继续接收
525
+ debug_log(f"⚠️ 数据解码失败,缓冲区大小: {len(buffer)}")
526
+ if len(buffer) > 1024 * 1024: # 1MB限制
527
+ debug_log("❌ 缓冲区过大,清空重试")
528
+ buffer = bytearray()
529
+ incomplete_line = ""
530
+ except Exception as e:
531
+ debug_log(f"❌ Buffer处理异常: {e}")
532
+ # 清空buffer继续处理
533
+ buffer = bytearray()
534
+ incomplete_line = ""
535
+
536
+ # 检查是否长时间没有活动(超时检查)
537
+ if time.time() - last_activity > 30: # 30秒超时
538
+ debug_log("⚠️ 检测到长时间无活动,可能连接中断")
539
+ break
540
+
541
+ # 确保发送结束信号
542
+ if not tool_handler or not tool_handler.has_tool_call:
543
+ debug_log("📤 发送最终 [DONE] 信号")
544
+ yield "data: [DONE]\n\n"
545
+
546
+ debug_log(f"✅ SSE 流处理完成,共处理 {line_count} 行数据,{chunk_count} 个数据块")
547
+
548
+ # 检查处理完整性
549
+ is_complete = True
550
+ completion_issues = []
551
+
552
+ if line_count == 0:
553
+ is_complete = False
554
+ completion_issues.append("没有处理任何数据行")
555
+ elif chunk_count == 0:
556
+ is_complete = False
557
+ completion_issues.append("没有收到任何数据块")
558
+ elif chunk_count > 0:
559
+ debug_log(f"📊 平均每个数据块包含 {line_count/chunk_count:.1f} 行")
560
+
561
+ # 检查工具调用完整性
562
+ if tool_handler and tool_handler.has_tool_call:
563
+ if not tool_handler.completed_tools:
564
+ completion_issues.append("工具调用未正常完成")
565
+ else:
566
+ debug_log(f"✅ 工具调用完成: {len(tool_handler.completed_tools)} 个工具")
567
+
568
+ # 检查思考内容完整性(只有真正的thinking模式才需要签名)
569
+ # 注意:普通的answer阶段不需要thinking签名,只有thinking阶段才需要
570
+ # if has_thinking and not thinking_signature:
571
+ # completion_issues.append("思考内容缺少签名")
572
+
573
+ # 报告完整性状态
574
+ if is_complete and not completion_issues:
575
+ debug_log("✅ 响应完整性检查通过")
576
+ else:
577
+ debug_log(f"⚠️ 响应完整性问题: {', '.join(completion_issues)}")
578
+
579
+ # 如果问题严重且还有重试机会,考虑重试
580
+ critical_issues = ["没有处理任何数据行", "没有收到任何数据块"]
581
+ has_critical_issue = any(issue in completion_issues for issue in critical_issues)
582
+
583
+ if has_critical_issue and retry_count < settings.MAX_RETRIES:
584
+ debug_log("🔄 检测到严重完整性问题,准备重试")
585
+ retry_count += 1
586
+ last_error = f"Incomplete response: {', '.join(completion_issues)}"
587
+ continue
588
+
589
+ # 成功处理完成,退出重试循环
590
+ return
591
+
592
+ except Exception as e:
593
+ debug_log(f"❌ 流处理错误: {e}")
594
+ import traceback
595
+ debug_log(traceback.format_exc())
596
+
597
+ # 标记token失败
598
+ if current_token:
599
+ transformer.mark_token_failure(current_token, e)
600
+
601
+ # 检查是否还可以重试
602
+ retry_count += 1
603
+ last_error = str(e)
604
+
605
+ if retry_count > settings.MAX_RETRIES:
606
+ # 达到最大重试次数,返回错误
607
+ debug_log(f"❌ 达到最大重试次数 ({settings.MAX_RETRIES}),流处理失败")
608
+ error_response = {
609
+ "error": {
610
+ "message": f"Stream processing failed after {settings.MAX_RETRIES} retries: {last_error}",
611
+ "type": "stream_error"
612
+ }
613
+ }
614
+ yield f"data: {json.dumps(error_response)}\n\n"
615
+ yield "data: [DONE]\n\n"
616
+ return
617
+
618
+ # 返回流式响应
619
+ debug_log("🚀 启动 SSE 流式响应")
620
+
621
+ # 创建一个包装的生成器来追踪数据流
622
+ async def logged_stream():
623
+ chunk_count = 0
624
+ try:
625
+ debug_log("📤 开始向客户端流式传输数据...")
626
+ async for chunk in stream_response():
627
+ chunk_count += 1
628
+ # debug_log(f"📤 发送块[{chunk_count}]: {chunk[:200]}..." if len(chunk) > 200 else f" 📤 发送块[{chunk_count}]: {chunk}")
629
+ yield chunk
630
+ debug_log(f"✅ 流式传输完成,共发送 {chunk_count} 个数据块")
631
+ except Exception as e:
632
+ debug_log(f"❌ 流式传输中断: {e}")
633
+ raise
634
+
635
+ return StreamingResponse(
636
+ logged_stream(),
637
+ media_type="text/event-stream",
638
+ headers={
639
+ "Cache-Control": "no-cache",
640
+ "Connection": "keep-alive",
641
+ },
642
+ )
643
+
644
+ except HTTPException:
645
+ raise
646
+ except Exception as e:
647
+ debug_log(f"❌ 处理请求时发生错误: {str(e)}")
648
+ import traceback
649
+
650
+ debug_log(f"❌ 错误堆栈: {traceback.format_exc()}")
651
+ raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
app/core/response_handlers.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Response handlers for streaming and non-streaming responses
3
+ """
4
+
5
+ import json
6
+ import time
7
+ from typing import Generator, Optional
8
+ import requests
9
+ from fastapi import HTTPException
10
+ from fastapi.responses import JSONResponse, StreamingResponse
11
+
12
+ from app.core.config import settings
13
+ from app.models.schemas import (
14
+ Message, Delta, Choice, Usage, OpenAIResponse,
15
+ UpstreamRequest, UpstreamData, UpstreamError, ModelItem
16
+ )
17
+ from app.utils.helpers import debug_log, call_upstream_api, transform_thinking_content
18
+ from app.core.token_manager import token_manager
19
+ from app.utils.sse_parser import SSEParser
20
+ from app.utils.tools import extract_tool_invocations, remove_tool_json_content
21
+ from app.utils.sse_tool_handler import SSEToolHandler
22
+
23
+
24
def create_openai_response_chunk(
    model: str,
    delta: Optional[Delta] = None,
    finish_reason: Optional[str] = None
) -> OpenAIResponse:
    """Build one OpenAI-compatible `chat.completion.chunk` for SSE streaming.

    Args:
        model: Model name echoed back to the client.
        delta: Incremental payload for this chunk; an empty Delta is used
            when omitted.
        finish_reason: Termination marker ("stop", "tool_calls", ...) or
            None while the stream is still running.

    Returns:
        OpenAIResponse shaped as a streaming chunk with a single choice.
    """
    timestamp = int(time.time())
    sole_choice = Choice(
        index=0,
        delta=delta or Delta(),
        finish_reason=finish_reason,
    )
    return OpenAIResponse(
        id=f"chatcmpl-{timestamp}",
        object="chat.completion.chunk",
        created=timestamp,
        model=model,
        choices=[sole_choice],
    )
41
+
42
+
43
def handle_upstream_error(error: UpstreamError) -> Generator[str, None, None]:
    """Terminate the SSE stream gracefully after an upstream error.

    Logs the upstream error code/detail, then emits a final chunk with
    finish_reason "stop" followed by the SSE [DONE] sentinel so clients
    close the connection cleanly.
    """
    debug_log(f"上游错误: code={error.code}, detail={error.detail}")

    # Final chunk signalling normal termination to the client
    closing_chunk = create_openai_response_chunk(
        model=settings.PRIMARY_MODEL,
        finish_reason="stop",
    )
    yield f"data: {closing_chunk.model_dump_json()}\n\n"
    yield "data: [DONE]\n\n"
54
+
55
+
56
class ResponseHandler:
    """Base class for response handling.

    Holds the upstream request payload plus the chat id / auth token, and
    implements the shared retry + token-rotation logic for calling the
    upstream API.
    """

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str):
        self.upstream_req = upstream_req
        self.chat_id = chat_id
        self.auth_token = auth_token

    def _call_upstream(self) -> requests.Response:
        """Call the upstream API with retry, token rotation and backoff.

        Retry policy per attempt:
          * 200            -> mark token healthy, return the response
          * 401/403        -> token is bad: mark it failed, rotate to a new one
          * 429            -> rate limited: exponential backoff, keep the token
          * 5xx            -> upstream fault: short sleep, keep the token
          * network errors -> retry without blaming the token
          * other errors   -> treated as a token problem: mark failed, rotate

        Returns:
            The last upstream response (may be non-200 when retries ran out).

        Raises:
            Exception: when every retry attempt failed.
        """
        max_retries = settings.MAX_RETRIES
        retry_count = 0

        while retry_count < max_retries:
            try:
                debug_log(f"尝试调用上游API (第 {retry_count + 1}/{max_retries} 次)")
                response = call_upstream_api(self.upstream_req, self.chat_id, self.auth_token)

                # Check if response is successful
                if response.status_code == 200:
                    # Mark token as successful
                    token_manager.mark_token_success(self.auth_token)
                    debug_log("上游API调用成功")
                    return response
                elif response.status_code in [401, 403]:
                    # Authentication/authorization error - mark token as failed
                    debug_log(f"Token认证失败 (状态码: {response.status_code}): {self.auth_token[:20]}...")
                    token_manager.mark_token_failed(self.auth_token)

                    # Try to get a new token
                    new_token = token_manager.get_next_token()
                    if new_token and new_token != self.auth_token:
                        debug_log(f"尝试使用新token: {new_token[:20]}...")
                        self.auth_token = new_token
                        retry_count += 1
                        continue
                    else:
                        debug_log("没有更多可用token")
                        return response
                elif response.status_code in [429]:
                    # Rate limit - don't mark token as failed, just retry
                    debug_log(f"遇到速率限制 (状态码: {response.status_code}),等待后重试")
                    if retry_count < max_retries - 1:
                        # NOTE: uses the module-level `time` import; the
                        # previous function-local `import time` was redundant.
                        time.sleep(2 ** retry_count)  # exponential backoff
                        retry_count += 1
                        continue
                    else:
                        return response
                elif response.status_code >= 500:
                    # Server error - retry without marking token as failed
                    debug_log(f"服务器错误 (状态码: {response.status_code}),稍后重试")
                    if retry_count < max_retries - 1:
                        time.sleep(1)
                        retry_count += 1
                        continue
                    else:
                        return response
                else:
                    # Other client errors, return response as-is
                    debug_log(f"客户端错误 (状态码: {response.status_code})")
                    return response

            except Exception as e:
                error_msg = str(e)
                debug_log(f"调用上游失败 (尝试 {retry_count + 1}/{max_retries}): {error_msg}")

                # Heuristic: classify the failure as network vs. token trouble
                is_connection_error = any(keyword in error_msg.lower() for keyword in [
                    'connection', 'timeout', 'network', 'dns', 'socket', 'ssl'
                ])

                if is_connection_error:
                    debug_log("检测到网络连接问题,不标记token失败")
                    # Network problems are not the token's fault; just retry
                    if retry_count < max_retries - 1:
                        time.sleep(2)  # brief pause before retrying
                        retry_count += 1
                        continue
                    else:
                        raise Exception(f"网络连接问题,重试{max_retries}次后仍失败: {error_msg}")
                else:
                    # Other errors may be token-related: mark failed and rotate
                    debug_log("检测到可能的token问题,标记token失败")
                    token_manager.mark_token_failed(self.auth_token)

                    # Try to get a new token
                    new_token = token_manager.get_next_token()
                    if new_token and new_token != self.auth_token and retry_count < max_retries - 1:
                        debug_log(f"尝试使用新token: {new_token[:20]}...")
                        self.auth_token = new_token
                        retry_count += 1
                        continue
                    else:
                        raise

        # If we get here, all retries failed
        raise Exception("所有重试尝试均失败")

    def _handle_upstream_error(self, response: requests.Response) -> None:
        """Log a non-200 upstream response (body is logged only in debug mode)."""
        debug_log(f"上游返回错误状态: {response.status_code}")
        if settings.DEBUG_LOGGING:
            debug_log(f"上游错误响应: {response.text}")
162
+
163
+
164
class StreamResponseHandler(ResponseHandler):
    """Handler for streaming responses.

    Re-emits the upstream SSE stream as OpenAI `chat.completion.chunk`
    events. Thinking-phase text is wrapped in <think>...</think> tags; when
    the request declares tools, answer text is buffered so tool-call JSON
    can be extracted and emitted at end of stream.
    """

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
        super().__init__(upstream_req, chat_id, auth_token)
        self.has_tools = has_tools
        # Answer text accumulated while tools are enabled; parsed at stream end
        self.buffered_content = ""
        self.tool_calls = None
        # Initialize SSE tool handler for improved tool processing
        self.tool_handler = SSEToolHandler(chat_id, settings.PRIMARY_MODEL) if has_tools else None
        # Thinking-state tracking: True until the first thinking chunk is sent
        self.first_thinking_chunk = True

    def handle(self) -> Generator[str, None, None]:
        """Handle streaming response.

        Yields SSE lines ("data: {...}" records); on any termination path a
        final chunk plus the [DONE] sentinel is emitted so clients close
        cleanly, even after an upstream failure or a mid-stream exception.
        """
        debug_log(f"开始处理流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception:
            yield "data: {\"error\": \"Failed to call upstream\"}\n\n"
            return

        if response.status_code != 200:
            self._handle_upstream_error(response)
            yield "data: {\"error\": \"Upstream error\"}\n\n"
            return

        # Send initial role chunk
        first_chunk = create_openai_response_chunk(
            model=settings.PRIMARY_MODEL,
            delta=Delta(role="assistant")
        )
        yield f"data: {first_chunk.model_dump_json()}\n\n"

        # Process stream
        debug_log("开始读取上游SSE流")
        sent_initial_answer = False
        # Set when we emitted a proper end chunk; guards the fallback below
        stream_ended_normally = False

        try:
            with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
                for event in parser.iter_json_data(UpstreamData):
                    upstream_data = event['data']

                    # Check for errors reported inside the stream payload
                    if self._has_error(upstream_data):
                        error = self._get_error(upstream_data)
                        yield from handle_upstream_error(error)
                        stream_ended_normally = True
                        break

                    debug_log(f"解析成功 - 类型: {upstream_data.type}, 阶段: {upstream_data.data.phase}, "
                              f"内容长度: {len(upstream_data.data.delta_content or '')}, 完成: {upstream_data.data.done}")

                    # Process content (tool phases routed to the tool handler)
                    yield from self._process_content_with_tools(upstream_data, sent_initial_answer)

                    # Update sent_initial_answer flag if we sent content
                    # (re-derived here because _process_content's local
                    # assignment does not propagate back)
                    if not sent_initial_answer and (upstream_data.data.delta_content or upstream_data.data.edit_content):
                        sent_initial_answer = True

                    # Check if done
                    if upstream_data.data.done or upstream_data.data.phase == "done":
                        debug_log("检测到流结束信号")
                        yield from self._send_end_chunk()
                        stream_ended_normally = True
                        break

        except Exception as e:
            debug_log(f"SSE流处理异常: {e}")
            # Abnormal end: warn the client that the response may be truncated
            if not stream_ended_normally:
                error_chunk = create_openai_response_chunk(
                    model=settings.PRIMARY_MODEL,
                    delta=Delta(content=f"\n\n[系统提示: 连接中断,响应可能不完整]")
                )
                yield f"data: {error_chunk.model_dump_json()}\n\n"

        # Guarantee a terminating chunk + [DONE] even on abnormal exit
        if not stream_ended_normally:
            debug_log("流未正常结束,发送结束信号")
            yield from self._send_end_chunk(force_stop=True)

    def _has_error(self, upstream_data: UpstreamData) -> bool:
        """Return True if any of the three possible error slots is populated."""
        return bool(
            upstream_data.error or
            upstream_data.data.error or
            (upstream_data.data.inner and upstream_data.data.inner.error)
        )

    def _get_error(self, upstream_data: UpstreamData) -> UpstreamError:
        """Return the first populated error (same precedence as _has_error)."""
        return (
            upstream_data.error or
            upstream_data.data.error or
            (upstream_data.data.inner.error if upstream_data.data.inner else None)
        )

    def _process_content(
        self,
        upstream_data: UpstreamData,
        sent_initial_answer: bool
    ) -> Generator[str, None, None]:
        """Process content from upstream data.

        Buffers text when tools are enabled; otherwise emits SSE chunks,
        wrapping thinking-phase text in <think>...</think>.

        NOTE(review): the `sent_initial_answer = True` assignment below only
        mutates the local parameter; the caller (handle) re-derives the flag
        independently, so duplicated initial content is possible — confirm.
        """
        content = upstream_data.data.delta_content or upstream_data.data.edit_content

        if not content:
            return

        # Transform thinking content
        if upstream_data.data.phase == "thinking":
            content = transform_thinking_content(content)

        # Buffer content if tools are enabled
        if self.has_tools:
            self.buffered_content += content
        else:
            # Handle initial answer content
            if (not sent_initial_answer and
                upstream_data.data.edit_content and
                upstream_data.data.phase == "answer"):

                # NOTE(review): this overwrites `content` derived above from
                # delta_content — verify the ordering is intentional.
                content = self._extract_edit_content(upstream_data.data.edit_content)
                if content:
                    debug_log(f"发送普通内容: {content}")
                    chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(content=content)
                    )
                    yield f"data: {chunk.model_dump_json()}\n\n"
                    sent_initial_answer = True

            # Handle delta content
            if upstream_data.data.delta_content:
                if content:
                    if upstream_data.data.phase == "thinking":
                        # First thinking chunk carries the opening <think> tag;
                        # later chunks pass through as plain content
                        if self.first_thinking_chunk:
                            formatted_content = f"<think>{content}"
                            self.first_thinking_chunk = False
                        else:
                            formatted_content = content

                        debug_log(f"发送思考内容: {content}")
                        chunk = create_openai_response_chunk(
                            model=settings.PRIMARY_MODEL,
                            delta=Delta(content=formatted_content)
                        )
                    else:
                        # Leaving the thinking phase: close the <think> tag first
                        if not self.first_thinking_chunk and upstream_data.data.phase == "answer":
                            # Emit the closing tag as its own chunk
                            thinking_end_chunk = create_openai_response_chunk(
                                model=settings.PRIMARY_MODEL,
                                delta=Delta(content="</think>")
                            )
                            yield f"data: {thinking_end_chunk.model_dump_json()}\n\n"
                            # Reset the thinking state
                            self.first_thinking_chunk = True

                        debug_log(f"发送普通内容: {content}")
                        chunk = create_openai_response_chunk(
                            model=settings.PRIMARY_MODEL,
                            delta=Delta(content=content)
                        )
                    yield f"data: {chunk.model_dump_json()}\n\n"

    def _extract_edit_content(self, edit_content: str) -> str:
        """Return the text after the first "</details>" marker, or "" if absent."""
        parts = edit_content.split("</details>")
        return parts[1] if len(parts) > 1 else ""

    def _send_end_chunk(self, force_stop: bool = False) -> Generator[str, None, None]:
        """Send end chunk and DONE signal.

        Args:
            force_stop: True on abnormal termination — skips tool-call
                extraction and flushes any buffered text as plain content.
        """
        finish_reason = "stop"

        if self.has_tools and not force_stop:
            # Try to extract tool calls from buffered content
            self.tool_calls = extract_tool_invocations(self.buffered_content)

            if self.tool_calls:
                debug_log(f"检测到工具调用: {len(self.tool_calls)} 个")
                # Send tool calls with proper format
                for i, tc in enumerate(self.tool_calls):
                    tool_call_delta = {
                        "index": i,
                        "id": tc.get("id"),
                        "type": tc.get("type", "function"),
                        "function": tc.get("function", {}),
                    }

                    out_chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(tool_calls=[tool_call_delta])
                    )
                    yield f"data: {out_chunk.model_dump_json()}\n\n"

                finish_reason = "tool_calls"
            else:
                # No tool calls found: strip tool JSON and send as plain text
                trimmed_content = remove_tool_json_content(self.buffered_content)
                if trimmed_content:
                    debug_log(f"发送常规内容: {len(trimmed_content)} 字符")
                    content_chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(content=trimmed_content)
                    )
                    yield f"data: {content_chunk.model_dump_json()}\n\n"
        elif force_stop:
            # Forced termination: flush whatever was buffered (if anything)
            if self.buffered_content:
                debug_log(f"强制结束,发送缓冲内容: {len(self.buffered_content)} 字符")
                content_chunk = create_openai_response_chunk(
                    model=settings.PRIMARY_MODEL,
                    delta=Delta(content=self.buffered_content)
                )
                yield f"data: {content_chunk.model_dump_json()}\n\n"

        # Send final chunk
        end_chunk = create_openai_response_chunk(
            model=settings.PRIMARY_MODEL,
            finish_reason=finish_reason
        )
        yield f"data: {end_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
        debug_log(f"流式响应完成 (finish_reason: {finish_reason})")

    def _process_content_with_tools(
        self,
        upstream_data: UpstreamData,
        sent_initial_answer: bool
    ) -> Generator[str, None, None]:
        """Process content with improved tool handling.

        Routes "tool_call" / "other" phases to the SSEToolHandler when tools
        are enabled; every other phase falls back to _process_content.
        """
        # Handle tool calls with improved SSE tool handler
        if self.has_tools and self.tool_handler:
            # Check if this is a tool_call phase
            if upstream_data.data.phase == "tool_call":
                # Use the improved tool handler for tool call processing
                yield from self.tool_handler.process_tool_call_phase(
                    upstream_data.data.model_dump(),
                    is_stream=True
                )
                return
            elif upstream_data.data.phase == "other":
                # Handle other phase which may contain tool completion signals
                yield from self.tool_handler.process_other_phase(
                    upstream_data.data.model_dump(),
                    is_stream=True
                )
                return

        # Fall back to original content processing
        yield from self._process_content(upstream_data, sent_initial_answer)
421
+
422
+
423
class NonStreamResponseHandler(ResponseHandler):
    """Handler for non-streaming responses.

    Collects the entire upstream SSE stream into one string, wraps
    thinking-phase text in <think>...</think>, extracts tool calls when
    enabled, and returns a single OpenAI-style chat.completion body.
    """

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
        super().__init__(upstream_req, chat_id, auth_token)
        self.has_tools = has_tools
        # Thinking-state tracking
        self.first_thinking_chunk = True
        self.in_thinking_phase = False

    def handle(self) -> JSONResponse:
        """Handle non-streaming response.

        Returns:
            JSONResponse shaped like an OpenAI chat.completion object.

        Raises:
            HTTPException: 502 on upstream failure, collection failure with
                no content, or an incomplete response.
        """
        debug_log(f"开始处理非流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception as e:
            debug_log(f"调用上游失败: {e}")
            raise HTTPException(status_code=502, detail="Failed to call upstream")

        if response.status_code != 200:
            self._handle_upstream_error(response)
            raise HTTPException(status_code=502, detail="Upstream error")

        # Collect full response
        full_content = []
        debug_log("开始收集完整响应内容")
        response_completed = False

        try:
            with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
                for event in parser.iter_json_data(UpstreamData):
                    upstream_data = event['data']

                    if upstream_data.data.delta_content:
                        content = upstream_data.data.delta_content

                        if upstream_data.data.phase == "thinking":
                            content = transform_thinking_content(content)

                            # Chunked thinking-content formatting
                            if not self.in_thinking_phase:
                                # Entering the thinking phase: prepend the opening tag
                                self.in_thinking_phase = True
                                # NOTE(review): both branches prepend "<think>"
                                # identically; the flag only controls whether a
                                # closing tag is appended later — confirm intent.
                                if self.first_thinking_chunk:
                                    content = f"<think>{content}"
                                    self.first_thinking_chunk = False
                                else:
                                    content = f"<think>{content}"
                            # If already inside the thinking phase, keep content as-is
                        else:
                            # Transitioning out of the thinking phase
                            if self.in_thinking_phase:
                                # Close the tag opened by preceding thinking chunks
                                if full_content and not self.first_thinking_chunk:
                                    full_content.append("</think>")
                                self.in_thinking_phase = False
                                self.first_thinking_chunk = True

                        if content:
                            full_content.append(content)

                    if upstream_data.data.done or upstream_data.data.phase == "done":
                        debug_log("检测到完成信号,停止收集")
                        response_completed = True
                        break

        except Exception as e:
            debug_log(f"非流式响应收集异常: {e}")
            if not full_content:
                # Nothing collected at all: surface the failure
                raise HTTPException(status_code=502, detail=f"Response collection failed: {str(e)}")
            else:
                # Partial content: continue with what we have
                debug_log(f"部分内容收集成功,继续处理 ({len(full_content)} 个片段)")

        if not response_completed and not full_content:
            debug_log("响应未完成且无内容,可能是连接问题")
            raise HTTPException(status_code=502, detail="Incomplete response from upstream")

        # If the stream ended while still inside the thinking phase, close the tag
        if self.in_thinking_phase and not self.first_thinking_chunk:
            full_content.append("</think>")

        final_content = "".join(full_content)
        debug_log(f"内容收集完成,最终长度: {len(final_content)}")

        # Handle tool calls for non-streaming
        tool_calls = None
        finish_reason = "stop"
        message_content = final_content

        if self.has_tools:
            tool_calls = extract_tool_invocations(final_content)
            if tool_calls:
                # Content must be null when tool_calls are present (OpenAI spec)
                message_content = None
                finish_reason = "tool_calls"
                debug_log(f"提取到工具调用: {json.dumps(tool_calls, ensure_ascii=False)}")
            else:
                # Remove tool JSON from content
                message_content = remove_tool_json_content(final_content)
                if not message_content:
                    # Keep the original text if cleaning emptied it
                    message_content = final_content

        # Build response
        response_data = OpenAIResponse(
            id=f"chatcmpl-{int(time.time())}",
            object="chat.completion",
            created=int(time.time()),
            model=settings.PRIMARY_MODEL,
            choices=[Choice(
                index=0,
                message=Message(
                    role="assistant",
                    content=message_content,
                    tool_calls=tool_calls
                ),
                finish_reason=finish_reason
            )],
            usage=Usage()
        )

        debug_log("非流式响应发送完成")
        return JSONResponse(content=response_data.model_dump(exclude_none=True))
app/core/token_manager.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Token pool management with load balancing and round-robin mechanism
3
+ """
4
+
5
+ import os
6
+ import time
7
+ import threading
8
+ from typing import List, Optional, Dict, Any, Set
9
+ from dataclasses import dataclass, field
10
+
11
+
12
def debug_log(message: str, *args) -> None:
    """Print a "[DEBUG]"-prefixed message when debug logging is enabled.

    Args:
        message: Log message, optionally a %-style format string.
        *args: Values interpolated into `message` with the % operator.

    If the settings module cannot be imported (e.g. during early startup),
    the message is printed unconditionally as a fallback.
    """
    # Import here to avoid a circular import with app.core.config.
    # Fix: the original used a bare `except:` which also swallowed
    # SystemExit/KeyboardInterrupt; narrowed to Exception.
    try:
        from app.core.config import settings
        if settings.DEBUG_LOGGING:
            if args:
                print(f"[DEBUG] {message % args}")
            else:
                print(f"[DEBUG] {message}")
    except Exception:
        # Fallback if settings not available. Fix: apply %-substitution here
        # too — the original fallback silently dropped `args`.
        try:
            print(f"[DEBUG] {message % args}" if args else f"[DEBUG] {message}")
        except (TypeError, ValueError, KeyError):
            # Malformed format string: print it unformatted rather than raise
            print(f"[DEBUG] {message}")
25
+
26
+
27
@dataclass
class TokenInfo:
    """Token information with failure tracking."""
    # The raw auth token string
    token: str
    # Count of failures recorded against this token
    failure_count: int = 0
    # Whether the token is still eligible for use; presumably cleared once
    # failure_count reaches TokenManager.max_failures — confirm in manager code
    is_active: bool = True
    # time.time() of the most recent failure; None if it never failed
    last_failure_time: Optional[float] = None
    # time.time() when the token was last handed out; None if never used
    last_used_time: Optional[float] = None
+
36
+
37
+ class TokenManager:
38
+ """Token pool manager with load balancing and failure handling"""
39
+
40
+ def __init__(self, token_file_path: str = None):
41
+ try:
42
+ from app.core.config import settings
43
+ self.token_file_path = token_file_path or getattr(settings, 'TOKEN_FILE_PATH', './tokens.txt')
44
+ self.max_failures = getattr(settings, 'TOKEN_MAX_FAILURES', 3)
45
+ self.reload_interval = getattr(settings, 'TOKEN_RELOAD_INTERVAL', 60)
46
+ except ImportError:
47
+ # Fallback values if settings not available
48
+ self.token_file_path = token_file_path or './tokens.txt'
49
+ self.max_failures = 3
50
+ self.reload_interval = 60
51
+
52
+ self.tokens: List[TokenInfo] = []
53
+ self.current_index = 0
54
+ self.last_reload_time = 0
55
+ self._lock = threading.Lock()
56
+
57
+ # Load tokens on initialization
58
+ self._load_tokens()
59
+
60
    def _load_tokens(self) -> None:
        """Load tokens from file and the BACKUP_TOKEN setting.

        Re-reads `token_file_path`, merges in comma-separated BACKUP_TOKEN
        values, and preserves failure counts for tokens that already exist
        in the pool. The in-memory pool is replaced only when at least one
        token was found; any error is logged and swallowed.
        """
        try:
            new_tokens = []

            # First, try to load tokens from the tokens.txt file
            if os.path.exists(self.token_file_path):
                with open(self.token_file_path, 'r', encoding='utf-8') as f:
                    lines = f.readlines()

                for line in lines:
                    token = line.strip()
                    if token and not token.startswith('#'):  # Skip empty lines and comments
                        # Check if this token already exists to preserve failure count
                        existing_token = next((t for t in self.tokens if t.token == token), None)
                        if existing_token:
                            new_tokens.append(existing_token)
                        else:
                            new_tokens.append(TokenInfo(token=token))

                if new_tokens:
                    debug_log(f"从tokens.txt文件加载了 {len(new_tokens)} 个token")
                else:
                    debug_log("Token文件为空或无有效token")

            # Then, merge tokens from the BACKUP_TOKEN setting (if configured)
            try:
                from app.core.config import settings
                if hasattr(settings, 'BACKUP_TOKEN') and settings.BACKUP_TOKEN:
                    # Multiple BACKUP_TOKEN values are supported, comma-separated
                    backup_tokens = [token.strip() for token in settings.BACKUP_TOKEN.split(',') if token.strip()]

                    # Append only backup tokens that are not already present
                    for backup_token in backup_tokens:
                        # Skip if an identical token is already queued
                        existing_token = next((t for t in new_tokens if t.token == backup_token), None)
                        if not existing_token:
                            # Reuse the old TokenInfo (if any) so its failure count survives
                            old_token = next((t for t in self.tokens if t.token == backup_token), None)
                            if old_token:
                                new_tokens.append(old_token)
                            else:
                                new_tokens.append(TokenInfo(token=backup_token))

                    debug_log(f"从BACKUP_TOKEN加载了 {len(backup_tokens)} 个token")
            except ImportError:
                pass

            # If nothing was found anywhere, fall back to BACKUP_TOKEN alone
            if not new_tokens:
                try:
                    from app.core.config import settings
                    if hasattr(settings, 'BACKUP_TOKEN') and settings.BACKUP_TOKEN:
                        # Multiple BACKUP_TOKEN values are supported, comma-separated
                        backup_tokens = [token.strip() for token in settings.BACKUP_TOKEN.split(',') if token.strip()]
                        new_tokens = [TokenInfo(token=token) for token in backup_tokens]
                        debug_log(f"仅使用BACKUP_TOKEN,共{len(backup_tokens)}个token")
                except ImportError:
                    pass

            if new_tokens:
                # Swap the pool atomically under the lock
                with self._lock:
                    self.tokens = new_tokens
                    # Reset index if it's out of bounds
                    if self.current_index >= len(self.tokens):
                        self.current_index = 0
                    self.last_reload_time = time.time()

                debug_log(f"总共加载了 {len(self.tokens)} 个token")
                active_count = sum(1 for t in self.tokens if t.is_active)
                debug_log(f"活跃token数量: {active_count}")
            else:
                # Deliberately keeps the previous pool when nothing new was found
                debug_log("没有找到任何可用的token")

        except Exception as e:
            # Best-effort loader: never let a reload failure crash the caller
            debug_log(f"加载token失败: {e}")
136
+
137
+ def _should_reload(self) -> bool:
138
+ """Check if tokens should be reloaded"""
139
+ return time.time() - self.last_reload_time > self.reload_interval
140
+
141
+ def get_next_token(self) -> Optional[str]:
142
+ """Get next available token using round-robin with load balancing"""
143
+ # Reload tokens if needed
144
+ if self._should_reload():
145
+ self._load_tokens()
146
+
147
+ with self._lock:
148
+ if not self.tokens:
149
+ debug_log("没有可用的token")
150
+ return None
151
+
152
+ # Find active tokens
153
+ active_tokens = [i for i, t in enumerate(self.tokens) if t.is_active]
154
+
155
+ if not active_tokens:
156
+ debug_log("没有活跃的token,尝试重置失败计数")
157
+ # Reset all tokens if none are active (maybe temporary network issues)
158
+ for token in self.tokens:
159
+ token.is_active = True
160
+ token.failure_count = 0
161
+ active_tokens = list(range(len(self.tokens)))
162
+
163
+ # Round-robin selection from active tokens
164
+ attempts = 0
165
+ max_attempts = len(active_tokens)
166
+
167
+ while attempts < max_attempts:
168
+ # Find next active token starting from current_index
169
+ token_index = None
170
+ for i in range(len(self.tokens)):
171
+ idx = (self.current_index + i) % len(self.tokens)
172
+ if idx in active_tokens:
173
+ token_index = idx
174
+ break
175
+
176
+ if token_index is not None:
177
+ self.current_index = (token_index + 1) % len(self.tokens)
178
+ token_info = self.tokens[token_index]
179
+ token_info.last_used_time = time.time()
180
+ debug_log(f"选择token[{token_index}]: {token_info.token[:20]}...")
181
+ return token_info.token
182
+
183
+ attempts += 1
184
+
185
+ debug_log("无法找到可用的token")
186
+ return None
187
+
188
+ def mark_token_failed(self, token: str) -> None:
189
+ """Mark a token as failed and deactivate if necessary"""
190
+ with self._lock:
191
+ for token_info in self.tokens:
192
+ if token_info.token == token:
193
+ token_info.failure_count += 1
194
+ token_info.last_failure_time = time.time()
195
+
196
+ if token_info.failure_count >= self.max_failures:
197
+ token_info.is_active = False
198
+ debug_log(f"Token失效 (失败{token_info.failure_count}次): {token[:20]}...")
199
+ else:
200
+ debug_log(f"Token失败 ({token_info.failure_count}/{self.max_failures}): {token[:20]}...")
201
+ break
202
+
203
+ def mark_token_success(self, token: str) -> None:
204
+ """Mark a token as successful (reset failure count)"""
205
+ with self._lock:
206
+ for token_info in self.tokens:
207
+ if token_info.token == token:
208
+ if token_info.failure_count > 0:
209
+ debug_log(f"Token恢复正常: {token[:20]}...")
210
+ token_info.failure_count = 0
211
+ token_info.is_active = True
212
+ break
213
+
214
+ def get_token_stats(self) -> Dict[str, Any]:
215
+ """Get token pool statistics"""
216
+ with self._lock:
217
+ if not self.tokens:
218
+ return {
219
+ "total": 0,
220
+ "active": 0,
221
+ "failed": 0,
222
+ "tokens": []
223
+ }
224
+
225
+ active_count = sum(1 for t in self.tokens if t.is_active)
226
+ failed_count = len(self.tokens) - active_count
227
+
228
+ token_details = []
229
+ for i, token_info in enumerate(self.tokens):
230
+ token_details.append({
231
+ "index": i,
232
+ "token_preview": token_info.token[:20] + "...",
233
+ "is_active": token_info.is_active,
234
+ "failure_count": token_info.failure_count,
235
+ "last_failure_time": token_info.last_failure_time,
236
+ "last_used_time": token_info.last_used_time
237
+ })
238
+
239
+ return {
240
+ "total": len(self.tokens),
241
+ "active": active_count,
242
+ "failed": failed_count,
243
+ "current_index": self.current_index,
244
+ "last_reload_time": self.last_reload_time,
245
+ "tokens": token_details
246
+ }
247
+
248
+ def reset_all_tokens(self) -> None:
249
+ """Reset all tokens (clear failure counts and reactivate)"""
250
+ with self._lock:
251
+ for token_info in self.tokens:
252
+ token_info.is_active = True
253
+ token_info.failure_count = 0
254
+ token_info.last_failure_time = None
255
+ debug_log("已重置所有token状态")
256
+
257
+ def reload_tokens(self) -> None:
258
+ """Force reload tokens from file"""
259
+ debug_log("强制重新加载token文件")
260
+ self._load_tokens()
261
+
262
+
263
+ # Global token manager instance
264
+ token_manager = TokenManager()
app/core/zai_transformer.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import json
5
+ import time
6
+ import uuid
7
+ import random
8
+ import hashlib
9
+ import hmac
10
+ import urllib.parse
11
+ from datetime import datetime
12
+ from typing import Dict, List, Any, Optional, Generator, AsyncGenerator
13
+ import httpx
14
+ import asyncio
15
+ from fake_useragent import UserAgent
16
+
17
+ from app.core.config import settings
18
+ from app.utils.helpers import debug_log
19
+ from app.core.token_manager import token_manager
20
+
21
+ # 全局 UserAgent 实例(单例模式)
22
+ _user_agent_instance = None
23
+
24
+
25
def get_user_agent_instance() -> UserAgent:
    """Return the process-wide UserAgent, creating it on first use."""
    global _user_agent_instance
    if _user_agent_instance is not None:
        return _user_agent_instance
    _user_agent_instance = UserAgent()
    return _user_agent_instance
31
+
32
+
33
def get_dynamic_headers(chat_id: str = "", user_agent: str = "") -> Dict[str, str]:
    """Build dynamic browser-like headers with a (possibly random) User-Agent.

    Args:
        chat_id: Upstream chat id; when set, the Referer points at /c/{chat_id}.
        user_agent: Explicit UA string; when empty, a random one is generated
            via fake_useragent, biased toward Chrome/Edge.

    Returns:
        Header dict mimicking chat.z.ai web traffic (Sec-Ch-Ua only for
        Chromium-based UAs, since Firefox does not send client hints).
    """
    if not user_agent:
        ua = get_user_agent_instance()
        # Weighted choice: favor Chrome and Edge over Firefox/Safari.
        browser_choices = ["chrome", "chrome", "chrome", "edge", "edge", "firefox", "safari"]
        browser_type = random.choice(browser_choices)

        # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
        # narrow to Exception (fake_useragent lookups can raise on bad data).
        try:
            if browser_type == "chrome":
                user_agent = ua.chrome
            elif browser_type == "edge":
                user_agent = ua.edge
            elif browser_type == "firefox":
                user_agent = ua.firefox
            elif browser_type == "safari":
                user_agent = ua.safari
            else:
                user_agent = ua.random
        except Exception:
            user_agent = ua.random

    # Defaults matching the browser versions observed in real F12 captures.
    chrome_version = "140"
    edge_version = "140"

    if "Chrome/" in user_agent:
        try:
            chrome_version = user_agent.split("Chrome/")[1].split(".")[0]
        except Exception:  # malformed UA: keep the default version
            pass

    if "Edg/" in user_agent:
        try:
            edge_version = user_agent.split("Edg/")[1].split(".")[0]
            sec_ch_ua = f'"Microsoft Edge";v="{edge_version}", "Chromium";v="{chrome_version}", "Not=A?Brand";v="24"'
        except Exception:
            sec_ch_ua = f'"Chromium";v="{chrome_version}", "Not=A?Brand";v="24", "Microsoft Edge";v="{edge_version}"'
    elif "Firefox/" in user_agent:
        sec_ch_ua = None  # Firefox does not send sec-ch-ua client hints
    else:
        sec_ch_ua = f'"Chromium";v="{chrome_version}", "Not=A?Brand";v="24", "Google Chrome";v="{chrome_version}"'

    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN",
        "Content-Type": "application/json",
        "User-Agent": user_agent,
        "X-Fe-Version": "prod-fe-1.0.83",  # matches the version seen in F12 captures
        "Origin": "https://chat.z.ai",
        "Connection": "keep-alive",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
    }

    if sec_ch_ua:
        headers["Sec-Ch-Ua"] = sec_ch_ua
        headers["Sec-Ch-Ua-Mobile"] = "?0"
        headers["Sec-Ch-Ua-Platform"] = '"Windows"'

    if chat_id:
        headers["Referer"] = f"https://chat.z.ai/c/{chat_id}"
    else:
        headers["Referer"] = "https://chat.z.ai/"

    return headers
101
+
102
+
103
def generate_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    return f"{uuid.uuid4()}"
106
+
107
+
108
def generate_signature(data: str, timestamp: str, secret_key: str = "") -> str:
    """Generate a request signature over data + timestamp + secret.

    Args:
        data: Request payload text.
        timestamp: Timestamp string folded into the signed material.
        secret_key: Override secret; defaults to settings.SIGNATURE_SECRET_KEY.

    Returns:
        Hex digest per settings.SIGNATURE_ALGORITHM (md5/sha1, else sha256),
        or "" when signing is disabled via settings.ENABLE_SIGNATURE.
    """
    if not settings.ENABLE_SIGNATURE:
        return ""

    key = secret_key or settings.SIGNATURE_SECRET_KEY
    payload = f"{data}{timestamp}{key}".encode('utf-8')

    # Dispatch on the configured algorithm; anything unknown falls back to sha256.
    digest_factories = {"md5": hashlib.md5, "sha1": hashlib.sha1}
    factory = digest_factories.get(settings.SIGNATURE_ALGORITHM.lower(), hashlib.sha256)
    return factory(payload).hexdigest()
137
+
138
+
139
def build_query_params(
    timestamp: int,
    request_id: str,
    token: str,
    user_agent: str,
    chat_id: str = ""
) -> Dict[str, str]:
    """Build browser-fingerprint query parameters mimicking a real request.

    Args:
        timestamp: Millisecond unix timestamp.
        request_id: Request UUID.
        token: User token (also used to derive a stable pseudo user id).
        user_agent: User-Agent string (URL-encoded into the params).
        chat_id: Chat id; controls current_url/pathname.

    Returns:
        Dict of query-string parameters (all values are strings).
    """
    from datetime import timezone  # local import; module imports only `datetime`

    # FIX: the original used `abs(hash(token))`, but Python's str hash is
    # salted per process (PYTHONHASHSEED), so the fake user id changed on
    # every restart. Use a stable digest so the same token always maps to
    # the same guest id.
    stable = int(hashlib.sha256(token.encode("utf-8")).hexdigest(), 16)
    user_id = "guest-user-" + str(stable % 1000000)

    # URL-encode the user agent for safe embedding in the query string.
    encoded_user_agent = urllib.parse.quote_plus(user_agent)

    # Time-related fields. local_time mirrors what JS Date.toISOString()-style
    # code would send (trailing "Z" kept for upstream compatibility).
    current_time = datetime.now()
    local_time = current_time.isoformat() + "Z"
    # FIX: utc_time previously formatted *local* time with a "GMT" suffix;
    # use actual UTC so the label is truthful.
    utc_time = datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")

    # Current page URL derived from the chat id.
    current_url = f"https://chat.z.ai/c/{chat_id}" if chat_id else "https://chat.z.ai/"
    pathname = f"/c/{chat_id}" if chat_id else "/"

    query_params = {
        "timestamp": str(timestamp),
        "requestId": request_id,
        "version": "0.0.1",
        "platform": "web",
        "user_id": user_id,
        "token": token,
        "user_agent": encoded_user_agent,
        "language": "zh-CN",
        "languages": "zh-CN,en,en-GB,en-US",
        "timezone": "Asia/Shanghai",
        "cookie_enabled": "true",
        "screen_width": "1536",
        "screen_height": "864",
        "screen_resolution": "1536x864",
        "viewport_height": "331",
        "viewport_width": "1528",
        "viewport_size": "1528x331",
        "color_depth": "24",
        "pixel_ratio": "1.25",
        "current_url": urllib.parse.quote_plus(current_url),
        "pathname": pathname,
        "search": "",
        "hash": "",
        "host": "chat.z.ai",
        "hostname": "chat.z.ai",
        "protocol": "https:",
        "referrer": "",
        "title": "Chat with Z.ai - Free AI Chatbot powered by GLM-4.5",
        "timezone_offset": "-480",
        "local_time": local_time,
        "utc_time": utc_time,
        "is_mobile": "false",
        "is_touch": "false",
        "max_touch_points": "10",
        "browser_name": "Chrome",
        "os_name": "Windows",
        # "signature_timestamp": str(timestamp),  # signature params removed
    }

    return query_params
214
+
215
+
216
def get_auth_token_sync() -> str:
    """Synchronously obtain an auth token (for non-async call sites).

    When ANONYMOUS_MODE is on, first tries the guest auth endpoint; on any
    failure falls back to the shared token pool. Returns "" when no token
    can be obtained at all.
    """
    if settings.ANONYMOUS_MODE:
        try:
            with httpx.Client() as client:
                response = client.get(
                    "https://chat.z.ai/api/v1/auths/",
                    headers=get_dynamic_headers(),
                    timeout=10.0,
                )
                if response.status_code == 200:
                    guest_token = response.json().get("token", "")
                    if guest_token:
                        debug_log(f"获取访客令牌成功: {guest_token[:20]}...")
                        return guest_token
        except Exception as e:
            debug_log(f"获取访客令牌失败: {e}")

    # Fall back to the managed token pool.
    pooled = token_manager.get_next_token()
    if pooled:
        debug_log(f"从token池获取令牌: {pooled[:20]}...")
        return pooled

    # Nothing available anywhere.
    debug_log("⚠️ 没有可用的备份token")
    return ""
241
+
242
+
243
class ZAITransformer:
    """Transformer that converts OpenAI-style chat requests to Z.AI's format."""

    def __init__(self):
        """Initialize URLs and the public-model -> upstream-model-id mapping."""
        self.name = "zai"
        self.base_url = "https://chat.z.ai"
        self.api_url = settings.API_ENDPOINT
        self.auth_url = f"{self.base_url}/api/v1/auths/"

        # Model mapping: public model names -> upstream model ids.
        self.model_mapping = {
            settings.PRIMARY_MODEL: "0727-360B-API",  # GLM-4.5
            settings.THINKING_MODEL: "0727-360B-API",  # GLM-4.5-Thinking
            settings.SEARCH_MODEL: "0727-360B-API",  # GLM-4.5-Search
            settings.AIR_MODEL: "0727-106B-API",  # GLM-4.5-Air
            settings.GLM_46_MODEL: "GLM-4-6-API-V1",  # GLM-4.6
            settings.GLM_46_THINKING_MODEL: "GLM-4-6-API-V1",  # GLM-4.6-Thinking
        }

    async def get_token(self) -> str:
        """Asynchronously obtain an auth token.

        Tries the anonymous guest endpoint when ANONYMOUS_MODE is on, then
        falls back to the token pool; returns "" when nothing is available.
        """
        if settings.ANONYMOUS_MODE:
            try:
                headers = get_dynamic_headers()
                async with httpx.AsyncClient() as client:
                    response = await client.get(self.auth_url, headers=headers, timeout=10.0)
                    if response.status_code == 200:
                        data = response.json()
                        token = data.get("token", "")
                        if token:
                            debug_log(f"获取访客令牌成功: {token[:20]}...")
                            return token
            except Exception as e:
                debug_log(f"异步获取访客令牌失败: {e}")

        # Fall back to the managed token pool.
        token = token_manager.get_next_token()
        if token:
            debug_log(f"从token池获取令牌: {token[:20]}...")
            return token

        # No token available anywhere.
        debug_log("⚠️ 没有可用的备份token")
        return ""

    def mark_token_success(self, token: str):
        """Report a successful use of *token* to the pool manager."""
        token_manager.mark_token_success(token)

    def mark_token_failure(self, token: str, error: Exception = None):
        """Report a failed use of *token* (the error itself is not recorded)."""
        token_manager.mark_token_failed(token)

    async def transform_request_in(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """Convert an OpenAI-style request dict into the Z.AI upstream format.

        Handles model mapping, system-role rewriting, search/thinking feature
        flags, MCP server selection and the fingerprinted request URL/headers.

        Returns:
            {"body": <upstream body>, "config": {"url", "headers"}, "token": <auth token>}

        Raises:
            Exception: when no auth token can be obtained.
        """
        debug_log(f"🔄 开始转换 OpenAI 请求到 Z.AI 格式: {request.get('model', settings.PRIMARY_MODEL)} -> Z.AI")

        # Obtain the auth token (guest or pooled).
        token = await self.get_token()
        debug_log(f" 使用令牌: {token[:20] if token else 'None'}...")

        # A token is mandatory for the upstream call.
        if not token:
            debug_log("❌ 无法获取有效的认证令牌")
            raise Exception("无法获取有效的认证令牌,请检查匿名模式配置或token池配置")

        # Determine which features the requested model implies.
        requested_model = request.get("model", settings.PRIMARY_MODEL)
        is_thinking = (requested_model == settings.THINKING_MODEL or
                       requested_model == settings.GLM_46_THINKING_MODEL or
                       request.get("reasoning", False))
        is_search = requested_model == settings.SEARCH_MODEL
        is_air = requested_model == settings.AIR_MODEL

        # Resolve the upstream model id via the mapping (default: GLM-4.5).
        upstream_model_id = self.model_mapping.get(requested_model, "0727-360B-API")
        debug_log(f" 模型映射: {requested_model} -> {upstream_model_id}")
        debug_log(f" 模型特性检测: is_search={is_search}, is_thinking={is_thinking}, is_air={is_air}")

        # Process the message list.
        debug_log(f" 开始处理 {len(request.get('messages', []))} 条消息")
        messages = []
        for idx, orig_msg in enumerate(request.get("messages", [])):
            msg = orig_msg.copy()

            # Rewrite system messages as user messages with a compliance prefix
            # (the upstream apparently has no system role).
            if msg.get("role") == "system":
                msg["role"] = "user"
                content = msg.get("content")

                if isinstance(content, list):
                    msg["content"] = [
                        {"type": "text", "text": "This is a system command, you must enforce compliance."}
                    ] + content
                elif isinstance(content, str):
                    msg["content"] = f"This is a system command, you must enforce compliance.{content}"

            # Pass through user image content (base64 and http URLs alike).
            elif msg.get("role") == "user":
                content = msg.get("content")
                if isinstance(content, list):
                    new_content = []
                    for part_idx, part in enumerate(content):
                        # Image parts are forwarded unchanged; the branch only
                        # exists to log their presence.
                        if (
                            part.get("type") == "image_url"
                            and part.get("image_url", {}).get("url")
                            and isinstance(part["image_url"]["url"], str)
                        ):
                            debug_log(f" 消息[{idx}]内容[{part_idx}]: 检测到图片URL")
                            # Forward the image part as-is.
                            new_content.append(part)
                        else:
                            new_content.append(part)
                    msg["content"] = new_content

            # Assistant reasoning_content is intentionally kept untouched.
            elif msg.get("role") == "assistant" and msg.get("reasoning_content"):
                pass

            messages.append(msg)

        # Build the MCP server list (search models get deep-web-search).
        mcp_servers = []
        if is_search:
            mcp_servers.append("deep-web-search")
            debug_log(f"🔍 检测到搜索模型,添加 deep-web-search MCP 服务器")

        debug_log(f" MCP服务器列表: {mcp_servers}")

        # Assemble the upstream request body.
        chat_id = generate_uuid()

        body = {
            "stream": True,  # always stream from upstream
            "model": upstream_model_id,  # mapped upstream model id
            "messages": messages,
            "params": {},
            "features": {
                "image_generation": False,
                "web_search": is_search,
                "auto_web_search": is_search,
                "preview_mode": False,
                "flags": [],
                "features": [],
                "enable_thinking": is_thinking,
            },
            "background_tasks": {
                "title_generation": False,
                "tags_generation": False,
            },
            "mcp_servers": mcp_servers,  # keep MCP server support
            "variables": {
                "{{USER_NAME}}": "Guest",
                "{{USER_LOCATION}}": "Unknown",
                "{{CURRENT_DATETIME}}": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "{{CURRENT_DATE}}": datetime.now().strftime("%Y-%m-%d"),
                "{{CURRENT_TIME}}": datetime.now().strftime("%H:%M:%S"),
                "{{CURRENT_WEEKDAY}}": datetime.now().strftime("%A"),
                "{{CURRENT_TIMEZONE}}": "Asia/Shanghai",  # matches the fingerprint timezone
                "{{USER_LANGUAGE}}": "zh-CN",
            },
            "model_item": {
                "id": upstream_model_id,
                "name": requested_model,
                "owned_by": "z.ai"
            },
            "chat_id": chat_id,
            "id": generate_uuid(),
        }

        # Tool support: thinking models do not accept tools.
        if settings.TOOL_SUPPORT and not is_thinking and request.get("tools"):
            body["tools"] = request["tools"]
            debug_log(f"启用工具支持: {len(request['tools'])} 个工具")
        else:
            body["tools"] = None

        # Timestamp (ms) and request id for the fingerprint query string.
        timestamp = int(time.time() * 1000)
        request_id = generate_uuid()

        # Fixed desktop UA for consistency with the generated headers.
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0"
        dynamic_headers = get_dynamic_headers(chat_id, user_agent)

        # Build the fingerprint query parameters.
        query_params = build_query_params(timestamp, request_id, token, user_agent, chat_id)

        # Signing is intentionally disabled — no signature is generated.
        # request_body_str = json.dumps(body, ensure_ascii=False, separators=(',', ':'))
        # signature = generate_signature(request_body_str, str(timestamp))

        # Full URL including the query parameters (values are pre-encoded).
        url_with_params = f"{self.api_url}?" + "&".join([f"{k}={v}" for k, v in query_params.items()])

        headers = {
            **dynamic_headers,  # dynamically generated browser headers
            "Authorization": f"Bearer {token}",
            "Cache-Control": "no-cache",
            "Pragma": "no-cache",
        }

        # Signature feature is disabled.
        debug_log(" 🔓 签名验证已禁用")

        config = {
            "url": url_with_params,
            "headers": headers,
        }

        debug_log("✅ 请求转换完成")

        # Log the key request fields for debugging.
        debug_log(f" 📋 发送到Z.AI的关键信息:")
        debug_log(f" - 上游模型: {body['model']}")
        debug_log(f" - MCP服务器: {body['mcp_servers']}")
        debug_log(f" - web_search: {body['features']['web_search']}")
        debug_log(f" - auto_web_search: {body['features']['auto_web_search']}")
        debug_log(f" - 消息数量: {len(body['messages'])}")
        tools_count = len(body.get('tools') or [])
        debug_log(f" - 工具数量: {tools_count}")

        # Return the transformed request bundle.
        return {
            "body": body,
            "config": config,
            "token": token
        }
app/models/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ Models module initialization
3
+ """
4
+
5
+ from app.models import schemas
6
+
7
+ __all__ = ["schemas"]
app/models/schemas.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application data models
3
+ """
4
+
5
+ from typing import Dict, List, Optional, Any, Union, Literal
6
+ from pydantic import BaseModel
7
+
8
+
9
class ContentPart(BaseModel):
    """Content part model for OpenAI's new (multi-part) content format"""

    # Part discriminator, e.g. "text" or "image_url".
    type: str
    # Present when type == "text".
    text: Optional[str] = None
14
+
15
+
16
class Message(BaseModel):
    """Chat message model (OpenAI-style)"""

    # e.g. "system", "user", "assistant" — not validated here.
    role: str
    # Plain string or the structured multi-part content format.
    content: Optional[Union[str, List[ContentPart]]] = None
    # Model "thinking" text, when the upstream provides it.
    reasoning_content: Optional[str] = None
    # Tool call payloads emitted by the assistant.
    tool_calls: Optional[List[Dict[str, Any]]] = None
23
+
24
+
25
class OpenAIRequest(BaseModel):
    """OpenAI-compatible chat-completions request model"""

    model: str
    messages: List[Message]
    # Defaults mirror the OpenAI API: non-streaming unless requested.
    stream: Optional[bool] = False
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    tools: Optional[List[Dict[str, Any]]] = None
    # Accepts both the string and object forms of tool_choice.
    tool_choice: Optional[Any] = None
35
+
36
+
37
class ModelItem(BaseModel):
    """Model information item embedded in upstream requests"""

    id: str
    name: str
    owned_by: str
43
+
44
+
45
class UpstreamRequest(BaseModel):
    """Request body sent to the upstream (z.ai) chat service"""

    stream: bool
    model: str
    messages: List[Message]
    params: Dict[str, Any] = {}
    # Feature flags such as web_search / enable_thinking.
    features: Dict[str, Any] = {}
    background_tasks: Optional[Dict[str, bool]] = None
    chat_id: Optional[str] = None
    id: Optional[str] = None
    mcp_servers: Optional[List[str]] = None
    model_item: Optional[ModelItem] = None
    tool_servers: Optional[List[str]] = None
    variables: Optional[Dict[str, str]] = None
    # Allow the "model_" field prefix without pydantic v2 namespace warnings.
    model_config = {"protected_namespaces": ()}
61
+
62
+
63
class Delta(BaseModel):
    """Stream delta model (one incremental chunk of a streamed choice)"""

    # Role appears only on the first delta of a stream.
    role: Optional[str] = None
    # FIX: the original default was `"" or None`, which constant-folds to
    # None anyway — spell the intended default explicitly.
    content: Optional[str] = None
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None
70
+
71
+
72
class Choice(BaseModel):
    """Response choice model"""

    index: int
    # Populated for non-streaming responses.
    message: Optional[Message] = None
    # Populated for streaming chunks.
    delta: Optional[Delta] = None
    finish_reason: Optional[str] = None
79
+
80
+
81
class Usage(BaseModel):
    """Token usage statistics"""

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
87
+
88
+
89
class OpenAIResponse(BaseModel):
    """OpenAI-compatible chat-completions response model"""

    id: str
    # "chat.completion" or "chat.completion.chunk" for streaming.
    object: str
    # Unix timestamp of creation.
    created: int
    model: str
    choices: List[Choice]
    usage: Optional[Usage] = None
98
+
99
+
100
class UpstreamError(BaseModel):
    """Upstream error payload"""

    detail: str
    code: int
105
+
106
+
107
class UpstreamDataInner(BaseModel):
    """Nested error container inside upstream stream data"""

    error: Optional[UpstreamError] = None
111
+
112
+
113
class UpstreamDataData(BaseModel):
    """Payload of one upstream SSE data event"""

    # Incremental content for the current phase.
    delta_content: str = ""
    # Replacement content, when the upstream edits earlier output.
    edit_content: str = ""
    # Generation phase label (e.g. thinking vs. answering) — empty if absent.
    phase: str = ""
    done: bool = False
    usage: Optional[Usage] = None
    error: Optional[UpstreamError] = None
    inner: Optional[UpstreamDataInner] = None
123
+
124
+
125
class UpstreamData(BaseModel):
    """One upstream SSE event: type tag plus payload"""

    type: str
    data: UpstreamDataData
    error: Optional[UpstreamError] = None
131
+
132
+
133
class Model(BaseModel):
    """Model information entry for the /models listing"""

    id: str
    object: str = "model"
    # Unix timestamp of creation.
    created: int
    owned_by: str
140
+
141
+
142
class ModelsResponse(BaseModel):
    """OpenAI-style /models list response"""

    object: str = "list"
    data: List[Model]
app/utils/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ Utils module initialization
3
+ """
4
+
5
+ from app.utils import helpers, sse_parser, tools, reload_config
6
+
7
+ __all__ = ["helpers", "sse_parser", "tools", "reload_config"]
app/utils/helpers.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the application
3
+ """
4
+
5
+ import json
6
+ import re
7
+ import time
8
+ import random
9
+ from typing import Dict, List, Optional, Any, Tuple, Generator
10
+ import requests
11
+ from fake_useragent import UserAgent
12
+
13
+ from app.core.config import settings
14
+ from app.core.token_manager import token_manager
15
+
16
+ # 全局 UserAgent 实例,避免每次调用都创建新实例
17
+ _user_agent_instance = None
18
+
19
def get_user_agent_instance() -> UserAgent:
    """Return the module-wide UserAgent, instantiating it lazily (singleton)."""
    global _user_agent_instance
    if _user_agent_instance is not None:
        return _user_agent_instance
    _user_agent_instance = UserAgent()
    return _user_agent_instance
25
+
26
+
27
def debug_log(message: str, *args) -> None:
    """Print a [DEBUG]-prefixed line when settings.DEBUG_LOGGING is enabled.

    Extra positional args are %-interpolated into *message* (lazy-style).
    """
    if not settings.DEBUG_LOGGING:
        return
    text = message % args if args else message
    print(f"[DEBUG] {text}")
34
+
35
+
36
def generate_request_ids() -> Tuple[str, str]:
    """Generate a (chat_id, msg_id) pair derived from the current unix time."""
    now = int(time.time())
    return f"{now * 1000}-{now}", f"{now * 1000000}"
42
+
43
+
44
def get_browser_headers(referer_chat_id: str = "") -> Dict[str, str]:
    """Build browser-like request headers with a randomized User-Agent.

    Args:
        referer_chat_id: When set, a Referer pointing at /c/{id} is added.

    Returns:
        Header dict; sec-ch-ua client hints are included only for
        Chromium-based UAs (Firefox does not send them).
    """

    # Shared UserAgent instance (singleton).
    ua = get_user_agent_instance()

    # Weighted choice: favor Chrome and Edge over Firefox/Safari.
    browser_choices = ['chrome', 'chrome', 'chrome', 'edge', 'edge', 'firefox', 'safari']
    browser_type = random.choice(browser_choices)

    # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # narrow to Exception (fake_useragent lookups can raise on bad data).
    try:
        if browser_type == 'chrome':
            user_agent = ua.chrome
        elif browser_type == 'edge':
            user_agent = ua.edge
        elif browser_type == 'firefox':
            user_agent = ua.firefox
        elif browser_type == 'safari':
            user_agent = ua.safari
        else:
            user_agent = ua.random
    except Exception:
        # Fall back to a fully random User-Agent.
        user_agent = ua.random

    # Default browser versions used when parsing the UA fails.
    chrome_version = "139"
    edge_version = "139"

    if "Chrome/" in user_agent:
        try:
            chrome_version = user_agent.split("Chrome/")[1].split(".")[0]
        except Exception:  # malformed UA: keep the default version
            pass

    if "Edg/" in user_agent:
        try:
            edge_version = user_agent.split("Edg/")[1].split(".")[0]
            # Edge is Chromium-based; use the Edge-specific sec-ch-ua.
            sec_ch_ua = f'"Microsoft Edge";v="{edge_version}", "Chromium";v="{chrome_version}", "Not_A Brand";v="24"'
        except Exception:
            sec_ch_ua = f'"Not_A Brand";v="8", "Chromium";v="{chrome_version}", "Google Chrome";v="{chrome_version}"'
    elif "Firefox/" in user_agent:
        # Firefox does not send sec-ch-ua client hints.
        sec_ch_ua = None
    else:
        # Chrome or other Chromium-based browser.
        sec_ch_ua = f'"Not_A Brand";v="8", "Chromium";v="{chrome_version}", "Google Chrome";v="{chrome_version}"'

    # Assemble the dynamic headers.
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
        "User-Agent": user_agent,
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "X-FE-Version": "prod-fe-1.0.70",
        "Origin": settings.CLIENT_HEADERS["Origin"],
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
    }

    # Only Chromium-based browsers get sec-ch-ua.
    if sec_ch_ua:
        headers["sec-ch-ua"] = sec_ch_ua

    # Add the Referer when a chat id is known.
    if referer_chat_id:
        headers["Referer"] = f"{settings.CLIENT_HEADERS['Origin']}/c/{referer_chat_id}"

    # Debug logging of the chosen UA.
    if settings.DEBUG_LOGGING:
        debug_log(f"使用 User-Agent: {user_agent[:100]}...")

    return headers
124
+
125
+
126
def get_anonymous_token() -> str:
    """Fetch a guest (anonymous) token from the upstream auth endpoint.

    Raises on HTTP failure, non-200 status, or a missing token field; the
    failure is also logged via debug_log before re-raising.
    """
    origin = settings.CLIENT_HEADERS["Origin"]
    headers = get_browser_headers()
    headers.update({
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Referer": f"{origin}/",
    })

    try:
        response = requests.get(
            f"{origin}/api/v1/auths/",
            headers=headers,
            timeout=10.0,
        )
        if response.status_code != 200:
            raise Exception(f"anon token status={response.status_code}")

        token = response.json().get("token")
        if not token:
            raise Exception("anon token empty")
        return token
    except Exception as e:
        debug_log(f"获取匿名token失败: {e}")
        raise
154
+
155
+
156
def get_auth_token() -> str:
    """Get an auth token: anonymous first (if enabled), then pool, then backup."""
    if settings.ANONYMOUS_MODE:
        try:
            anon = get_anonymous_token()
            debug_log(f"匿名token获取成功: {anon[:10]}...")
            return anon
        except Exception as e:
            debug_log(f"匿名token获取失败,使用token池: {e}")

    # Load-balanced pick from the token pool; empty pool falls back to the
    # configured backup token.
    pooled = token_manager.get_next_token()
    if not pooled:
        debug_log("token池无可用token,使用配置文件备用token")
        return settings.BACKUP_TOKEN
    debug_log(f"从token池获取token: {pooled[:10]}...")
    return pooled
174
+
175
+
176
def transform_thinking_content(content: str) -> str:
    """Normalize upstream "thinking" markup per settings.THINKING_PROCESSING.

    Drops <summary> blocks and stray closing tags, then either rewrites
    <details> wrappers to <span> ("think" mode) or removes them entirely
    ("strip" mode), and finally strips blockquote-style "> " line prefixes.
    """
    # Remove summary blocks and leftover closing tags.
    text = re.sub(r'(?s)<summary>.*?</summary>', '', content)
    for tag in ("</thinking>", "<Full>", "</Full>"):
        text = text.replace(tag, "")
    text = text.strip()

    mode = settings.THINKING_PROCESSING
    if mode == "think":
        text = re.sub(r'<details[^>]*>', '<span>', text)
        text = text.replace("</details>", "</span>")
    elif mode == "strip":
        text = re.sub(r'<details[^>]*>', '', text)
        text = text.replace("</details>", "")

    # Strip quote markers from the first and all subsequent lines.
    text = text.lstrip("> ")
    text = text.replace("\n> ", "\n")

    return text.strip()
196
+
197
+
198
def call_upstream_api(
    upstream_req: Any,
    chat_id: str,
    auth_token: str
) -> requests.Response:
    """Call the upstream chat API with browser headers and streaming enabled.

    Args:
        upstream_req: Pydantic model of the upstream request body
            (must support model_dump / model_dump_json).
        chat_id: Chat id used to build the Referer header.
        auth_token: Bearer token for the Authorization header.

    Returns:
        The streaming `requests.Response` (body not yet consumed).

    Raises:
        Exception: wrapping timeout, connection and other request errors.
    """
    headers = get_browser_headers(chat_id)
    headers["Authorization"] = f"Bearer {auth_token}"

    # Prepare the request payload (None fields excluded from the JSON body).
    request_data = upstream_req.model_dump(exclude_none=True)
    request_json = upstream_req.model_dump_json()

    debug_log(f"调用上游API: {settings.API_ENDPOINT}")
    debug_log(f"请求体大小: {len(request_json)} 字符")

    # Only log a truncated body when the payload is large.
    if len(request_json) > 1000:
        debug_log(f"上游请求体 (截断): {request_json[:500]}...{request_json[-200:]}")
    else:
        debug_log(f"上游请求体: {request_json}")

    # Configure proxies, if any are set.
    proxies = {}
    if settings.HTTP_PROXY:
        proxies['http'] = settings.HTTP_PROXY
    if settings.HTTPS_PROXY:
        proxies['https'] = settings.HTTPS_PROXY

    try:
        response = requests.post(
            settings.API_ENDPOINT,
            json=request_data,
            headers=headers,
            # (connect, read) timeout pair.
            timeout=(settings.CONNECTION_TIMEOUT, settings.REQUEST_TIMEOUT),
            stream=True,  # SSE response is consumed incrementally by the caller
            proxies=proxies if proxies else None,
            verify=True,
        )

        debug_log(f"上游响应状态: {response.status_code}")

        # Log response headers for debugging.
        if settings.DEBUG_LOGGING:
            content_type = response.headers.get('content-type', 'unknown')
            content_length = response.headers.get('content-length', 'unknown')
            debug_log(f"响应类型: {content_type}, 长度: {content_length}")

        return response

    except requests.exceptions.Timeout as e:
        debug_log(f"请求超时: {e}")
        raise Exception(f"上游API请求超时: {e}")
    except requests.exceptions.ConnectionError as e:
        debug_log(f"连接错误: {e}")
        raise Exception(f"上游API连接失败: {e}")
    except requests.exceptions.RequestException as e:
        debug_log(f"请求异常: {e}")
        raise Exception(f"上游API请求失败: {e}")
    except Exception as e:
        debug_log(f"未知错误: {e}")
        raise
app/utils/reload_config.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 热重载配置模块
3
+ 定义 Granian 服务器热重载时需要忽略的目录和文件模式
4
+ """
5
+
6
+ # 忽略的目录列表
7
+ RELOAD_IGNORE_DIRS = [
8
+ "logs", # 忽略日志目录
9
+ "storage", # 忽略存储目录
10
+ "__pycache__", # 忽略 Python 缓存
11
+ ".git", # 忽略 git 目录
12
+ "node_modules", # 忽略 node_modules
13
+ "migrations", # 忽略数据库迁移目录
14
+ ".pytest_cache", # 忽略 pytest 缓存
15
+ ".venv", # 忽略虚拟环境
16
+ "venv", # 忽略虚拟环境
17
+ "env", # 忽略环境目录
18
+ ".mypy_cache", # 忽略 mypy 缓存
19
+ ".ruff_cache", # 忽略 ruff 缓存
20
+ "dist", # 忽略构建分发目录
21
+ "build", # 忽略构建目录
22
+ ".coverage", # 忽略测试覆盖率文件
23
+ "htmlcov", # 忽略覆盖率报告目录
24
+ "tests", # 忽略测试目录
25
+ ]
26
+
27
+ # 忽略的文件模式(正则表达式)
28
+ RELOAD_IGNORE_PATTERNS = [
29
+ # 日志文件
30
+ r".*\.log$",
31
+ r".*\.log\.\d+$",
32
+ # 数据库文件
33
+ r".*\.sqlite3.*",
34
+ r".*\.db$",
35
+ r".*\.db-.*$",
36
+ # Python 相关
37
+ r".*\.pyc$",
38
+ r".*\.pyo$",
39
+ r".*\.pyd$",
40
+ # 临时文件
41
+ r".*\.tmp$",
42
+ r".*\.temp$",
43
+ r".*\.swp$",
44
+ r".*\.swo$",
45
+ r".*~$",
46
+ # 系统文件
47
+ r".*\.DS_Store$",
48
+ r".*Thumbs\.db$",
49
+ r".*\.directory$",
50
+ # 编辑器文件
51
+ r".*\.vscode.*",
52
+ r".*\.idea.*",
53
+ # 测试和覆盖率
54
+ r".*\.coverage$",
55
+ r".*\.pytest_cache.*",
56
+ # 构建文件
57
+ r".*\.egg-info.*",
58
+ r".*\.wheel$",
59
+ r".*\.whl$",
60
+ # 版本控制
61
+ r".*\.git.*",
62
+ r".*\.gitignore$",
63
+ r".*\.gitkeep$",
64
+ # 配置文件备份
65
+ r".*\.bak$",
66
+ r".*\.backup$",
67
+ r".*\.orig$",
68
+ # 锁文件
69
+ r".*\.lock$",
70
+ r".*\.pid$",
71
+ ]
72
+
73
+ # 监视的路径(只监视应用相关代码)
74
+ RELOAD_WATCH_PATHS = [
75
+ "app", # 应用主目录
76
+ "main.py", # 主入口文件
77
+ ]
78
+
79
+ # 热重载配置
80
+ RELOAD_CONFIG = {
81
+ "reload_ignore_dirs": RELOAD_IGNORE_DIRS,
82
+ "reload_ignore_patterns": RELOAD_IGNORE_PATTERNS,
83
+ "reload_paths": RELOAD_WATCH_PATHS,
84
+ "reload_tick": 100, # 监视频率(毫秒)
85
+ }
app/utils/sse_parser.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SSE (Server-Sent Events) parser for streaming responses
3
+ """
4
+
5
+ import json
6
+ from typing import Dict, Any, Generator, Optional, Type
7
+ import requests
8
+
9
+
10
class SSEParser:
    """Incremental parser for a Server-Sent Events (SSE) response stream.

    Wraps a ``requests.Response`` opened with ``stream=True`` and exposes the
    parsed events as generator methods. Usable as a context manager so the
    underlying connection is closed automatically.
    """

    def __init__(self, response: requests.Response, debug_mode: bool = False):
        """Initialize the parser.

        Args:
            response: ``requests.Response`` object opened with ``stream=True``.
            debug_mode: When True, print debug traces for each parsed line.
        """
        self.response = response
        self.debug_mode = debug_mode
        self.buffer = ""
        self.line_count = 0

    def debug_log(self, format_str: str, *args) -> None:
        """Print a ``[SSE_PARSER]`` message when debug mode is enabled."""
        if not self.debug_mode:
            return
        message = format_str % args if args else format_str
        print(f"[SSE_PARSER] {message}")

    def iter_events(self) -> Generator[Dict[str, Any], None, None]:
        """Yield parsed SSE events.

        Yields:
            dict: one dict per event with a ``type`` key of ``data`` /
            ``event`` / ``id`` / ``retry`` plus the corresponding payload.
        """
        self.debug_log("开始解析 SSE 流")

        for raw in self.response.iter_lines():
            self.line_count += 1

            # Blank keep-alive lines carry no payload.
            if not raw:
                continue

            if isinstance(raw, bytes):
                try:
                    raw = raw.decode("utf-8")
                except UnicodeDecodeError:
                    self.debug_log(f"第{self.line_count}行解码失败,跳过")
                    continue

            # Drop comment lines (leading ':') and lines with no separator.
            if raw.startswith(":") or ":" not in raw:
                continue

            field, _, value = raw.partition(":")
            field = field.strip()
            value = value.lstrip()

            if field == "data":
                self.debug_log(f"收到数据 (第{self.line_count}行): {value}")
                # Prefer JSON payloads; fall back to the raw string.
                try:
                    parsed = json.loads(value)
                except json.JSONDecodeError:
                    yield {"type": "data", "data": value, "raw": value, "is_json": False}
                else:
                    yield {"type": "data", "data": parsed, "raw": value}

            elif field == "event":
                yield {"type": "event", "event": value}

            elif field == "id":
                yield {"type": "id", "id": value}

            elif field == "retry":
                try:
                    yield {"type": "retry", "retry": int(value)}
                except ValueError:
                    self.debug_log(f"无效的 retry 值: {value}")

    def iter_data_only(self) -> Generator[Dict[str, Any], None, None]:
        """Yield only events whose ``type`` is ``data``."""
        yield from (event for event in self.iter_events() if event["type"] == "data")

    def iter_json_data(self, model_class: Optional[Type] = None) -> Generator[Dict[str, Any], None, None]:
        """Yield only JSON data events, with optional model validation.

        Args:
            model_class: Optional Pydantic model class; when given, each raw
                payload is validated via ``model_validate_json`` and the
                validated object replaces the parsed dict.

        Yields:
            dict: JSON data events; items that fail validation are skipped.
        """
        for event in self.iter_events():
            if event["type"] != "data" or not event.get("is_json", True):
                continue
            if model_class is None:
                yield event
                continue
            try:
                validated = model_class.model_validate_json(event["raw"])
            except Exception as e:
                self.debug_log(f"数据验证失败: {e}")
                continue
            yield {"type": "data", "data": validated, "raw": event["raw"]}

    def close(self) -> None:
        """Close the underlying response connection, if possible."""
        if hasattr(self.response, "close"):
            self.response.close()

    def __enter__(self):
        """Context manager entry; returns the parser itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: release the connection."""
        self.close()
app/utils/sse_tool_handler.py ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ SSE Tool Handler - 处理工具调用的SSE流
6
+ 基于 Z.AI 原生的 edit_index 和 edit_content 机制,更原生地处理工具调用
7
+ """
8
+
9
+ import json
10
+ import re
11
+ import time
12
+ from typing import Dict, Any, Optional, Generator, List
13
+
14
+ from app.utils.helpers import debug_log
15
+
16
+
17
class SSEToolHandler:
    """Translate Z.AI tool-call SSE frames into OpenAI-style completion chunks.

    Z.AI streams tool calls as incremental ``(edit_index, edit_content)`` edits
    to a shared text document containing ``<glm_block>`` JSON blocks. This
    handler replays those edits into a byte buffer, parses (possibly
    incomplete) tool blocks out of it, and emits OpenAI-compatible
    ``chat.completion.chunk`` SSE lines for tool starts, argument updates and
    completion.
    """

    def __init__(self, chat_id: str, model: str):
        """Store chat/model identifiers and initialize all streaming state."""
        self.chat_id = chat_id
        self.model = model

        # Tool-call phase state
        self.has_tool_call = False
        self.tool_call_usage = None  # usage payload reported with the tool call
        self.content_index = 0
        self.has_thinking = False

        self.content_buffer = bytearray()  # byte buffer: cheap in-place edits
        self.last_edit_index = 0  # offset of the most recent edit

        # Tool-call parsing state
        self.active_tools = {}  # in-flight tool calls {tool_id: tool_info}
        self.completed_tools = []  # finished tool calls
        self.tool_blocks_cache = {}  # cache of parsed tool blocks

    def process_tool_call_phase(self, data: Dict[str, Any], is_stream: bool = True) -> Generator[str, None, None]:
        """Handle one ``tool_call`` phase frame.

        Applies the frame's edit to the buffer, then yields any OpenAI chunks
        produced by (re)parsing the accumulated tool blocks.
        """
        if not self.has_tool_call:
            self.has_tool_call = True
            debug_log("🔧 进入工具调用阶段")

        edit_content = data.get("edit_content", "")
        edit_index = data.get("edit_index", 0)

        if not edit_content:
            return

        # Apply the incremental edit to the shared content buffer.
        self._apply_edit_to_buffer(edit_index, edit_content)

        # Try to parse and emit tool calls from what we have so far.
        yield from self._process_tool_calls_from_buffer(is_stream)

    def _apply_edit_to_buffer(self, edit_index: int, edit_content: str):
        """Overwrite the buffer at ``edit_index`` with ``edit_content``.

        The buffer is zero-padded when the edit lands beyond its current end;
        bytes in the edited range are replaced in place, never shifted.
        """
        edit_bytes = edit_content.encode('utf-8')
        required_length = edit_index + len(edit_bytes)

        # Pad with NUL bytes if the edit starts beyond the current end.
        if len(self.content_buffer) < edit_index:
            self.content_buffer.extend(b'\x00' * (edit_index - len(self.content_buffer)))

        # Grow the buffer so it can hold the new content.
        if len(self.content_buffer) < required_length:
            self.content_buffer.extend(b'\x00' * (required_length - len(self.content_buffer)))

        # Overwrite (not insert) at the requested position.
        end_index = edit_index + len(edit_bytes)
        self.content_buffer[edit_index:end_index] = edit_bytes

    def _process_tool_calls_from_buffer(self, is_stream: bool) -> Generator[str, None, None]:
        """Decode the buffer and parse tool calls out of it.

        Parse failures are swallowed deliberately: the stream may simply not
        have delivered enough bytes yet.
        """
        try:
            # Strip the NUL padding introduced by out-of-order edits.
            content_str = self.content_buffer.decode('utf-8', errors='ignore').replace('\x00', '')
            yield from self._extract_and_process_tools(content_str, is_stream)
        except Exception as e:
            debug_log(f"📦 内容解析暂时失败,等待更多数据: {e}")
            # Do not raise; keep waiting for more data.

    def _extract_and_process_tools(self, content_str: str, is_stream: bool) -> Generator[str, None, None]:
        """Find every ``<glm_block>`` (closed or trailing) and process it."""
        # Matches blocks closed by </glm_block> as well as an unterminated tail.
        pattern = r'<glm_block\s*>(.*?)(?:</glm_block>|$)'
        matches = re.findall(pattern, content_str, re.DOTALL)

        for block_content in matches:
            # Each block is handled independently.
            yield from self._process_single_tool_block(block_content, is_stream)

    def _process_single_tool_block(self, block_content: str, is_stream: bool) -> Generator[str, None, None]:
        """Parse one tool block, falling back to partial parsing on bad JSON."""
        try:
            # Repair common structural problems, then parse as complete JSON.
            fixed_content = self._fix_json_structure(block_content)
            tool_data = json.loads(fixed_content)
            metadata = tool_data.get("data", {}).get("metadata", {})

            tool_id = metadata.get("id", "")
            tool_name = metadata.get("name", "")
            arguments_raw = metadata.get("arguments", "{}")

            if not tool_id or not tool_name:
                return

            debug_log(f"🎯 解析完整工具块: {tool_name}(id={tool_id}), 参数: {arguments_raw}")

            # Create or update the tool record and emit chunks as needed.
            yield from self._handle_tool_update(tool_id, tool_name, arguments_raw, is_stream)

        except json.JSONDecodeError as e:
            debug_log(f"📦 JSON解析失败: {e}, 尝试部分解析")
            # Incomplete JSON: salvage whatever identifying info is present.
            yield from self._handle_partial_tool_block(block_content, is_stream)
        except Exception as e:
            debug_log(f"📦 工具块处理失败: {e}")

    def _fix_json_structure(self, content: str) -> str:
        """Repair common structural JSON problems (excess closing braces)."""
        if not content:
            return content

        # Compare brace counts to detect imbalance.
        open_braces = content.count('{')
        close_braces = content.count('}')

        # More closers than openers: drop the extras from the right.
        if close_braces > open_braces:
            excess = close_braces - open_braces
            fixed_content = content
            for _ in range(excess):
                last_brace_pos = fixed_content.rfind('}')
                if last_brace_pos != -1:
                    fixed_content = fixed_content[:last_brace_pos] + fixed_content[last_brace_pos + 1:]
            return fixed_content

        return content

    def _handle_tool_update(self, tool_id: str, tool_name: str, arguments_raw: str, is_stream: bool) -> Generator[str, None, None]:
        """Create or update a tool record, emitting start/update chunks.

        Chunks are only emitted once the arguments look complete, to avoid
        sending truncated argument strings downstream.
        """
        # Parse the arguments payload.
        try:
            if isinstance(arguments_raw, str):
                # Unescape/clean before parsing.
                cleaned_args = self._clean_arguments_string(arguments_raw)
                arguments = json.loads(cleaned_args) if cleaned_args.strip() else {}
            else:
                arguments = arguments_raw
        except json.JSONDecodeError:
            debug_log(f"📦 参数解析失败,暂不处理: {arguments_raw}")
            # Do not create/update the tool on a parse failure; wait for
            # a more complete payload.
            return

        # Basic completeness heuristic for the parsed arguments.
        is_args_complete = self._is_arguments_complete(arguments, arguments_raw)

        # New tool?
        if tool_id not in self.active_tools:
            debug_log(f"🎯 发现新工具: {tool_name}(id={tool_id}), 参数完整性: {is_args_complete}")

            self.active_tools[tool_id] = {
                "id": tool_id,
                "name": tool_name,
                "arguments": arguments,
                "arguments_raw": arguments_raw,
                "status": "active",
                "sent_start": False,
                "last_sent_args": {},  # arguments included in the last emitted chunk
                "args_complete": is_args_complete,
                "pending_send": True  # must still be announced downstream
            }

            # Only announce the tool once its arguments look complete.
            if is_stream and is_args_complete:
                yield self._create_tool_start_chunk(tool_id, tool_name, arguments)
                self.active_tools[tool_id]["sent_start"] = True
                self.active_tools[tool_id]["last_sent_args"] = arguments.copy()
                self.active_tools[tool_id]["pending_send"] = False
                debug_log(f"📤 发送完整工具开始: {tool_name}(id={tool_id})")

        else:
            # Update an existing tool.
            current_tool = self.active_tools[tool_id]

            # BUG FIX: tools created via the partial path never set
            # "arguments_raw"; use .get() to avoid a KeyError here.
            if self._is_significant_improvement(current_tool["arguments"], arguments,
                                                current_tool.get("arguments_raw", ""), arguments_raw):
                debug_log(f"🔄 工具参数有实质性改进: {tool_name}(id={tool_id})")

                current_tool["arguments"] = arguments
                current_tool["arguments_raw"] = arguments_raw
                current_tool["args_complete"] = is_args_complete

                # Start signal not sent yet and arguments now complete: send it.
                if is_stream and not current_tool["sent_start"] and is_args_complete:
                    yield self._create_tool_start_chunk(tool_id, tool_name, arguments)
                    current_tool["sent_start"] = True
                    current_tool["last_sent_args"] = arguments.copy()
                    current_tool["pending_send"] = False
                    debug_log(f"📤 发送延迟的工具开始: {tool_name}(id={tool_id})")

                # Already started and arguments improved: send an update.
                elif is_stream and current_tool["sent_start"] and is_args_complete:
                    if self._should_send_argument_update(current_tool["last_sent_args"], arguments):
                        yield self._create_tool_arguments_chunk(tool_id, arguments)
                        current_tool["last_sent_args"] = arguments.copy()
                        # BUG FIX: repaired mojibake in this log message
                        # (original text was corrupted to "发送参数更��").
                        debug_log(f"📤 发送参数更新: {tool_name}(id={tool_id})")

    def _is_arguments_complete(self, arguments: Dict[str, Any], arguments_raw: str) -> bool:
        """Heuristically decide whether the arguments look complete."""
        if not arguments:
            return False

        # An empty raw string cannot be complete.
        if not arguments_raw or not arguments_raw.strip():
            return False

        raw_stripped = arguments_raw.strip()

        # Not ending in '}' or '"' suggests the payload was cut off.
        if not raw_stripped.endswith('}') and not raw_stripped.endswith('"'):
            return False

        # Look for truncation inside string values (URLs are the common case).
        for key, value in arguments.items():
            if isinstance(value, str):
                if 'http' in value.lower():
                    # Very short URLs, or ones ending mid-domain, are truncated.
                    if len(value) < 10 or value.endswith('.go') or value.endswith('.goo'):
                        return False

                # Trailing separators usually mean the value was cut off.
                if len(value) > 0 and value[-1] in ['.', '/', ':', '=']:
                    return False

        return True

    def _is_significant_improvement(self, old_args: Dict[str, Any], new_args: Dict[str, Any],
                                    old_raw: str, new_raw: str) -> bool:
        """Return True when the new arguments clearly improve on the old ones."""
        # Empty new arguments are never an improvement.
        if not new_args:
            return False

        # More keys than before is an improvement.
        if len(new_args) > len(old_args):
            return True

        # Look for values that grew substantially or got un-truncated.
        for key, new_value in new_args.items():
            old_value = old_args.get(key, "")

            if isinstance(new_value, str) and isinstance(old_value, str):
                # Require growth of >5 chars to count as significant.
                if len(new_value) > len(old_value) + 5:
                    return True

                # A previously truncated value that got longer is an improvement.
                if old_value.endswith(('.go', '.goo', '.com/', 'http')) and len(new_value) > len(old_value):
                    return True

        # Substantial growth of the raw payload also counts.
        if len(new_raw) > len(old_raw) + 10:
            return True

        return False

    def _should_send_argument_update(self, last_sent: Dict[str, Any], new_args: Dict[str, Any]) -> bool:
        """Decide whether an argument-update chunk is worth emitting."""
        # Identical arguments: nothing to send.
        if last_sent == new_args:
            return False

        # Never regress from non-empty to empty.
        if not new_args and last_sent:
            return False

        # New keys always warrant an update.
        if len(new_args) > len(last_sent):
            return True

        # Only send when a value grew noticeably (avoids chatty micro-updates).
        for key, new_value in new_args.items():
            last_value = last_sent.get(key, "")
            if isinstance(new_value, str) and isinstance(last_value, str):
                if len(new_value) > len(last_value) + 5:
                    return True
                elif new_value != last_value and new_value:  # new value must be non-empty
                    return True

        return False

    def _handle_partial_tool_block(self, block_content: str, is_stream: bool) -> Generator[str, None, None]:
        """Extract what we can (id, name, partial arguments) from a broken block."""
        try:
            # Pull the tool id and name out with regexes.
            id_match = re.search(r'"id":\s*"([^"]+)"', block_content)
            name_match = re.search(r'"name":\s*"([^"]+)"', block_content)

            if id_match and name_match:
                tool_id = id_match.group(1)
                tool_name = name_match.group(1)

                # Grab whatever prefix of the arguments string is present.
                args_match = re.search(r'"arguments":\s*"([^"]*)', block_content)
                partial_args = args_match.group(1) if args_match else ""

                debug_log(f"📦 部分工具块: {tool_name}(id={tool_id}), 部分参数: {partial_args[:50]}")

                # First sighting of this tool: create its record.
                if tool_id not in self.active_tools:
                    partial_args_dict = self._parse_partial_arguments(partial_args)

                    self.active_tools[tool_id] = {
                        "id": tool_id,
                        "name": tool_name,
                        "arguments": partial_args_dict,
                        # BUG FIX: record arguments_raw so later full updates
                        # can compare against it without a KeyError.
                        "arguments_raw": partial_args,
                        "status": "partial",
                        "sent_start": False,
                        "last_sent_args": {},
                        "args_complete": False,
                        "partial_args": partial_args
                    }

                    if is_stream:
                        yield self._create_tool_start_chunk(tool_id, tool_name, partial_args_dict)
                        self.active_tools[tool_id]["sent_start"] = True
                        self.active_tools[tool_id]["last_sent_args"] = partial_args_dict.copy()
                else:
                    # Refresh the partial arguments on an existing record.
                    self.active_tools[tool_id]["partial_args"] = partial_args
                    new_partial_dict = self._parse_partial_arguments(partial_args)
                    if new_partial_dict != self.active_tools[tool_id]["arguments"]:
                        self.active_tools[tool_id]["arguments"] = new_partial_dict

        except Exception as e:
            debug_log(f"📦 部分块解析失败: {e}")

    def _clean_arguments_string(self, arguments_raw: str) -> str:
        """Clean/normalize an arguments string, tolerating incomplete JSON."""
        if not arguments_raw:
            return "{}"

        cleaned = arguments_raw.strip()

        # "null" means no arguments.
        if cleaned.lower() == "null":
            return "{}"

        # Unescape the common escaped-JSON encodings.
        if cleaned.startswith('{\\"') and cleaned.endswith('\\"}'):
            # Escaped JSON string: unescape the quotes.
            cleaned = cleaned.replace('\\"', '"')
        elif cleaned.startswith('"{\\"') and cleaned.endswith('\\"}'):
            # Doubly escaped: strip outer quotes, then unescape.
            cleaned = cleaned[1:-1].replace('\\"', '"')
        elif cleaned.startswith('"') and cleaned.endswith('"'):
            # Plain quoted string: drop the outer quotes.
            cleaned = cleaned[1:-1]

        # Patch up truncated JSON.
        cleaned = self._fix_incomplete_json(cleaned)

        # Normalize formatting via a parse/re-serialize round trip.
        try:
            parsed = json.loads(cleaned)
            if parsed is None:
                return "{}"
            cleaned = json.dumps(parsed, ensure_ascii=False, separators=(',', ':'))
        except json.JSONDecodeError:
            # Leave as-is when it still fails to parse.
            debug_log(f"📦 JSON标准化失败,保持原样: {cleaned[:50]}...")

        return cleaned

    def _fix_incomplete_json(self, json_str: str) -> str:
        """Close off an incomplete JSON object string (braces and quotes)."""
        if not json_str:
            return "{}"

        # Must start with '{'.
        if not json_str.startswith('{'):
            json_str = '{' + json_str

        # An odd quote count means an unterminated string literal.
        if json_str.count('"') % 2 != 0:
            json_str += '"'

        # Must end with '}'.
        if not json_str.endswith('}'):
            json_str += '}'

        return json_str

    def _parse_partial_arguments(self, arguments_raw: str) -> Dict[str, Any]:
        """Best-effort parse of a (possibly truncated) arguments string."""
        if not arguments_raw or arguments_raw.strip() == "" or arguments_raw.strip().lower() == "null":
            return {}

        try:
            # Clean first, then parse.
            cleaned = self._clean_arguments_string(arguments_raw)
            result = json.loads(cleaned)
            # Guarantee a dict result.
            return result if isinstance(result, dict) else {}
        except json.JSONDecodeError:
            pass

        try:
            # Repair common issues by hand.
            fixed_args = arguments_raw.strip()

            # Unescape quotes.
            if '\\' in fixed_args:
                fixed_args = fixed_args.replace('\\"', '"')

            # Ensure an opening brace.
            if not fixed_args.startswith('{'):
                fixed_args = '{' + fixed_args

            # Close unterminated quotes/braces before parsing.
            if not fixed_args.endswith('}'):
                quote_count = fixed_args.count('"') - fixed_args.count('\\"')
                if quote_count % 2 != 0:
                    fixed_args += '"'
                fixed_args += '}'

            # NOTE(review): json.loads may return a non-dict here despite the
            # declared return type; downstream json.dumps tolerates it.
            return json.loads(fixed_args)
        except json.JSONDecodeError:
            # Fall back to regex-based key/value extraction.
            return self._extract_key_value_pairs(arguments_raw)
        except Exception:
            # Give up and return an empty mapping.
            return {}

    def _extract_key_value_pairs(self, text: str) -> Dict[str, Any]:
        """Last-resort extraction of simple key/value pairs via regex."""
        result = {}
        try:
            # "key": "value" pairs.
            pattern = r'"([^"]+)":\s*"([^"]*)"'
            matches = re.findall(pattern, text)

            for key, value in matches:
                result[key] = value

            # "key": <int> pairs.
            pattern = r'"([^"]+)":\s*(\d+)'
            matches = re.findall(pattern, text)

            for key, value in matches:
                try:
                    result[key] = int(value)
                except ValueError:
                    result[key] = value

            # "key": true/false pairs.
            pattern = r'"([^"]+)":\s*(true|false)'
            matches = re.findall(pattern, text)

            for key, value in matches:
                result[key] = value.lower() == 'true'

        except Exception:
            pass

        return result

    def _complete_active_tools(self, is_stream: bool) -> Generator[str, None, None]:
        """Flush pending tools, mark everything completed, emit finish chunk."""
        tools_to_send = []

        for tool_id, tool in self.active_tools.items():
            # A tool never announced but now complete gets announced here.
            if is_stream and tool.get("pending_send", False) and not tool.get("sent_start", False):
                if tool.get("args_complete", False):
                    debug_log(f"📤 完成时发送待发送工具: {tool['name']}(id={tool_id})")
                    yield self._create_tool_start_chunk(tool_id, tool["name"], tool["arguments"])
                    tool["sent_start"] = True
                    tool["pending_send"] = False
                    tools_to_send.append(tool)
                else:
                    debug_log(f"⚠️ 跳过不完整的工具: {tool['name']}(id={tool_id})")

            tool["status"] = "completed"
            self.completed_tools.append(tool)
            debug_log(f"✅ 完成工具调用: {tool['name']}(id={tool_id})")

        self.active_tools.clear()

        if is_stream and (self.completed_tools or tools_to_send):
            # Announce that tool calling is finished.
            yield self._create_tool_finish_chunk()

    def process_other_phase(self, data: Dict[str, Any], is_stream: bool = True) -> Generator[str, None, None]:
        """Handle an ``other`` phase frame: usage capture and end detection."""
        edit_content = data.get("edit_content", "")
        edit_index = data.get("edit_index", 0)
        usage = data.get("usage")

        # Remember usage reported during a tool call.
        if self.has_tool_call and usage:
            self.tool_call_usage = usage
            debug_log(f"💾 保存工具调用usage: {usage}")

        # Keep folding edits into the buffer; blocks may still be growing.
        if edit_content:
            self._apply_edit_to_buffer(edit_index, edit_content)
            yield from self._process_tool_calls_from_buffer(is_stream)

        # Several different markers can signal the end of the tool call.
        if self.has_tool_call and self._is_tool_call_finished(edit_content):
            debug_log("🏁 检测到工具调用结束")

            # Flush and complete all in-flight tools.
            yield from self._complete_active_tools(is_stream)

            if is_stream:
                debug_log("🏁 发送工具调用完成信号")
                # NOTE(review): other SSE chunks end with "\n\n" but this
                # sentinel does not — confirm the writer appends a terminator.
                yield "data: [DONE]"

            # Reset the phase flag.
            self.has_tool_call = False

    def _is_tool_call_finished(self, edit_content: str) -> bool:
        """Check the known end-of-tool-call markers."""
        if not edit_content:
            return False

        # Textual markers that indicate the tool call has finished.
        end_markers = [
            "null,",  # legacy end marker
            '"status": "completed"',  # completion status
            '"is_error": false',  # error-state marker
        ]

        for marker in end_markers:
            if marker in edit_content:
                debug_log(f"🔍 检测到结束标记: {marker}")
                return True

        # BUG FIX: content_buffer is a bytearray, so the needle must be
        # bytes — `str in bytearray` raises TypeError at runtime.
        if self.active_tools and b'"status": "completed"' in self.content_buffer:
            return True

        return False

    def _reset_all_state(self):
        """Reset every piece of streaming/parsing state to its initial value."""
        self.has_tool_call = False
        self.tool_call_usage = None
        self.content_index = 0
        self.content_buffer = bytearray()
        self.last_edit_index = 0
        self.active_tools.clear()
        self.completed_tools.clear()
        self.tool_blocks_cache.clear()

    def _create_tool_start_chunk(self, tool_id: str, tool_name: str, initial_args: Optional[Dict[str, Any]] = None) -> str:
        """Build the SSE line announcing a tool call (with initial arguments)."""
        # Fall back to an empty argument object.
        args_dict = initial_args or {}
        args_str = json.dumps(args_dict, ensure_ascii=False)

        chunk = {
            "choices": [
                {
                    "delta": {
                        "role": "assistant",
                        "content": None,
                        "tool_calls": [
                            {
                                "id": tool_id,
                                "type": "function",
                                "function": {"name": tool_name, "arguments": args_str},
                            }
                        ],
                    },
                    "finish_reason": None,
                    "index": self.content_index,
                    "logprobs": None,
                }
            ],
            "created": int(time.time()),
            "id": self.chat_id,
            "model": self.model,
            "object": "chat.completion.chunk",
            "system_fingerprint": "fp_zai_001",
        }
        return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"

    def _create_tool_arguments_chunk(self, tool_id: str, arguments: Dict) -> str:
        """Build an SSE line carrying only an arguments update (no name)."""
        chunk = {
            "choices": [
                {
                    "delta": {
                        "tool_calls": [
                            {
                                "id": tool_id,
                                "function": {"arguments": json.dumps(arguments, ensure_ascii=False)},
                            }
                        ],
                    },
                    "finish_reason": None,
                    "index": self.content_index,
                    "logprobs": None,
                }
            ],
            "created": int(time.time()),
            "id": self.chat_id,
            "model": self.model,
            "object": "chat.completion.chunk",
            "system_fingerprint": "fp_zai_001",
        }
        return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"

    def _create_tool_finish_chunk(self) -> str:
        """Build the SSE line that closes the tool-call turn."""
        chunk = {
            "choices": [
                {
                    "delta": {"role": "assistant", "content": None, "tool_calls": []},
                    "finish_reason": "tool_calls",
                    "index": 0,
                    "logprobs": None,
                }
            ],
            "created": int(time.time()),
            "id": self.chat_id,
            "usage": self.tool_call_usage or None,
            "model": self.model,
            "object": "chat.completion.chunk",
            "system_fingerprint": "fp_zai_001",
        }
        return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
app/utils/tools.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tool processing utilities
3
+ """
4
+
5
+ import json
6
+ import re
7
+ import time
8
+ from typing import Dict, List, Optional, Any
9
+
10
+ from app.core.config import settings
11
+
12
+
13
def content_to_string(content: Any) -> str:
    """Normalize a message ``content`` field to plain text.

    Strings pass through unchanged. Lists (multi-part content) are flattened:
    each ``{"type": "text", ...}`` dict contributes its ``"text"`` value and
    each bare string contributes itself, joined by single spaces. Any other
    input type yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    pieces = []
    for item in content:
        if isinstance(item, dict):
            if item.get("type") == "text":
                pieces.append(item.get("text", ""))
        elif isinstance(item, str):
            pieces.append(item)
    return " ".join(pieces)
26
+
27
+
28
def generate_tool_prompt(tools: List[Dict[str, Any]]) -> str:
    """Render an injectable prompt section describing the available tools.

    Only entries with ``type == "function"`` are rendered; each becomes a
    markdown section listing its purpose and parameters. Returns an empty
    string when there is nothing to render.
    """
    if not tools:
        return ""

    sections = []
    for entry in tools:
        if entry.get("type") != "function":
            continue

        spec = entry.get("function", {}) or {}
        name = spec.get("name", "unknown")
        description = spec.get("description", "")
        schema = spec.get("parameters", {}) or {}

        # Section header and purpose line for this tool.
        lines = [f"## {name}", f"**Purpose**: {description}"]

        props = schema.get("properties", {}) or {}
        required = set(schema.get("required", []) or [])

        if props:
            lines.append("**Parameters**:")
            for param_name, param_info in props.items():
                param_type = (param_info or {}).get("type", "any")
                param_desc = (param_info or {}).get("description", "")
                flag = "**Required**" if param_name in required else "*Optional*"
                lines.append(f"- `{param_name}` ({param_type}) - {flag}: {param_desc}")

        sections.append("\n".join(lines))

    if not sections:
        return ""

    # The instruction block below is emitted verbatim after the tool sections.
    return (
        "\n\n# AVAILABLE FUNCTIONS\n" + "\n\n---\n".join(sections) + "\n\n# USAGE INSTRUCTIONS\n"
        "When you need to execute a function, respond ONLY with a JSON object containing tool_calls:\n"
        "```json\n"
        "{\n"
        ' "tool_calls": [\n'
        " {\n"
        ' "id": "call_xxx",\n'
        ' "type": "function",\n'
        ' "function": {\n'
        ' "name": "function_name",\n'
        ' "arguments": "{\\"param1\\": \\"value1\\"}"\n'
        " }\n"
        " }\n"
        " ]\n"
        "}\n"
        "```\n"
        "Important: No explanatory text before or after the JSON. The 'arguments' field must be a JSON string, not an object.\n"
    )
85
+
86
+
87
def process_messages_with_tools(
    messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, tool_choice: Optional[Any] = None
) -> List[Dict[str, Any]]:
    """Prepare a chat message list for an upstream model without a native tool API.

    Injects the generated tool prompt into the system message (creating one if
    absent), appends tool-choice hints to the last user message, and converts
    ``tool``/``function`` role messages into plain assistant messages, since
    the upstream service only understands system/user/assistant roles.

    Args:
        messages: OpenAI-style chat messages.
        tools: Optional OpenAI tool definitions to expose to the model.
        tool_choice: OpenAI ``tool_choice`` value (``"none"``, ``"auto"``,
            ``"required"`` or a ``{"type": "function", ...}`` dict).

    Returns:
        A new message list; the caller's list and message dicts are not mutated.
    """
    processed: List[Dict[str, Any]] = []

    if tools and settings.TOOL_SUPPORT and (tool_choice != "none"):
        tools_prompt = generate_tool_prompt(tools)

        # Append the tool prompt to every existing system message, or synthesize one.
        if any(m.get("role") == "system" for m in messages):
            for m in messages:
                if m.get("role") == "system":
                    mm = dict(m)  # shallow copy so the caller's dict is untouched
                    mm["content"] = content_to_string(mm.get("content", "")) + tools_prompt
                    processed.append(mm)
                else:
                    processed.append(m)
        else:
            processed = [{"role": "system", "content": "你是一个有用的助手。" + tools_prompt}] + messages

        # Nudge the model toward tool usage according to tool_choice.
        if tool_choice in ("required", "auto"):
            if processed and processed[-1].get("role") == "user":
                last = dict(processed[-1])
                last["content"] = content_to_string(last.get("content", "")) + "\n\n请根据需要使用提供的工具函数。"
                processed[-1] = last
        elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
            fname = (tool_choice.get("function") or {}).get("name")
            if fname and processed and processed[-1].get("role") == "user":
                last = dict(processed[-1])
                last["content"] = content_to_string(last.get("content", "")) + f"\n\n请使用 {fname} 函数来处理这个请求。"
                processed[-1] = last
    else:
        processed = list(messages)

    # Downgrade tool/function results to assistant messages and flatten content.
    final_msgs: List[Dict[str, Any]] = []
    for m in processed:
        if m.get("role") in ("tool", "function"):
            tool_name = m.get("name", "unknown")
            tool_content = content_to_string(m.get("content", ""))
            if isinstance(tool_content, dict):  # defensive: content_to_string should return str — confirm
                tool_content = json.dumps(tool_content, ensure_ascii=False)

            # Bug fix: the empty-content fallback previously ran AFTER wrapping
            # the output in a non-empty template, so it could never trigger.
            # Decide based on the raw tool output instead.
            if str(tool_content).strip():
                content = f"工具 {tool_name} 返回结果:\n```json\n{tool_content}\n```"
            else:
                content = f"工具 {tool_name} 执行完成"

            final_msgs.append({"role": "assistant", "content": content})
        else:
            # Regular message: only normalize content to a plain string.
            final_msg = dict(m)
            final_msg["content"] = content_to_string(final_msg.get("content", ""))
            final_msgs.append(final_msg)

    return final_msgs
155
+
156
+
157
# Tool Extraction Patterns
# Matches a ```json fenced block and captures its outermost {...} payload (non-greedy).
TOOL_CALL_FENCE_PATTERN = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
# NOTE: TOOL_CALL_INLINE_PATTERN was removed because it over-matched;
# remove_tool_json_content now uses a brace-balancing scan instead.
# Matches natural-language invocations like "调用函数: name 参数: {...}";
# accepts both ASCII and full-width colons and an English "arguments" label.
FUNCTION_CALL_PATTERN = re.compile(r"调用函数\s*[::]\s*([\w\-\.]+)\s*(?:参数|arguments)[::]\s*(\{.*?\})", re.DOTALL)
162
+
163
+
164
def _normalize_tool_call_arguments(tool_calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Ensure each tool call's ``function.arguments`` is a JSON string (OpenAI wire format)."""
    for tc in tool_calls:
        func = tc.get("function")
        if isinstance(func, dict) and "arguments" in func and not isinstance(func["arguments"], str):
            # dicts, lists, numbers, etc. are all serialized the same way
            func["arguments"] = json.dumps(func["arguments"], ensure_ascii=False)
    return tool_calls


def _find_balanced_json_end(text: str, start: int) -> int:
    """Return the index one past the ``}`` matching ``text[start] == '{'``, or -1.

    Braces inside JSON string literals are skipped, honoring backslash escapes.
    """
    depth = 1
    j = start + 1
    in_string = False
    escape_next = False
    while j < len(text) and depth > 0:
        ch = text[j]
        if escape_next:
            escape_next = False
        elif ch == '\\':
            escape_next = True
        elif ch == '"':
            in_string = not in_string
        elif not in_string:
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
        j += 1
    return j if depth == 0 else -1


def extract_tool_invocations(text: str) -> Optional[List[Dict[str, Any]]]:
    """Extract OpenAI-style tool invocations from a model response.

    Tries, in order:
      1. fenced ```json blocks containing a ``tool_calls`` array,
      2. inline JSON objects located via brace balancing,
      3. a natural-language "调用函数: name 参数: {...}" phrasing.

    Only the first ``settings.SCAN_LIMIT`` characters are scanned, for performance.

    Returns:
        The ``tool_calls`` list with arguments normalized to JSON strings,
        or ``None`` when no invocation is found.
    """
    if not text:
        return None

    scannable_text = text[: settings.SCAN_LIMIT]

    # Attempt 1: fenced ```json blocks.
    for json_block in TOOL_CALL_FENCE_PATTERN.findall(scannable_text):
        try:
            parsed_data = json.loads(json_block)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed_data, dict):
            tool_calls = parsed_data.get("tool_calls")
            if tool_calls and isinstance(tool_calls, list):
                return _normalize_tool_call_arguments(tool_calls)

    # Attempt 2: inline JSON objects found via brace balancing.
    i = 0
    while i < len(scannable_text):
        if scannable_text[i] == '{':
            end = _find_balanced_json_end(scannable_text, i)
            if end != -1:
                try:
                    parsed_data = json.loads(scannable_text[i:end])
                except json.JSONDecodeError:
                    parsed_data = None
                if isinstance(parsed_data, dict):
                    tool_calls = parsed_data.get("tool_calls")
                    if tool_calls and isinstance(tool_calls, list):
                        return _normalize_tool_call_arguments(tool_calls)
        # Advance one character even after a balanced-but-rejected object:
        # a nested object inside an unparsable outer one may still contain tool_calls.
        i += 1

    # Attempt 3: natural-language function-call phrasing.
    natural_lang_match = FUNCTION_CALL_PATTERN.search(scannable_text)
    if natural_lang_match:
        function_name = natural_lang_match.group(1).strip()
        arguments_str = natural_lang_match.group(2).strip()
        try:
            json.loads(arguments_str)  # validate only; keep the original string form
        except json.JSONDecodeError:
            return None
        return [
            {
                "id": f"call_{int(time.time() * 1000000)}",
                "type": "function",
                "function": {"name": function_name, "arguments": arguments_str},
            }
        ]

    return None
262
+
263
+
264
def remove_tool_json_content(text: str) -> str:
    """Strip tool-call JSON out of a model response, keeping the surrounding prose.

    Removes fenced ```json blocks and inline JSON objects whose top level
    contains a ``tool_calls`` key. Inline objects are located with a brace
    balance scan so unrelated text (including other JSON) is preserved.

    Returns:
        The cleaned text with leading/trailing whitespace stripped.
    """

    def _drop_if_tool_call(match: re.Match) -> str:
        # Delete a fenced block only when it really is a tool_calls payload.
        try:
            parsed_data = json.loads(match.group(1))
        except json.JSONDecodeError:
            return match.group(0)
        # isinstance guard: json.loads may return non-dict values (e.g. "5" -> 5),
        # for which `"tool_calls" in parsed_data` would raise TypeError.
        if isinstance(parsed_data, dict) and "tool_calls" in parsed_data:
            return ""
        return match.group(0)

    # Step 1: remove fenced tool JSON blocks.
    cleaned_text = TOOL_CALL_FENCE_PATTERN.sub(_drop_if_tool_call, text)

    # Step 2: remove inline tool JSON via brace balancing, copying kept runs
    # as slices instead of single characters.
    pieces: List[str] = []
    keep_from = 0
    i = 0
    n = len(cleaned_text)
    while i < n:
        if cleaned_text[i] == '{':
            depth = 1
            j = i + 1
            in_string = False
            escape_next = False
            while j < n and depth > 0:
                ch = cleaned_text[j]
                if escape_next:
                    escape_next = False
                elif ch == '\\':
                    escape_next = True
                elif ch == '"':
                    in_string = not in_string
                elif not in_string:
                    if ch == '{':
                        depth += 1
                    elif ch == '}':
                        depth -= 1
                j += 1
            if depth == 0:
                # Complete JSON candidate found; drop it only if it is a tool call.
                # Narrowed from a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.
                try:
                    parsed = json.loads(cleaned_text[i:j])
                except json.JSONDecodeError:
                    parsed = None
                if isinstance(parsed, dict) and "tool_calls" in parsed:
                    pieces.append(cleaned_text[keep_from:i])
                    i = j
                    keep_from = i
                    continue
        i += 1
    pieces.append(cleaned_text[keep_from:])

    return "".join(pieces).strip()
docker-compose.yml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3.8'  # NOTE(review): the top-level version key is obsolete/ignored in Compose v2 — confirm tooling before removing

services:
  # Replicated API worker service.
  z-ai2api:
    # Pull the prebuilt image instead of building locally.
    image: julienol/z-ai2api-python:latest
    # No ports published here; all traffic is routed through the nginx front.
    env_file:
      - .env
    volumes:
      - ./tokens.txt:/app/tokens.txt:ro
      - ./data:/app/data
    restart: unless-stopped
    # Run two identical worker replicas behind the load balancer.
    deploy:
      replicas: 2
    # NOTE(review): indentation in the source rendering is ambiguous; the
    # healthcheck is placed at service level here because `deploy` has no
    # healthcheck key in the Compose spec — confirm against the original file.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # Nginx load balancer in front of the worker replicas.
  nginx:
    image: nginx:latest
    container_name: z-ai-load-balancer
    ports:
      # The stack's single public entry point.
      - "8084:8084"
    volumes:
      # Mount the load-balancer configuration read-only.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - z-ai2api  # start the workers before the balancer
    restart: unless-stopped

# Shared bridge network so workers and balancer can reach each other by service name.
networks:
  default:
    driver: bridge
main.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Main application entry point.

Builds the FastAPI application, attaches CORS middleware, and mounts the
OpenAI-compatible and admin API routers.
"""

# NOTE(review): `Request` is imported but not used in this module — confirm
# before removing (kept here to avoid breaking unseen references).
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware

from app.core.config import settings
from app.core import openai
from app.api import admin
from app.utils.reload_config import RELOAD_CONFIG

from granian import Granian

# Create FastAPI app
app = FastAPI(
    title="OpenAI Compatible API Server",
    description="An OpenAI-compatible API server for Z.AI chat service",
    version="1.0.0",
)

# Add CORS middleware: wide-open origins so any OpenAI-compatible client can connect.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["Content-Type", "Authorization"],
)

# Include API routers: OpenAI-compatible endpoints and the admin interface.
app.include_router(openai.router)
app.include_router(admin.router)
37
+
38
+
39
+ @app.options("/")
40
+ async def handle_options():
41
+ """Handle OPTIONS requests"""
42
+ return Response(status_code=200)
43
+
44
+
45
+ @app.get("/")
46
+ async def root():
47
+ """Root endpoint"""
48
+ return {"message": "OpenAI Compatible API Server"}
49
+
50
+
51
def run_server():
    """Start the Granian ASGI server on the configured port.

    NOTE(review): ``reload=False`` is passed explicitly while ``RELOAD_CONFIG``
    is also expanded into the same call; if RELOAD_CONFIG ever contains a
    ``reload`` key this raises ``TypeError`` (duplicate keyword argument) —
    confirm the contents of app.utils.reload_config.
    """
    Granian(
        "main:app",
        interface="asgi",
        address="0.0.0.0",
        port=settings.LISTEN_PORT,
        reload=False,  # keep hot-reload disabled in production
        **RELOAD_CONFIG,
    ).serve()


if __name__ == "__main__":
    run_server()
nginx.conf ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
events {
    worker_connections 1024;
}

http {
    # Upstream pool of API worker containers. Docker's internal DNS resolves
    # the compose service name 'z-ai2api' to the worker replicas, so Nginx
    # balances requests across all of them.
    upstream z_ai_workers {
        server z-ai2api:8080;
    }

    server {
        # Single public entry point for the whole stack.
        listen 8084;

        location / {
            # Forward all requests to the worker pool.
            proxy_pass http://z_ai_workers;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Keep streaming responses and WebSocket upgrades working.
            # NOTE(review): hardcoding `Connection "upgrade"` forces the upgrade
            # header on plain HTTP requests too; the conventional fix is a
            # `map $http_upgrade` block — confirm before changing behavior.
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            proxy_buffering off;  # critical: disable buffering so SSE streams flow in real time
        }
    }
}
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deduplicated: every package was previously listed twice.
# granian added: main.py imports it unconditionally (`from granian import Granian`)
# and the Dockerfile runs `python main.py`, so the server cannot start without it.
fastapi
uvicorn
granian
requests
httpx
pydantic
pydantic-settings
pydantic-core
typing-inspection
fake-useragent
brotli