Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
# OpenAI-compatibility-layer schemas, referenced from server.yaml via $ref.
# This file is a Swagger 2.0 fragment: it only carries `definitions`, no paths.
swagger: '2.0'
info:
  title: OpenAPI fragments — OpenAI-compatible schemas
  version: "0.0"
definitions:
  # --- Completions /v1/completions ---

  # POST /v1/completions/prompt: applies the chat template to the user's raw
  # text and returns the string that is actually sent to the completion
  # endpoint.
  OpenAICompletionsPromptRequest:
    type: object
    # Unknown extra fields are accepted rather than rejected.
    additionalProperties: true
    required:
      - model
      - prompt
    properties:
      model:
        type: string
        description: Model ID
      prompt:
        description: 用户输入原文(非已格式化全文)
        type: string
      system:
        description: 可选;chat template 的 system 段。省略时不拼装 system(仅 user);传入字符串时原样作为 system 段(含空字符串、仅空白等)
        type: string
      enable_thinking:
        description: 可选;是否启用 Qwen3 chat template 的 thinking 模式;省略或 false 时不插入 thinking 块
        type: boolean

  # Response of the prompt-preparation call: the single templated string.
  OpenAICompletionsPromptResponse:
    type: object
    required:
      - prompt_used
    properties:
      prompt_used:
        type: string
        description: 套用 chat template 后、与 POST /v1/completions 的 prompt 字段同义的完整模型输入

  # Request body for the completion call. `prompt` is expected to be the final
  # model input already (see its description); only `model` and `prompt` are
  # required, every other field is an optional tuning knob.
  OpenAICompletionsRequest:
    type: object
    # Unknown extra fields are accepted rather than rejected.
    additionalProperties: true
    required:
      - model
      - prompt
    properties:
      model:
        type: string
        description: Model ID
      prompt:
        description: 已确定的完整模型输入(需 chat template 时请先 POST /v1/completions/prompt)
        type: string
      best_of:
        type: integer
        minimum: 0
        maximum: 20
      echo:
        type: boolean
      frequency_penalty:
        type: number
        minimum: -2
        maximum: 2
      logit_bias:
        # Free-form map; every value must be a number.
        type: object
        additionalProperties:
          type: number
      logprobs:
        type: integer
        minimum: 0
        maximum: 5
      max_tokens:
        description: 可选;正整数(> 0),且与 prompt 合计不超过续写接口的上下文 token 上限;省略则用满剩余额度
        type: integer
        minimum: 1
      n:
        type: integer
        minimum: 1
        maximum: 128
      presence_penalty:
        type: number
        minimum: -2
        maximum: 2
      seed:
        type: integer
        format: int64
      stop:
        description: 停止序列,最多 4 个
        # Either a single string or an array of strings. `items` and
        # `maxItems` are array keywords, so they constrain only the array form.
        type:
          - string
          - array
        items:
          type: string
        maxItems: 4
      stream:
        type: boolean
        description: 本服务忽略;响应恒为 SSE
      stream_options:
        type: object
        properties:
          include_usage:
            type: boolean
      suffix:
        type: string
      temperature:
        type: number
        minimum: 0
        maximum: 2
      top_p:
        type: number
        minimum: 0
        maximum: 1
      user:
        type: string

  # Token accounting attached to a completion response. No field is required;
  # the two *_details objects are left open-ended.
  OpenAICompletionUsage:
    type: object
    properties:
      prompt_tokens:
        type: integer
      completion_tokens:
        type: integer
      total_tokens:
        type: integer
      completion_tokens_details:
        type: object
        additionalProperties: true
      prompt_tokens_details:
        type: object
        additionalProperties: true

  # One generated alternative inside `choices`.
  OpenAICompletionChoice:
    type: object
    properties:
      text:
        type: string
      index:
        type: integer
      logprobs:
        description: 无 logprobs 时为 null;否则含 text_offset、token_logprobs、tokens、top_logprobs
        type: object
        # Vendor extension: the value may be null.
        x-nullable: true
      finish_reason:
        type: string
        enum:
          - stop
          - length
          - content_filter
        # Vendor extension: the value may be null.
        x-nullable: true

  # bpe_strings items match server.yaml#/definitions/TokenWithOffset
  # (reused across files).
  InfoRadarCompletionPayload:
    type: object
    properties:
      bpe_strings:
        type: array
        items:
          $ref: "server.yaml#/definitions/TokenWithOffset"

  # Completion response envelope. `system_fingerprint`, `usage` and
  # `info_radar` are optional; everything listed under `required` is mandatory.
  OpenAICompletionsResponse:
    type: object
    required:
      - id
      - object
      - created
      - model
      - choices
    properties:
      id:
        type: string
      object:
        type: string
        # Single-value enum: acts as a constant discriminator.
        enum:
          - text_completion
      created:
        type: integer
        description: Unix 时间戳(秒)
      model:
        type: string
      choices:
        type: array
        items:
          $ref: "#/definitions/OpenAICompletionChoice"
      system_fingerprint:
        type: string
      usage:
        $ref: "#/definitions/OpenAICompletionUsage"
      info_radar:
        # Members placed next to a bare $ref are ignored by JSON Reference
        # semantics, so the reference is wrapped in allOf to keep the
        # description effective.
        description: 续写 token 级概率与 top-N,便于与信息密度可视化对齐
        allOf:
          - $ref: "#/definitions/InfoRadarCompletionPayload"