Instructions to use moonshotai/Kimi-K2.6 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use moonshotai/Kimi-K2.6 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="moonshotai/Kimi-K2.6", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("moonshotai/Kimi-K2.6", trust_remote_code=True, dtype="auto")
- Inference
- HuggingChat
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use moonshotai/Kimi-K2.6 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "moonshotai/Kimi-K2.6"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "moonshotai/Kimi-K2.6",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
Use Docker
docker model run hf.co/moonshotai/Kimi-K2.6
- SGLang
How to use moonshotai/Kimi-K2.6 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "moonshotai/Kimi-K2.6" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "moonshotai/Kimi-K2.6",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "moonshotai/Kimi-K2.6" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "moonshotai/Kimi-K2.6",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
- Docker Model Runner
How to use moonshotai/Kimi-K2.6 with Docker Model Runner:
docker model run hf.co/moonshotai/Kimi-K2.6
Fix save_pretrained / from_pretrained round-trip for KimiK25VisionProcessor
Browse files
## Problem
``processor.save_pretrained()`` serializes transient Hub kwargs (``revision``, ``_from_auto``, ``cache_dir``, ``force_download``, ``local_files_only``) into ``preprocessor_config.json``. On reload, ``from_pretrained`` passes these same kwargs again, and ``from_dict`` merges ``**config`` with ``**kwargs`` — causing a ``TypeError: got multiple values for keyword argument``:
```python
processor = AutoProcessor.from_pretrained("moonshotai/Kimi-K2.6")
processor.save_pretrained("/tmp/test")
AutoProcessor.from_pretrained("/tmp/test", trust_remote_code=True)
# TypeError: KimiK25VisionProcessor() got multiple values for keyword argument 'revision'
```
## Fix
Strip transient loading kwargs in ``__init__``, ``from_dict``, and ``to_dict`` so they are never stored as instance attributes or serialized.
|
@@ -22,6 +22,21 @@ try:
|
|
| 22 |
except ImportError:
|
| 23 |
VideoReader = None
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def resampling(video_bytes: bytes,
|
| 27 |
sample_indices: list[int],
|
|
@@ -46,6 +61,7 @@ class KimiK25VisionProcessor(BaseImageProcessor):
|
|
| 46 |
media_proc_cfg: dict,
|
| 47 |
**kwargs,
|
| 48 |
):
|
|
|
|
| 49 |
super().__init__(**kwargs)
|
| 50 |
self.media_proc_cfg = media_proc_cfg
|
| 51 |
self.num_frames_per_chunk = media_proc_cfg[
|
|
@@ -162,11 +178,11 @@ class KimiK25VisionProcessor(BaseImageProcessor):
|
|
| 162 |
) -> BatchFeature:
|
| 163 |
"""
|
| 164 |
Preprocess a atom vision input (images/video_chunk) into model-ready tensors.
|
| 165 |
-
|
| 166 |
Args:
|
| 167 |
medias: List of MediaInput.
|
| 168 |
return_tensors: Desired output format ('pt', 'np', 'tf', or None).
|
| 169 |
-
|
| 170 |
Returns:
|
| 171 |
BatchFeature containing 'pixel_values' and 'grid_thws' tensors.
|
| 172 |
"""
|
|
@@ -232,6 +248,7 @@ class KimiK25VisionProcessor(BaseImageProcessor):
|
|
| 232 |
|
| 233 |
def to_dict(self) -> Dict[str, Any]:
|
| 234 |
output = super().to_dict()
|
|
|
|
| 235 |
output["media_proc_cfg"] = self.media_proc_cfg
|
| 236 |
if "media_processor" in output:
|
| 237 |
del output["media_processor"]
|
|
@@ -239,7 +256,8 @@ class KimiK25VisionProcessor(BaseImageProcessor):
|
|
| 239 |
|
| 240 |
@classmethod
|
| 241 |
def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
|
| 242 |
-
config = config_dict.copy()
|
|
|
|
| 243 |
media_proc_cfg = config.pop("media_proc_cfg", {})
|
| 244 |
return cls(media_proc_cfg=media_proc_cfg, **config, **kwargs)
|
| 245 |
|
|
|
|
| 22 |
except ImportError:
|
| 23 |
VideoReader = None
|
| 24 |
|
| 25 |
+
_TRANSIENT_KWARGS = frozenset({
|
| 26 |
+
"_from_auto",
|
| 27 |
+
"cache_dir",
|
| 28 |
+
"force_download",
|
| 29 |
+
"local_files_only",
|
| 30 |
+
"revision",
|
| 31 |
+
"token",
|
| 32 |
+
"proxies",
|
| 33 |
+
"resume_download",
|
| 34 |
+
})
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _pop_transient(d: dict) -> dict:
|
| 38 |
+
return {k: v for k, v in d.items() if k not in _TRANSIENT_KWARGS}
|
| 39 |
+
|
| 40 |
|
| 41 |
def resampling(video_bytes: bytes,
|
| 42 |
sample_indices: list[int],
|
|
|
|
| 61 |
media_proc_cfg: dict,
|
| 62 |
**kwargs,
|
| 63 |
):
|
| 64 |
+
kwargs = _pop_transient(kwargs)
|
| 65 |
super().__init__(**kwargs)
|
| 66 |
self.media_proc_cfg = media_proc_cfg
|
| 67 |
self.num_frames_per_chunk = media_proc_cfg[
|
|
|
|
| 178 |
) -> BatchFeature:
|
| 179 |
"""
|
| 180 |
Preprocess a atom vision input (images/video_chunk) into model-ready tensors.
|
| 181 |
+
|
| 182 |
Args:
|
| 183 |
medias: List of MediaInput.
|
| 184 |
return_tensors: Desired output format ('pt', 'np', 'tf', or None).
|
| 185 |
+
|
| 186 |
Returns:
|
| 187 |
BatchFeature containing 'pixel_values' and 'grid_thws' tensors.
|
| 188 |
"""
|
|
|
|
| 248 |
|
| 249 |
def to_dict(self) -> Dict[str, Any]:
|
| 250 |
output = super().to_dict()
|
| 251 |
+
output = _pop_transient(output)
|
| 252 |
output["media_proc_cfg"] = self.media_proc_cfg
|
| 253 |
if "media_processor" in output:
|
| 254 |
del output["media_processor"]
|
|
|
|
| 256 |
|
| 257 |
@classmethod
|
| 258 |
def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
|
| 259 |
+
config = _pop_transient(config_dict.copy())
|
| 260 |
+
kwargs = _pop_transient(kwargs)
|
| 261 |
media_proc_cfg = config.pop("media_proc_cfg", {})
|
| 262 |
return cls(media_proc_cfg=media_proc_cfg, **config, **kwargs)
|
| 263 |
|