fxmarty-amd committed on
Commit
8988379
·
verified ·
1 Parent(s): 2755962

Fix save_pretrained / from_pretrained round-trip for KimiK25VisionProcessor

Browse files

## Problem

``processor.save_pretrained()`` serializes transient Hub-loading kwargs (``revision``, ``_from_auto``, ``cache_dir``, ``force_download``, ``local_files_only``) into ``preprocessor_config.json``. On reload, ``from_pretrained`` passes these same kwargs again, so ``from_dict`` sees each key twice — once in the saved config and once in ``**kwargs``. Its call ``cls(media_proc_cfg=media_proc_cfg, **config, **kwargs)`` then raises ``TypeError: ... got multiple values for keyword argument``:

```python
processor = AutoProcessor.from_pretrained("moonshotai/Kimi-K2.6")
processor.save_pretrained("/tmp/test")
AutoProcessor.from_pretrained("/tmp/test", trust_remote_code=True)
# TypeError: KimiK25VisionProcessor() got multiple values for keyword argument 'revision'
```

## Fix

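Strip the transient loading kwargs listed in ``_TRANSIENT_KWARGS`` (``_from_auto``, ``cache_dir``, ``force_download``, ``local_files_only``, ``revision``, ``token``, ``proxies``, ``resume_download``) from the arguments of ``__init__`` and ``from_dict`` and from the output of ``to_dict``, so they are never stored as instance attributes or serialized. Despite its name, the ``_pop_transient`` helper does not mutate its argument; it returns a filtered copy.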

Files changed (1) hide show
  1. kimi_k25_vision_processing.py +21 -3
kimi_k25_vision_processing.py CHANGED
@@ -22,6 +22,21 @@ try:
22
  except ImportError:
23
  VideoReader = None
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def resampling(video_bytes: bytes,
27
  sample_indices: list[int],
@@ -46,6 +61,7 @@ class KimiK25VisionProcessor(BaseImageProcessor):
46
  media_proc_cfg: dict,
47
  **kwargs,
48
  ):
 
49
  super().__init__(**kwargs)
50
  self.media_proc_cfg = media_proc_cfg
51
  self.num_frames_per_chunk = media_proc_cfg[
@@ -162,11 +178,11 @@ class KimiK25VisionProcessor(BaseImageProcessor):
162
  ) -> BatchFeature:
163
  """
164
  Preprocess a atom vision input (images/video_chunk) into model-ready tensors.
165
-
166
  Args:
167
  medias: List of MediaInput.
168
  return_tensors: Desired output format ('pt', 'np', 'tf', or None).
169
-
170
  Returns:
171
  BatchFeature containing 'pixel_values' and 'grid_thws' tensors.
172
  """
@@ -232,6 +248,7 @@ class KimiK25VisionProcessor(BaseImageProcessor):
232
 
233
  def to_dict(self) -> Dict[str, Any]:
234
  output = super().to_dict()
 
235
  output["media_proc_cfg"] = self.media_proc_cfg
236
  if "media_processor" in output:
237
  del output["media_processor"]
@@ -239,7 +256,8 @@ class KimiK25VisionProcessor(BaseImageProcessor):
239
 
240
  @classmethod
241
  def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
242
- config = config_dict.copy()
 
243
  media_proc_cfg = config.pop("media_proc_cfg", {})
244
  return cls(media_proc_cfg=media_proc_cfg, **config, **kwargs)
245
 
 
22
  except ImportError:
23
  VideoReader = None
24
 
25
+ _TRANSIENT_KWARGS = frozenset({
26
+ "_from_auto",
27
+ "cache_dir",
28
+ "force_download",
29
+ "local_files_only",
30
+ "revision",
31
+ "token",
32
+ "proxies",
33
+ "resume_download",
34
+ })
35
+
36
+
37
+ def _pop_transient(d: dict) -> dict:
38
+ return {k: v for k, v in d.items() if k not in _TRANSIENT_KWARGS}
39
+
40
 
41
  def resampling(video_bytes: bytes,
42
  sample_indices: list[int],
 
61
  media_proc_cfg: dict,
62
  **kwargs,
63
  ):
64
+ kwargs = _pop_transient(kwargs)
65
  super().__init__(**kwargs)
66
  self.media_proc_cfg = media_proc_cfg
67
  self.num_frames_per_chunk = media_proc_cfg[
 
178
  ) -> BatchFeature:
179
  """
180
  Preprocess a atom vision input (images/video_chunk) into model-ready tensors.
181
+
182
  Args:
183
  medias: List of MediaInput.
184
  return_tensors: Desired output format ('pt', 'np', 'tf', or None).
185
+
186
  Returns:
187
  BatchFeature containing 'pixel_values' and 'grid_thws' tensors.
188
  """
 
248
 
249
  def to_dict(self) -> Dict[str, Any]:
250
  output = super().to_dict()
251
+ output = _pop_transient(output)
252
  output["media_proc_cfg"] = self.media_proc_cfg
253
  if "media_processor" in output:
254
  del output["media_processor"]
 
256
 
257
  @classmethod
258
  def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
259
+ config = _pop_transient(config_dict.copy())
260
+ kwargs = _pop_transient(kwargs)
261
  media_proc_cfg = config.pop("media_proc_cfg", {})
262
  return cls(media_proc_cfg=media_proc_cfg, **config, **kwargs)
263