# KANIME-V1 / handler.py
# L0Xit's picture
# Create handler.py
# 06a6d2a verified
import base64
import io
from typing import Any, Dict

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForConditionalGeneration
class EndpointHandler:
    """Endpoint handler that wraps a processor/model pair for text generation.

    The processor and model are loaded once in ``__init__``. Each call turns
    a request payload (text and/or a base64-encoded image) into processor
    inputs, runs ``generate`` and returns the decoded first sequence.
    """

    # Model used when no explicit path is supplied to the constructor.
    DEFAULT_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
    # Upper bound on the number of tokens generated per request.
    MAX_NEW_TOKENS = 200

    def __init__(self, path=""):
        """Load the processor and the model.

        Args:
            path: Model directory or hub identifier. When empty,
                ``DEFAULT_MODEL_ID`` is used instead.
        """
        model_id = path if path else self.DEFAULT_MODEL_ID
        self.processor = AutoProcessor.from_pretrained(model_id)
        # NOTE(review): confirm that the installed transformers release
        # actually exports ``AutoModelForConditionalGeneration``; if it does
        # not, the module-level import fails before this line is reached.
        self.model = AutoModelForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """Generate text for one request.

        Expected payload::

            data = {
                "inputs": {
                    "text": "Describe this image",
                    "image": "<base64-encoded image>"  # optional
                }
            }

        ``"inputs"`` may also be a plain string, which is treated as the
        text prompt.

        Args:
            data: Request payload; must contain the key ``"inputs"``.

        Returns:
            ``{"generated_text": <decoded first sequence>}``.

        Raises:
            KeyError: If ``data`` has no ``"inputs"`` entry.
            ValueError: If ``"inputs"`` is neither a string nor a dict, or
                contains neither ``"text"`` nor ``"image"``.
        """
        payload = data["inputs"]

        # A bare string would otherwise turn the `in` checks below into
        # substring tests and then fail on indexing.
        if isinstance(payload, str):
            payload = {"text": payload}
        if not isinstance(payload, dict):
            raise ValueError(
                '"inputs" must be a string or a dict with "text" and/or "image"'
            )

        inputs = {}
        if "text" in payload:
            inputs["text"] = payload["text"]
        if "image" in payload:
            # Convert the image from base64 into an RGB PIL image.
            image_bytes = base64.b64decode(payload["image"])
            with Image.open(io.BytesIO(image_bytes)) as raw_image:
                inputs["images"] = raw_image.convert("RGB")

        # Fail early with a clear message instead of calling the processor
        # with no inputs at all.
        if not inputs:
            raise ValueError(
                '"inputs" must contain a "text" and/or an "image" entry'
            )

        proc_inputs = self.processor(**inputs, return_tensors="pt").to(
            self.model.device
        )
        output_ids = self.model.generate(
            **proc_inputs, max_new_tokens=self.MAX_NEW_TOKENS
        )
        # NOTE(review): the full output sequence is decoded; depending on the
        # model this may include the prompt text - confirm what callers expect.
        result = self.processor.batch_decode(output_ids, skip_special_tokens=True)
        return {"generated_text": result[0]}