import json import ast from PIL import Image, ImageDraw, ImageFont from openai import OpenAI import os import base64 import gradio as gr import tempfile def plot_bounding_boxes(image, bounding_boxes): """Simple bounding box plotter.""" if isinstance(image, str): img = Image.open(image) else: img = image.copy() width, height = img.size draw = ImageDraw.Draw(img) # Parse JSON lines = bounding_boxes.splitlines() for i, line in enumerate(lines): if line == "```json": bounding_boxes = "\n".join(lines[i+1:]) bounding_boxes = bounding_boxes.split("```")[0] break # Try to load font with CJK support font = None font_paths = [ # Noto CJK fonts (installed via packages.txt) "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", "/usr/share/fonts/truetype/noto-cjk/NotoSansCJK-Regular.ttc", # Fallback fonts "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", ] for font_path in font_paths: try: font = ImageFont.truetype(font_path, size=14) break except: continue if font is None: font = ImageFont.load_default() # Parse and plot try: bbox_list = ast.literal_eval(bounding_boxes) if not isinstance(bbox_list, list): bbox_list = [bbox_list] for bbox in bbox_list: coords = bbox.get("bbox_2d", []) text = bbox.get("text_content", "") if len(coords) < 4: continue x1, y1, x2, y2 = coords[0], coords[1], coords[2], coords[3] # Ensure order if x1 > x2: x1, x2 = x2, x1 if y1 > y2: y1, y2 = y2, y1 # Clamp to image bounds x1 = max(0, min(x1, width - 1)) y1 = max(0, min(y1, height - 1)) x2 = max(0, min(x2, width - 1)) y2 = max(0, min(y2, height - 1)) # Draw box draw.rectangle(((x1, y1), (x2, y2)), outline='green', width=2) # Draw text label if we have font if text and font: text_x = x1 + 2 text_y = y2 + 2 draw.text((text_x, text_y), text, fill='green', font=font) except Exception as e: print(f"Error plotting boxes: {e}") return img def process_image(image, prompt): """Process image using API.""" if image is None: return None, "Please upload an image" # Get API key from environment variable (HF Secret) api_key = os.environ.get("QWEN_API_KEY") if not api_key: return None, "API key not configured. Please set QWEN_API_KEY in Space secrets." # Fixed model model = "qwen2.5-vl-7b-instruct" try: # Save image temporarily with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp: image.save(tmp.name, format='JPEG', quality=95) temp_path = tmp.name # Encode image with open(temp_path, "rb") as image_file: base64_image = base64.b64encode(image_file.read()).decode("utf-8") # Call API client = OpenAI( api_key=api_key, base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1", ) messages = [ { "role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}] }, { "role": "user", "content": [ { "type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"} }, {"type": "text", "text": prompt} ] } ] completion = client.chat.completions.create( model=model, messages=messages, ) response = completion.choices[0].message.content # Plot boxes annotated_image = plot_bounding_boxes(image, response) # Clean up os.unlink(temp_path) return annotated_image, response except Exception as e: return None, f"Error: {str(e)}" # Create interface with gr.Blocks(title="安全なう - 行為規制") as demo: # Simple text-based logo gr.Markdown( """