DawnC committed on
Commit
6a2169d
·
verified ·
1 Parent(s): 84016c5

Upload 13 files

Browse files
BackgroundEngine.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import cv2
4
+ from PIL import Image
5
+ import logging
6
+ import gc
7
+ import time
8
+ import os
9
+ from typing import Optional, Dict, Any, Callable
10
+ import warnings
11
+ warnings.filterwarnings("ignore")
12
+
13
+ from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
14
+ import open_clip
15
+ from mask_generator import MaskGenerator
16
+ from image_blender import ImageBlender
17
+
18
+ try:
19
+ import spaces
20
+ SPACES_AVAILABLE = True
21
+ except ImportError:
22
+ SPACES_AVAILABLE = False
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class BackgroundEngine:
    """
    Background generation engine for VividFlow.

    Integrates SDXL pipeline, OpenCLIP analysis, mask generation,
    and advanced image blending.
    """

    def __init__(self, device: str = "auto"):
        # Resolve the computation device first; everything else keys off it.
        self.device = self._setup_device(device)

        # Model identifiers used by load_models().
        self.base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
        self.clip_model_name = "ViT-B-32"
        self.clip_pretrained = "openai"

        # Lazily-loaded model handles, populated by load_models().
        self.pipeline = None
        self.clip_model = None
        self.clip_preprocess = None
        self.clip_tokenizer = None
        self.is_initialized = False

        # Generation limits / defaults.
        self.max_image_size = 1024
        self.default_steps = 25
        self.use_fp16 = True

        # Collaborators for mask creation and compositing.
        self.mask_generator = MaskGenerator(self.max_image_size)
        self.image_blender = ImageBlender()

        logger.info(f"BackgroundEngine initialized on {self.device}")
55
+
56
+ def _setup_device(self, device: str) -> str:
57
+ """Setup computation device (ZeroGPU compatible)"""
58
+ if os.getenv('SPACE_ID') is not None:
59
+ return "cpu"
60
+
61
+ if device == "auto":
62
+ if torch.cuda.is_available():
63
+ return "cuda"
64
+ elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
65
+ return "mps"
66
+ return "cpu"
67
+ return device
68
+
69
+ def _memory_cleanup(self):
70
+ """Memory cleanup"""
71
+ for _ in range(3):
72
+ gc.collect()
73
+
74
+ is_spaces = os.getenv('SPACE_ID') is not None
75
+ if not is_spaces and torch.cuda.is_available():
76
+ torch.cuda.empty_cache()
77
+
78
+ def load_models(self, progress_callback: Optional[Callable] = None):
79
+ """Load SDXL and OpenCLIP models"""
80
+ if self.is_initialized:
81
+ logger.info("Models already loaded")
82
+ return
83
+
84
+ logger.info("Loading background generation models...")
85
+
86
+ try:
87
+ self._memory_cleanup()
88
+
89
+ # Detect actual device (in ZeroGPU, CUDA becomes available after @spaces.GPU allocation)
90
+ actual_device = "cuda" if torch.cuda.is_available() else self.device
91
+ logger.info(f"Loading models to device: {actual_device}")
92
+
93
+ if progress_callback:
94
+ progress_callback("Loading OpenCLIP...", 20)
95
+
96
+ # Load OpenCLIP
97
+ self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms(
98
+ self.clip_model_name,
99
+ pretrained=self.clip_pretrained,
100
+ device=actual_device
101
+ )
102
+ self.clip_tokenizer = open_clip.get_tokenizer(self.clip_model_name)
103
+ self.clip_model.eval()
104
+
105
+ logger.info("OpenCLIP loaded")
106
+
107
+ if progress_callback:
108
+ progress_callback("Loading SDXL pipeline...", 60)
109
+
110
+ # Load SDXL
111
+ self.pipeline = StableDiffusionXLPipeline.from_pretrained(
112
+ self.base_model_id,
113
+ torch_dtype=torch.float16 if self.use_fp16 else torch.float32,
114
+ use_safetensors=True,
115
+ variant="fp16" if self.use_fp16 else None
116
+ )
117
+
118
+ # DPM solver for faster generation
119
+ self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
120
+ self.pipeline.scheduler.config
121
+ )
122
+
123
+ self.pipeline = self.pipeline.to(actual_device)
124
+
125
+ if progress_callback:
126
+ progress_callback("Applying optimizations...", 90)
127
+
128
+ # Memory optimizations
129
+ try:
130
+ self.pipeline.enable_xformers_memory_efficient_attention()
131
+ logger.info("xformers enabled")
132
+ except Exception:
133
+ try:
134
+ self.pipeline.enable_attention_slicing()
135
+ logger.info("Attention slicing enabled")
136
+ except Exception:
137
+ pass
138
+
139
+ if hasattr(self.pipeline, 'enable_vae_tiling'):
140
+ self.pipeline.enable_vae_tiling()
141
+
142
+ if hasattr(self.pipeline, 'enable_vae_slicing'):
143
+ self.pipeline.enable_vae_slicing()
144
+
145
+ self.pipeline.unet.eval()
146
+ if hasattr(self.pipeline, 'vae'):
147
+ self.pipeline.vae.eval()
148
+
149
+ self.is_initialized = True
150
+
151
+ if progress_callback:
152
+ progress_callback("Models loaded!", 100)
153
+
154
+ logger.info("Background models loaded successfully")
155
+
156
+ except Exception as e:
157
+ logger.error(f"Model loading failed: {e}")
158
+ raise RuntimeError(f"Failed to load models: {str(e)}")
159
+
160
+ def analyze_image_with_clip(self, image: Image.Image) -> str:
161
+ """Analyze image using OpenCLIP"""
162
+ if not self.clip_model:
163
+ return "Unknown"
164
+
165
+ try:
166
+ # Use actual device
167
+ actual_device = "cuda" if torch.cuda.is_available() else self.device
168
+
169
+ image_input = self.clip_preprocess(image).unsqueeze(0).to(actual_device)
170
+
171
+ categories = [
172
+ "a photo of a person",
173
+ "a photo of an animal",
174
+ "a photo of an object",
175
+ "a photo of nature",
176
+ "a photo of a building"
177
+ ]
178
+
179
+ text_inputs = self.clip_tokenizer(categories).to(actual_device)
180
+
181
+ with torch.no_grad():
182
+ image_features = self.clip_model.encode_image(image_input)
183
+ text_features = self.clip_model.encode_text(text_inputs)
184
+
185
+ image_features /= image_features.norm(dim=-1, keepdim=True)
186
+ text_features /= text_features.norm(dim=-1, keepdim=True)
187
+
188
+ similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
189
+ best_match_idx = similarity.argmax().item()
190
+
191
+ category = categories[best_match_idx].replace("a photo of ", "")
192
+ return category
193
+
194
+ except Exception as e:
195
+ logger.error(f"CLIP analysis failed: {e}")
196
+ return "unknown"
197
+
198
+ def enhance_prompt(self, user_prompt: str, foreground_image: Image.Image) -> str:
199
+ """Smart prompt enhancement based on image analysis"""
200
+ try:
201
+ img_array = np.array(foreground_image.convert('RGB'))
202
+
203
+ # Analyze color temperature
204
+ lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
205
+ avg_b = np.mean(lab[:, :, 2])
206
+ is_warm = avg_b > 128
207
+
208
+ # Analyze brightness
209
+ gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
210
+ avg_brightness = np.mean(gray)
211
+ is_bright = avg_brightness > 127
212
+
213
+ # Get subject type
214
+ clip_analysis = self.analyze_image_with_clip(foreground_image)
215
+ subject_type = clip_analysis
216
+
217
+ # Build lighting descriptors
218
+ if is_warm and is_bright:
219
+ lighting = "warm golden hour lighting, soft natural light"
220
+ elif is_warm and not is_bright:
221
+ lighting = "warm ambient lighting, cozy atmosphere"
222
+ elif not is_warm and is_bright:
223
+ lighting = "bright daylight, clear sky lighting"
224
+ else:
225
+ lighting = "soft diffused light, gentle shadows"
226
+
227
+ # Build atmosphere based on subject
228
+ atmosphere_map = {
229
+ "person": "professional, elegant composition",
230
+ "animal": "natural, harmonious setting",
231
+ "object": "clean product photography style",
232
+ "nature": "scenic, peaceful atmosphere",
233
+ "building": "architectural, balanced composition"
234
+ }
235
+ atmosphere = atmosphere_map.get(subject_type, "balanced composition")
236
+
237
+ quality_modifiers = "high quality, detailed, sharp focus, photorealistic"
238
+
239
+ # Avoid conflicts
240
+ user_prompt_lower = user_prompt.lower()
241
+ if "sunset" in user_prompt_lower or "golden" in user_prompt_lower:
242
+ lighting = ""
243
+ if "dark" in user_prompt_lower or "night" in user_prompt_lower:
244
+ lighting = lighting.replace("bright", "").replace("daylight", "")
245
+
246
+ # Combine
247
+ fragments = [user_prompt]
248
+ if lighting:
249
+ fragments.append(lighting)
250
+ fragments.append(atmosphere)
251
+ fragments.append(quality_modifiers)
252
+
253
+ enhanced_prompt = ", ".join(filter(None, fragments))
254
+
255
+ logger.debug(f"Enhanced: {enhanced_prompt[:80]}...")
256
+ return enhanced_prompt
257
+
258
+ except Exception as e:
259
+ logger.warning(f"Prompt enhancement failed: {e}")
260
+ return f"{user_prompt}, high quality, detailed, photorealistic"
261
+
262
+ def _prepare_image(self, image: Image.Image) -> Image.Image:
263
+ """Prepare image for processing"""
264
+ if image.mode != 'RGB':
265
+ image = image.convert('RGB')
266
+
267
+ width, height = image.size
268
+ max_size = self.max_image_size
269
+
270
+ if width > max_size or height > max_size:
271
+ ratio = min(max_size/width, max_size/height)
272
+ new_width = int(width * ratio)
273
+ new_height = int(height * ratio)
274
+ image = image.resize((new_width, new_height), Image.LANCZOS)
275
+
276
+ width, height = image.size
277
+ new_width = (width // 8) * 8
278
+ new_height = (height // 8) * 8
279
+
280
+ if new_width != width or new_height != height:
281
+ image = image.resize((new_width, new_height), Image.LANCZOS)
282
+
283
+ return image
284
+
285
+ def generate_background(
286
+ self,
287
+ prompt: str,
288
+ width: int,
289
+ height: int,
290
+ negative_prompt: str = "blurry, low quality, distorted",
291
+ num_inference_steps: int = 25,
292
+ guidance_scale: float = 7.5
293
+ ) -> Image.Image:
294
+ """Generate background using SDXL"""
295
+ if not self.is_initialized:
296
+ raise RuntimeError("Models not loaded")
297
+
298
+ logger.info(f"Generating background: {prompt[:50]}...")
299
+
300
+ try:
301
+ # Use actual device
302
+ actual_device = "cuda" if torch.cuda.is_available() else self.device
303
+
304
+ with torch.inference_mode():
305
+ result = self.pipeline(
306
+ prompt=prompt,
307
+ negative_prompt=negative_prompt,
308
+ width=width,
309
+ height=height,
310
+ num_inference_steps=num_inference_steps,
311
+ guidance_scale=guidance_scale,
312
+ generator=torch.Generator(device=actual_device).manual_seed(42)
313
+ )
314
+
315
+ generated_image = result.images[0]
316
+ logger.info("Background generation completed")
317
+ return generated_image
318
+
319
+ except torch.cuda.OutOfMemoryError:
320
+ logger.error("GPU memory exhausted")
321
+ self._memory_cleanup()
322
+ raise RuntimeError("GPU memory insufficient")
323
+
324
+ except Exception as e:
325
+ logger.error(f"Generation failed: {e}")
326
+ raise RuntimeError(f"Generation failed: {str(e)}")
327
+
328
+ def generate_and_combine(
329
+ self,
330
+ original_image: Image.Image,
331
+ prompt: str,
332
+ combination_mode: str = "center",
333
+ focus_mode: str = "person",
334
+ negative_prompt: str = "blurry, low quality, distorted",
335
+ num_inference_steps: int = 25,
336
+ guidance_scale: float = 7.5,
337
+ progress_callback: Optional[Callable] = None,
338
+ enable_prompt_enhancement: bool = True,
339
+ feather_radius: int = 0
340
+ ) -> Dict[str, Any]:
341
+ """
342
+ Generate background and combine with foreground.
343
+
344
+ Args:
345
+ feather_radius: Gaussian blur radius for mask edge softening (0-20, default 0)
346
+
347
+ Returns dict with: combined_image, generated_scene, original_image, mask, success
348
+ """
349
+ if not self.is_initialized:
350
+ raise RuntimeError("Models not loaded")
351
+
352
+ logger.info("Starting background generation and combination...")
353
+
354
+ try:
355
+ if progress_callback:
356
+ progress_callback("Analyzing image...", 5)
357
+
358
+ # Prepare image
359
+ processed_original = self._prepare_image(original_image)
360
+ target_width, target_height = processed_original.size
361
+
362
+ if progress_callback:
363
+ progress_callback("Enhancing prompt...", 15)
364
+
365
+ # Enhance prompt
366
+ if enable_prompt_enhancement:
367
+ enhanced_prompt = self.enhance_prompt(prompt, processed_original)
368
+ else:
369
+ enhanced_prompt = f"{prompt}, high quality, detailed, photorealistic"
370
+
371
+ enhanced_negative = f"{negative_prompt}, people, characters, cartoons, logos"
372
+
373
+ if progress_callback:
374
+ progress_callback("Generating background...", 30)
375
+
376
+ # Generate background
377
+ generated_background = self.generate_background(
378
+ prompt=enhanced_prompt,
379
+ width=target_width,
380
+ height=target_height,
381
+ negative_prompt=enhanced_negative,
382
+ num_inference_steps=num_inference_steps,
383
+ guidance_scale=guidance_scale
384
+ )
385
+
386
+ if progress_callback:
387
+ progress_callback("Creating mask...", 80)
388
+
389
+ # Generate mask
390
+ logger.info("Generating mask...")
391
+ combination_mask = self.mask_generator.create_gradient_based_mask(
392
+ processed_original,
393
+ combination_mode,
394
+ focus_mode
395
+ )
396
+
397
+ if progress_callback:
398
+ progress_callback("Blending images...", 90)
399
+
400
+ # Blend images with feather_radius
401
+ logger.info("Blending images...")
402
+ combined_image = self.image_blender.simple_blend_images(
403
+ processed_original,
404
+ generated_background,
405
+ combination_mask,
406
+ feather_radius=feather_radius
407
+ )
408
+
409
+ # Cleanup
410
+ self._memory_cleanup()
411
+
412
+ if progress_callback:
413
+ progress_callback("Complete!", 100)
414
+
415
+ logger.info("Background generation completed successfully")
416
+
417
+ # Build result dict (always include mask for diagnostics)
418
+ return {
419
+ "combined_image": combined_image,
420
+ "generated_scene": generated_background,
421
+ "original_image": processed_original,
422
+ "mask": combination_mask,
423
+ "success": True
424
+ }
425
+
426
+ except Exception as e:
427
+ logger.error(f"Generation failed: {e}")
428
+ self._memory_cleanup()
429
+ return {
430
+ "success": False,
431
+ "error": str(e)
432
+ }
FlowFacade.py CHANGED
@@ -26,29 +26,7 @@ class FlowFacade:
26
  self.text_processor = TextProcessor(resource_manager=None)
27
  print("✓ DeltaFlow initialized")
28
 
29
- def _calculate_gpu_duration(self, image: Image.Image, duration_seconds: float,
30
- num_inference_steps: int, enable_prompt_expansion: bool, **kwargs) -> int:
31
- BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
32
- BASE_STEP_DURATION = 8
33
-
34
- resized_image = self.video_engine.resize_image(image)
35
- width, height = resized_image.width, resized_image.height
36
- frames = self.video_engine.get_num_frames(duration_seconds)
37
-
38
- factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
39
- step_duration = BASE_STEP_DURATION * factor ** 1.5
40
- total_duration = int(num_inference_steps) * step_duration
41
-
42
- # Add overhead for first-time model loading
43
- if not self.video_engine.is_loaded:
44
- total_duration += 150
45
-
46
- if enable_prompt_expansion:
47
- total_duration += 40
48
-
49
- return max(int(total_duration), 300)
50
-
51
- @spaces.GPU(duration=_calculate_gpu_duration)
52
  def generate_video_from_image(self, image: Image.Image, user_instruction: str,
53
  duration_seconds: float = 3.0, num_inference_steps: int = 4,
54
  guidance_scale: float = 1.0, guidance_scale_2: float = 1.0,
 
26
  self.text_processor = TextProcessor(resource_manager=None)
27
  print("✓ DeltaFlow initialized")
28
 
29
+ @spaces.GPU(duration=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def generate_video_from_image(self, image: Image.Image, user_instruction: str,
31
  duration_seconds: float = 3.0, num_inference_steps: int = 4,
32
  guidance_scale: float = 1.0, guidance_scale_2: float = 1.0,
ResourceManager.py CHANGED
@@ -1,9 +1,3 @@
1
- # %%writefile RescourceManager.py
2
- """
3
- DeltaFlow - Resource Manager
4
- Handles GPU memory allocation, deallocation, and cache management
5
- """
6
-
7
  import gc
8
  import torch
9
  from typing import Optional
 
 
 
 
 
 
 
1
  import gc
2
  import torch
3
  from typing import Optional
TextProcessor.py CHANGED
@@ -1,10 +1,3 @@
1
- # %%writefile text_processor.py
2
- """
3
- DeltaFlow - Text Processor
4
- Handles semantic expansion using Qwen2.5-0.5B-Instruct
5
- Converts brief instructions into detailed motion descriptions
6
- """
7
-
8
  import gc
9
  import traceback
10
  from typing import Optional
 
 
 
 
 
 
 
 
1
  import gc
2
  import traceback
3
  from typing import Optional
VideoEngine_optimized.py CHANGED
@@ -1,10 +1,3 @@
1
- """
2
- DeltaFlow - Video Engine (FP8 Optimized)
3
- Ultra-fast Image-to-Video generation using Wan2.2-I2V-A14B
4
- Features: Lightning LoRA + FP8 Quantization
5
- ~70-90s inference (vs 150s baseline)
6
- """
7
-
8
  import warnings
9
  warnings.filterwarnings('ignore', category=FutureWarning)
10
  warnings.filterwarnings('ignore', category=DeprecationWarning)
 
 
 
 
 
 
 
 
1
  import warnings
2
  warnings.filterwarnings('ignore', category=FutureWarning)
3
  warnings.filterwarnings('ignore', category=DeprecationWarning)
app.py CHANGED
@@ -15,6 +15,7 @@ import ftfy
15
  import sentencepiece
16
 
17
  from FlowFacade import FlowFacade
 
18
  from ui_manager import UIManager
19
 
20
 
@@ -124,11 +125,13 @@ def main():
124
 
125
  try:
126
  facade = FlowFacade()
127
- ui = UIManager(facade)
 
 
128
  is_colab = 'google.colab' in sys.modules
129
 
130
  print("✓ Ready")
131
- ui.launch(
132
  share=is_colab,
133
  server_name="0.0.0.0",
134
  server_port=None,
 
15
  import sentencepiece
16
 
17
  from FlowFacade import FlowFacade
18
+ from BackgroundEngine import BackgroundEngine
19
  from ui_manager import UIManager
20
 
21
 
 
125
 
126
  try:
127
  facade = FlowFacade()
128
+ background_engine = BackgroundEngine()
129
+ ui_manager = UIManager(facade, background_engine)
130
+ interface = ui_manager.create_interface()
131
  is_colab = 'google.colab' in sys.modules
132
 
133
  print("✓ Ready")
134
+ interface.launch(
135
  share=is_colab,
136
  server_name="0.0.0.0",
137
  server_port=None,
css_style.py CHANGED
@@ -1,5 +1,8 @@
1
  DELTAFLOW_CSS = """
2
- /* Global Light Theme */
 
 
 
3
  :root {
4
  --primary-bg: #f8f9fa;
5
  --secondary-bg: #ffffff;
@@ -11,9 +14,12 @@ DELTAFLOW_CSS = """
11
  --accent-hover: #4f46e5;
12
  --success-color: #10b981;
13
  --error-color: #ef4444;
 
14
  --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.08);
15
  --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.12);
16
  --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.16);
 
 
17
  }
18
 
19
  /* Main Container */
@@ -276,4 +282,99 @@ video {
276
  max-width: 1200px !important;
277
  margin: 0 auto !important;
278
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  """
 
1
  DELTAFLOW_CSS = """
2
+ /* Import professional fonts */
3
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
4
+
5
+ /* Global Light Theme - Combined VividFlow & SceneWeaver */
6
  :root {
7
  --primary-bg: #f8f9fa;
8
  --secondary-bg: #ffffff;
 
14
  --accent-hover: #4f46e5;
15
  --success-color: #10b981;
16
  --error-color: #ef4444;
17
+ --warning-color: #f59e0b;
18
  --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.08);
19
  --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.12);
20
  --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.16);
21
+ --radius-md: 8px;
22
+ --radius-lg: 12px;
23
  }
24
 
25
  /* Main Container */
 
282
  max-width: 1200px !important;
283
  margin: 0 auto !important;
284
  }
285
+
286
+ /* ==== SceneWeaver Background Generation Styles ==== */
287
+
288
+ /* Feature Card - Background Generation Tab */
289
+ .feature-card {
290
+ background: var(--card-bg) !important;
291
+ border: 1px solid var(--border-color) !important;
292
+ border-radius: var(--radius-lg) !important;
293
+ padding: 1.5rem !important;
294
+ box-shadow: var(--shadow-md) !important;
295
+ overflow: visible !important;
296
+ transition: all 0.2s ease !important;
297
+ }
298
+
299
+ .feature-card:hover {
300
+ border-color: var(--accent-color) !important;
301
+ box-shadow: var(--shadow-lg) !important;
302
+ }
303
+
304
+ /* Scene Template Dropdown */
305
+ .template-dropdown select,
306
+ .template-dropdown input {
307
+ font-size: 0.95rem !important;
308
+ padding: 10px 14px !important;
309
+ border-radius: var(--radius-md) !important;
310
+ border: 1px solid var(--border-color) !important;
311
+ background: var(--secondary-bg) !important;
312
+ transition: all 0.2s ease !important;
313
+ }
314
+
315
+ .template-dropdown select:focus,
316
+ .template-dropdown input:focus {
317
+ border-color: var(--accent-color) !important;
318
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.15) !important;
319
+ outline: none !important;
320
+ }
321
+
322
+ /* Results Gallery */
323
+ .result-gallery {
324
+ border-radius: var(--radius-lg) !important;
325
+ overflow: hidden !important;
326
+ border: 1px solid var(--border-color) !important;
327
+ box-shadow: var(--shadow-md) !important;
328
+ }
329
+
330
+ /* Secondary Button (Download, Clear) */
331
+ .secondary-button {
332
+ background: var(--secondary-bg) !important;
333
+ color: var(--accent-color) !important;
334
+ border: 1.5px solid var(--accent-color) !important;
335
+ border-radius: var(--radius-md) !important;
336
+ padding: 12px 20px !important;
337
+ font-weight: 500 !important;
338
+ transition: all 0.2s ease !important;
339
+ }
340
+
341
+ .secondary-button:hover {
342
+ background: rgba(99, 102, 241, 0.1) !important;
343
+ }
344
+
345
+ /* Dropdown positioning fix for Gradio 4.x/5.x */
346
+ .gradio-dropdown,
347
+ .gradio-dropdown > div {
348
+ position: relative !important;
349
+ }
350
+
351
+ .gradio-dropdown ul,
352
+ .gradio-dropdown [role="listbox"] {
353
+ position: absolute !important;
354
+ z-index: 9999 !important;
355
+ left: 0 !important;
356
+ top: 100% !important;
357
+ width: 100% !important;
358
+ max-height: 300px !important;
359
+ overflow-y: auto !important;
360
+ background: var(--secondary-bg) !important;
361
+ border: 1px solid var(--border-color) !important;
362
+ border-radius: var(--radius-md) !important;
363
+ box-shadow: var(--shadow-lg) !important;
364
+ margin-top: 4px !important;
365
+ }
366
+
367
+ /* Status Panel */
368
+ .status-panel {
369
+ background: var(--secondary-bg) !important;
370
+ border: 1px solid var(--border-color) !important;
371
+ border-radius: var(--radius-md) !important;
372
+ padding: 12px 16px !important;
373
+ margin: 16px 0 !important;
374
+ }
375
+
376
+ .status-ready {
377
+ color: var(--success-color) !important;
378
+ font-weight: 500 !important;
379
+ }
380
  """
image_blender.py ADDED
@@ -0,0 +1,1117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import traceback
4
+ from PIL import Image
5
+ import logging
6
+ from typing import Dict, Any, Optional, Tuple
7
+
8
+ logger = logging.getLogger(__name__)
9
+ logger.setLevel(logging.INFO)
10
+
11
+
12
class ImageBlender:
    """
    Advanced image blending with aggressive spill suppression and color
    replacement.

    Two primary modes are supported:
      * Background generation: foreground preservation with edge refinement
      * Inpainting: seamless blending with adaptive color correction

    Attributes:
        enable_multi_scale: Whether multi-scale edge refinement is enabled
    """

    # Edge handling
    EDGE_EROSION_PIXELS = 1                # Pixels to erode from mask edge
    ALPHA_BINARIZE_THRESHOLD = 0.5         # Alpha threshold for binarization
    DARK_LUMINANCE_THRESHOLD = 60          # Luminance threshold for dark foreground
    FOREGROUND_PROTECTION_THRESHOLD = 140  # Mask value for strong protection
    BACKGROUND_COLOR_TOLERANCE = 30        # DeltaE tolerance for background detection

    # Inpainting-specific parameters
    INPAINT_FEATHER_SCALE = 1.2            # Scale factor for inpainting feathering
    INPAINT_COLOR_BLEND_RADIUS = 10        # Radius for color adaptation zone

    def __init__(self, enable_multi_scale: bool = True):
        """
        Create a blender.

        Parameters
        ----------
        enable_multi_scale : bool
            Whether to enable multi-scale edge refinement (default True)
        """
        self.enable_multi_scale = enable_multi_scale
        self._debug_info = {}
        self._adaptive_strength_map = None
46
+
47
+ def _erode_mask_edges(
48
+ self,
49
+ mask_array: np.ndarray,
50
+ erosion_pixels: int = 2
51
+ ) -> np.ndarray:
52
+ """
53
+ Erode mask edges to remove contaminated boundary pixels.
54
+
55
+ This removes the outermost pixels of the foreground mask where
56
+ color contamination from the original background is most likely.
57
+
58
+ Args:
59
+ mask_array: Input mask as numpy array (uint8, 0-255)
60
+ erosion_pixels: Number of pixels to erode (default 2)
61
+
62
+ Returns:
63
+ Eroded mask array (uint8)
64
+ """
65
+ if erosion_pixels <= 0:
66
+ return mask_array
67
+
68
+ # Use elliptical kernel for natural-looking erosion
69
+ kernel_size = max(2, erosion_pixels)
70
+ kernel = cv2.getStructuringElement(
71
+ cv2.MORPH_ELLIPSE,
72
+ (kernel_size, kernel_size)
73
+ )
74
+
75
+ # Apply erosion
76
+ eroded = cv2.erode(mask_array, kernel, iterations=1)
77
+
78
+ # Slight blur to smooth the eroded edges
79
+ eroded = cv2.GaussianBlur(eroded, (3, 3), 0)
80
+
81
+ logger.debug(f"Mask erosion applied: {erosion_pixels}px, kernel size: {kernel_size}")
82
+ return eroded
83
+
84
+ def _binarize_edge_alpha(
85
+ self,
86
+ alpha: np.ndarray,
87
+ mask_array: np.ndarray,
88
+ orig_array: np.ndarray,
89
+ threshold: float = 0.45
90
+ ) -> np.ndarray:
91
+ """
92
+ Binarize semi-transparent edge pixels to eliminate color bleeding.
93
+
94
+ Semi-transparent pixels at edges cause visible contamination because
95
+ they blend the original (potentially dark) foreground with the new
96
+ background. This method forces edge pixels to be either fully opaque
97
+ or fully transparent.
98
+
99
+ Args:
100
+ alpha: Current alpha channel (float32, 0.0-1.0)
101
+ mask_array: Original mask array (uint8, 0-255)
102
+ orig_array: Original foreground image array (uint8, RGB)
103
+ threshold: Alpha threshold for binarization decision (default 0.45)
104
+
105
+ Returns:
106
+ Modified alpha array with binarized edges (float32)
107
+ """
108
+ # Identify semi-transparent edge zone (not fully opaque, not fully transparent)
109
+ edge_zone = (alpha > 0.05) & (alpha < 0.95)
110
+
111
+ if not np.any(edge_zone):
112
+ return alpha
113
+
114
+ # Calculate local foreground luminance for adaptive thresholding
115
+ gray = np.mean(orig_array, axis=2)
116
+
117
+ # For dark foreground pixels, use slightly higher threshold
118
+ # to preserve more of the dark subject
119
+ is_dark = gray < self.DARK_LUMINANCE_THRESHOLD
120
+
121
+ # Create adaptive threshold map
122
+ adaptive_threshold = np.full_like(alpha, threshold)
123
+ adaptive_threshold[is_dark] = threshold + 0.1 # Keep more dark pixels
124
+
125
+ # Binarize: above threshold -> opaque, below -> transparent
126
+ alpha_binarized = alpha.copy()
127
+
128
+ # Pixels above threshold become fully opaque
129
+ make_opaque = edge_zone & (alpha > adaptive_threshold)
130
+ alpha_binarized[make_opaque] = 1.0
131
+
132
+ # Pixels below threshold become fully transparent
133
+ make_transparent = edge_zone & (alpha <= adaptive_threshold)
134
+ alpha_binarized[make_transparent] = 0.0
135
+
136
+ # Log statistics
137
+ num_opaque = np.sum(make_opaque)
138
+ num_transparent = np.sum(make_transparent)
139
+ logger.info(f"Edge binarization: {num_opaque} pixels -> opaque, {num_transparent} pixels -> transparent")
140
+
141
+ return alpha_binarized
142
+
143
+ def _apply_edge_cleanup(
144
+ self,
145
+ result_array: np.ndarray,
146
+ bg_array: np.ndarray,
147
+ alpha: np.ndarray,
148
+ cleanup_width: int = 2
149
+ ) -> np.ndarray:
150
+ """
151
+ Final cleanup pass to remove any remaining edge artifacts.
152
+
153
+ Detects remaining semi-transparent edges and replaces them with
154
+ either pure foreground or pure background colors.
155
+
156
+ Args:
157
+ result_array: Current blended result (uint8, RGB)
158
+ bg_array: Background image array (uint8, RGB)
159
+ alpha: Final alpha channel (float32, 0.0-1.0)
160
+ cleanup_width: Width of edge zone to clean (default 2)
161
+
162
+ Returns:
163
+ Cleaned result array (uint8)
164
+ """
165
+ # Find edge pixels that might still have artifacts
166
+ # These are pixels with alpha close to but not exactly 0 or 1
167
+ residual_edge = (alpha > 0.01) & (alpha < 0.99) & (alpha != 0.0) & (alpha != 1.0)
168
+
169
+ if not np.any(residual_edge):
170
+ return result_array
171
+
172
+ result_cleaned = result_array.copy()
173
+
174
+ # For residual edge pixels, snap to nearest pure state
175
+ snap_to_bg = residual_edge & (alpha < 0.5)
176
+ snap_to_fg = residual_edge & (alpha >= 0.5)
177
+
178
+ # Replace with background
179
+ result_cleaned[snap_to_bg] = bg_array[snap_to_bg]
180
+
181
+ # For foreground, keep original but ensure no blending artifacts
182
+ # (already handled by the blend, so no action needed for snap_to_fg)
183
+
184
+ num_cleaned = np.sum(residual_edge)
185
+ if num_cleaned > 0:
186
+ logger.debug(f"Edge cleanup: {num_cleaned} residual pixels cleaned")
187
+
188
+ return result_cleaned
189
+
190
+ def _remove_background_color_contamination(
191
+ self,
192
+ image_array: np.ndarray,
193
+ mask_array: np.ndarray,
194
+ orig_bg_color_lab: np.ndarray,
195
+ tolerance: float = 30.0
196
+ ) -> np.ndarray:
197
+ """
198
+ Remove original background color contamination from foreground pixels.
199
+
200
+ Scans the foreground area for pixels that match the original background
201
+ color and replaces them with nearby clean foreground colors.
202
+
203
+ Args:
204
+ image_array: Foreground image array (uint8, RGB)
205
+ mask_array: Mask array (uint8, 0-255)
206
+ orig_bg_color_lab: Original background color in Lab space
207
+ tolerance: DeltaE tolerance for detecting contaminated pixels
208
+
209
+ Returns:
210
+ Cleaned image array (uint8)
211
+ """
212
+ # Convert to Lab for color comparison
213
+ image_lab = cv2.cvtColor(image_array, cv2.COLOR_RGB2LAB).astype(np.float32)
214
+
215
+ # Only process foreground pixels (mask > 50)
216
+ foreground_mask = mask_array > 50
217
+
218
+ if not np.any(foreground_mask):
219
+ return image_array
220
+
221
+ # Calculate deltaE from original background color for all pixels
222
+ delta_l = image_lab[:, :, 0] - orig_bg_color_lab[0]
223
+ delta_a = image_lab[:, :, 1] - orig_bg_color_lab[1]
224
+ delta_b = image_lab[:, :, 2] - orig_bg_color_lab[2]
225
+ delta_e = np.sqrt(delta_l**2 + delta_a**2 + delta_b**2)
226
+
227
+ # Find contaminated pixels: in foreground but color similar to original background
228
+ contaminated = foreground_mask & (delta_e < tolerance)
229
+
230
+ if not np.any(contaminated):
231
+ logger.debug("No background color contamination detected in foreground")
232
+ return image_array
233
+
234
+ num_contaminated = np.sum(contaminated)
235
+ logger.info(f"Found {num_contaminated} pixels with background color contamination")
236
+
237
+ # Create output array
238
+ result = image_array.copy()
239
+
240
+ # For contaminated pixels, use inpainting to replace with surrounding colors
241
+ inpaint_mask = contaminated.astype(np.uint8) * 255
242
+
243
+ try:
244
+ # Use inpainting to fill contaminated areas with surrounding foreground colors
245
+ result = cv2.inpaint(result, inpaint_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
246
+ logger.info(f"Inpainted {num_contaminated} contaminated pixels")
247
+ except Exception as e:
248
+ logger.warning(f"Inpainting failed: {e}, using median filter fallback")
249
+ # Fallback: apply median filter to contaminated areas
250
+ median_filtered = cv2.medianBlur(image_array, 5)
251
+ result[contaminated] = median_filtered[contaminated]
252
+
253
+ return result
254
+
255
+ def _protect_foreground_core(
256
+ self,
257
+ result_array: np.ndarray,
258
+ orig_array: np.ndarray,
259
+ mask_array: np.ndarray,
260
+ protection_threshold: int = 140
261
+ ) -> np.ndarray:
262
+ """
263
+ Strongly protect core foreground pixels from any background influence.
264
+
265
+ For pixels with high mask confidence, directly use the original foreground
266
+ color without any blending, ensuring faces and bodies are not affected.
267
+
268
+ Args:
269
+ result_array: Current blended result (uint8, RGB)
270
+ orig_array: Original foreground image (uint8, RGB)
271
+ mask_array: Mask array (uint8, 0-255)
272
+ protection_threshold: Mask value above which pixels are fully protected
273
+
274
+ Returns:
275
+ Protected result array (uint8)
276
+ """
277
+ # Identify strongly protected foreground pixels
278
+ strong_foreground = mask_array >= protection_threshold
279
+
280
+ if not np.any(strong_foreground):
281
+ return result_array
282
+
283
+ # For these pixels, use original foreground color directly
284
+ result_protected = result_array.copy()
285
+ result_protected[strong_foreground] = orig_array[strong_foreground]
286
+
287
+ num_protected = np.sum(strong_foreground)
288
+ logger.info(f"Protected {num_protected} core foreground pixels from background influence")
289
+
290
+ return result_protected
291
+
292
    def multi_scale_edge_refinement(
        self,
        original_image: Image.Image,
        background_image: Image.Image,
        mask: Image.Image
    ) -> Image.Image:
        """
        Multi-scale edge refinement for better edge quality.
        Uses image pyramid to handle edges at different scales.

        A three-level pyramid (1x, 0.5x, 0.25x) of the mask is built; each
        level's local gradient-energy ("complexity") map decides how much
        that level contributes per pixel: detailed regions favor the
        full-resolution mask, smooth regions favor the downscaled
        (smoother) masks.

        Args:
            original_image: Foreground PIL Image
            background_image: Background PIL Image
                (NOTE(review): not referenced anywhere in this body — kept
                for interface compatibility; confirm before removing)
            mask: Current mask PIL Image

        Returns:
            Refined mask PIL Image (mode 'L'); on any failure the input
            mask is returned unchanged.
        """
        logger.info("🔍 Starting multi-scale edge refinement...")

        try:
            # Convert to numpy arrays
            orig_array = np.array(original_image.convert('RGB'))
            mask_array = np.array(mask).astype(np.float32)
            height, width = mask_array.shape

            # Define scales for pyramid
            scales = [1.0, 0.5, 0.25]  # Original, half, quarter
            scale_masks = []           # per-scale masks, upsampled back to full size
            scale_complexities = []    # per-scale gradient-energy maps, full size

            # Convert to grayscale for edge detection
            gray = cv2.cvtColor(orig_array, cv2.COLOR_RGB2GRAY)

            for scale in scales:
                if scale == 1.0:
                    scaled_gray = gray
                    scaled_mask = mask_array
                else:
                    new_h = int(height * scale)
                    new_w = int(width * scale)
                    scaled_gray = cv2.resize(gray, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
                    scaled_mask = cv2.resize(mask_array, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)

                # Compute local complexity using gradient standard deviation
                sobel_x = cv2.Sobel(scaled_gray, cv2.CV_64F, 1, 0, ksize=3)
                sobel_y = cv2.Sobel(scaled_gray, cv2.CV_64F, 0, 1, ksize=3)
                gradient_mag = np.sqrt(sobel_x**2 + sobel_y**2)

                # Calculate local complexity in 5x5 regions
                kernel_size = 5
                complexity = cv2.blur(gradient_mag, (kernel_size, kernel_size))

                # Resize back to original size
                if scale != 1.0:
                    scaled_mask = cv2.resize(scaled_mask, (width, height), interpolation=cv2.INTER_LANCZOS4)
                    complexity = cv2.resize(complexity, (width, height), interpolation=cv2.INTER_LANCZOS4)

                scale_masks.append(scaled_mask)
                scale_complexities.append(complexity)

            # Compute weights based on complexity
            # High complexity -> use high resolution mask
            # Low complexity -> use low resolution mask (smoother)
            weights = np.zeros((len(scales), height, width), dtype=np.float32)

            # Normalize complexities (epsilon avoids division by zero on flat images)
            max_complexity = max(c.max() for c in scale_complexities) + 1e-6
            normalized_complexities = [c / max_complexity for c in scale_complexities]

            # Weight assignment: higher complexity at each scale means that scale is more reliable
            for i, complexity in enumerate(normalized_complexities):
                if i == 0:  # High resolution - prefer for high complexity regions
                    weights[i] = complexity
                elif i == 1:  # Medium resolution - moderate complexity
                    weights[i] = 0.5 * (1 - complexity) + 0.5 * complexity * 0.5
                else:  # Low resolution - prefer for low complexity regions
                    weights[i] = 1 - complexity

            # Normalize weights so they sum to 1 at each pixel
            weight_sum = weights.sum(axis=0, keepdims=True) + 1e-6
            weights = weights / weight_sum

            # Weighted blend of masks from different scales
            refined_mask = np.zeros((height, width), dtype=np.float32)
            for i, mask_i in enumerate(scale_masks):
                refined_mask += weights[i] * mask_i

            # Clip and convert to uint8
            refined_mask = np.clip(refined_mask, 0, 255).astype(np.uint8)

            logger.info("✅ Multi-scale edge refinement completed")
            return Image.fromarray(refined_mask, mode='L')

        except Exception as e:
            # Fail-soft: refinement is an enhancement, never a hard dependency.
            logger.error(f"❌ Multi-scale refinement failed: {e}, using original mask")
            return mask
389
+
390
+ def simple_blend_images(
391
+ self,
392
+ original_image: Image.Image,
393
+ background_image: Image.Image,
394
+ combination_mask: Image.Image,
395
+ use_multi_scale: Optional[bool] = None,
396
+ feather_radius: int = 0
397
+ ) -> Image.Image:
398
+ """
399
+ Aggressive spill suppression + color replacement: completely eliminate yellow edge residue, maintain sharp edges
400
+
401
+ Args:
402
+ original_image: Foreground PIL Image
403
+ background_image: Background PIL Image
404
+ combination_mask: Mask PIL Image (L mode)
405
+ use_multi_scale: Override for multi-scale refinement (None = use class default)
406
+ feather_radius: Gaussian blur radius for mask feathering (0 = disabled, default behavior)
407
+
408
+ Returns:
409
+ Blended PIL Image
410
+ """
411
+ logger.info("🎨 Starting advanced image blending process...")
412
+
413
+ # Apply multi-scale edge refinement if enabled
414
+ should_use_multi_scale = use_multi_scale if use_multi_scale is not None else self.enable_multi_scale
415
+ if should_use_multi_scale:
416
+ combination_mask = self.multi_scale_edge_refinement(
417
+ original_image, background_image, combination_mask
418
+ )
419
+
420
+ # Convert to numpy arrays
421
+ orig_array = np.array(original_image, dtype=np.uint8)
422
+ bg_array = np.array(background_image, dtype=np.uint8)
423
+ mask_array = np.array(combination_mask, dtype=np.uint8)
424
+
425
+ # Apply feathering if requested
426
+ if feather_radius > 0:
427
+ kernel_size = feather_radius * 2 + 1
428
+ mask_array = cv2.GaussianBlur(
429
+ mask_array,
430
+ (kernel_size, kernel_size),
431
+ feather_radius / 2.0
432
+ )
433
+ logger.info(f"📐 Mask feathering applied: radius={feather_radius}, kernel={kernel_size}x{kernel_size}")
434
+
435
+ logger.info(f"📊 Image dimensions - Original: {orig_array.shape}, Background: {bg_array.shape}, Mask: {mask_array.shape}")
436
+ logger.info(f"📊 Mask statistics (before erosion) - Mean: {mask_array.mean():.1f}, Min: {mask_array.min()}, Max: {mask_array.max()}")
437
+
438
+ # === NEW: Apply mask erosion to remove contaminated edge pixels ===
439
+ mask_array = self._erode_mask_edges(mask_array, self.EDGE_EROSION_PIXELS)
440
+ logger.info(f"📊 Mask statistics (after erosion) - Mean: {mask_array.mean():.1f}, Min: {mask_array.min()}, Max: {mask_array.max()}")
441
+
442
+ # Enhanced parameters for better spill suppression
443
+ RING_WIDTH_PX = 4 # Increased ring width for better coverage
444
+ SPILL_STRENGTH = 0.85 # Stronger spill suppression
445
+ L_MATCH_STRENGTH = 0.65 # Stronger luminance matching
446
+ DELTAE_THRESHOLD = 18 # More aggressive contamination detection
447
+ HARD_EDGE_PROTECT = True # Black edge protection
448
+ INPAINT_FALLBACK = True # inpaint fallback repair
449
+ MULTI_PASS_CORRECTION = True # Enable multi-pass correction
450
+
451
+ # Estimate original background color and foreground representative color ===
452
+ height, width = orig_array.shape[:2]
453
+
454
+ # Take 15px from each side to estimate original background color
455
+ edge_width = 15
456
+ border_pixels = []
457
+
458
+ # Collect border pixels (excluding foreground areas)
459
+ border_mask = np.zeros((height, width), dtype=bool)
460
+ border_mask[:edge_width, :] = True # Top edge
461
+ border_mask[-edge_width:, :] = True # Bottom edge
462
+ border_mask[:, :edge_width] = True # Left edge
463
+ border_mask[:, -edge_width:] = True # Right edge
464
+
465
+ # Exclude foreground areas
466
+ fg_binary = mask_array > 50
467
+ border_mask = border_mask & (~fg_binary)
468
+
469
+ if np.any(border_mask):
470
+ border_pixels = orig_array[border_mask].reshape(-1, 3)
471
+
472
+ # Simplified background color estimation (no sklearn dependency)
473
+ try:
474
+ if len(border_pixels) > 100:
475
+ # Use histogram to find mode colors
476
+ # Quantize RGB to coarser grid to find main colors
477
+ quantized = (border_pixels // 32) * 32 # 8-level quantization
478
+
479
+ # Find most frequent color
480
+ unique_colors, counts = np.unique(quantized.reshape(-1, quantized.shape[-1]),
481
+ axis=0, return_counts=True)
482
+ most_common_idx = np.argmax(counts)
483
+ orig_bg_color_rgb = unique_colors[most_common_idx].astype(np.uint8)
484
+ else:
485
+ orig_bg_color_rgb = np.median(border_pixels, axis=0).astype(np.uint8)
486
+ except:
487
+ # Fallback: use four corners average
488
+ corners = np.array([orig_array[0,0], orig_array[0,-1],
489
+ orig_array[-1,0], orig_array[-1,-1]])
490
+ orig_bg_color_rgb = np.mean(corners, axis=0).astype(np.uint8)
491
+ else:
492
+ orig_bg_color_rgb = np.array([200, 180, 120], dtype=np.uint8) # Default yellow
493
+
494
+ # Convert to Lab space
495
+ orig_bg_color_lab = cv2.cvtColor(orig_bg_color_rgb.reshape(1,1,3), cv2.COLOR_RGB2LAB)[0,0].astype(np.float32)
496
+ logger.info(f"🎨 Detected original background color: RGB{tuple(orig_bg_color_rgb)}")
497
+
498
+ # Remove original background color contamination from foreground
499
+ orig_array = self._remove_background_color_contamination(
500
+ orig_array,
501
+ mask_array,
502
+ orig_bg_color_lab,
503
+ tolerance=self.BACKGROUND_COLOR_TOLERANCE
504
+ )
505
+
506
+ # Redefine trimap, optimized for cartoon characters
507
+ try:
508
+ kernel_3x3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
509
+
510
+ # FG_CORE: Reduce erosion iterations from 2 to 1 to avoid losing thin limbs
511
+ mask_eroded_once = cv2.erode(mask_array, kernel_3x3, iterations=1)
512
+ fg_core = mask_eroded_once > 127 # Adjustable parameter: erosion iterations
513
+
514
+ # RING: Use morphological gradient to redefine, ensuring only thin edge band
515
+ mask_dilated = cv2.dilate(mask_array, kernel_3x3, iterations=1)
516
+ mask_eroded = cv2.erode(mask_array, kernel_3x3, iterations=1)
517
+
518
+ # Ensure consistent data types to avoid overflow
519
+ morphological_gradient = cv2.subtract(mask_dilated, mask_eroded)
520
+ ring_zone = morphological_gradient > 0 # Areas with morphological gradient > 0 are edge bands
521
+
522
+ # BG: background area
523
+ bg_zone = mask_array < 30
524
+
525
+ logger.info(f"🔍 Trimap regions - FG_CORE: {fg_core.sum()}, RING: {ring_zone.sum()}, BG: {bg_zone.sum()}")
526
+
527
+ except Exception as e:
528
+ logger.error(f"❌ Trimap definition failed: {e}")
529
+ logger.error(f"📍 Traceback: {traceback.format_exc()}")
530
+ print(f"❌ TRIMAP ERROR: {e}")
531
+ print(f"Traceback: {traceback.format_exc()}")
532
+ # Fallback to simple definition
533
+ fg_core = mask_array > 200
534
+ ring_zone = (mask_array > 50) & (mask_array <= 200)
535
+ bg_zone = mask_array <= 50
536
+
537
+ # Foreground representative color: estimated from FG_CORE
538
+ if np.any(fg_core):
539
+ fg_pixels = orig_array[fg_core].reshape(-1, 3)
540
+ fg_rep_color_rgb = np.median(fg_pixels, axis=0).astype(np.uint8)
541
+ else:
542
+ fg_rep_color_rgb = np.array([80, 60, 40], dtype=np.uint8) # Default dark
543
+
544
+ fg_rep_color_lab = cv2.cvtColor(fg_rep_color_rgb.reshape(1,1,3), cv2.COLOR_RGB2LAB)[0,0].astype(np.float32)
545
+
546
+ # Edge band spill suppression and repair
547
+ if np.any(ring_zone):
548
+ # Convert to Lab space
549
+ orig_lab = cv2.cvtColor(orig_array, cv2.COLOR_RGB2LAB).astype(np.float32)
550
+ orig_array_working = orig_array.copy().astype(np.float32)
551
+
552
+ # ΔE detect contaminated pixels
553
+ ring_pixels_lab = orig_lab[ring_zone]
554
+
555
+ # Calculate ΔE with original background color (simplified version)
556
+ delta_l = ring_pixels_lab[:, 0] - orig_bg_color_lab[0]
557
+ delta_a = ring_pixels_lab[:, 1] - orig_bg_color_lab[1]
558
+ delta_b = ring_pixels_lab[:, 2] - orig_bg_color_lab[2]
559
+ delta_e = np.sqrt(delta_l**2 + delta_a**2 + delta_b**2)
560
+
561
+ # Contaminated pixel mask
562
+ contaminated_mask = delta_e < DELTAE_THRESHOLD
563
+
564
+ if np.any(contaminated_mask):
565
+ # Calculate adaptive strength based on delta_e for each pixel
566
+ # Pixels closer to background color get stronger correction
567
+ contaminated_delta_e = delta_e[contaminated_mask]
568
+
569
+ # Adaptive strength formula: inverse relationship with delta_e
570
+ # Pixels very close to bg color (low delta_e) -> strong correction
571
+ # Pixels further from bg color (high delta_e) -> lighter correction
572
+ adaptive_strength = SPILL_STRENGTH * np.maximum(
573
+ 0.0,
574
+ 1.0 - (contaminated_delta_e / DELTAE_THRESHOLD)
575
+ )
576
+
577
+ # Clamp adaptive strength to reasonable range (30% - 100% of base strength)
578
+ min_strength = SPILL_STRENGTH * 0.3
579
+ adaptive_strength = np.clip(adaptive_strength, min_strength, SPILL_STRENGTH)
580
+
581
+ # Store for debug visualization
582
+ self._adaptive_strength_map = np.zeros_like(delta_e)
583
+ self._adaptive_strength_map[contaminated_mask] = adaptive_strength
584
+
585
+ logger.info(f"📊 Adaptive strength stats - Mean: {adaptive_strength.mean():.3f}, Min: {adaptive_strength.min():.3f}, Max: {adaptive_strength.max():.3f}")
586
+
587
+ # Chroma vector deprojection
588
+ bg_chroma = np.array([orig_bg_color_lab[1], orig_bg_color_lab[2]])
589
+ bg_chroma_norm = bg_chroma / (np.linalg.norm(bg_chroma) + 1e-6)
590
+
591
+ # Color correction for contaminated pixels
592
+ contaminated_pixels = ring_pixels_lab[contaminated_mask]
593
+
594
+ # Remove background chroma component with adaptive strength (per-pixel)
595
+ pixel_chroma = contaminated_pixels[:, 1:3] # a, b channels
596
+ projection = np.dot(pixel_chroma, bg_chroma_norm)[:, np.newaxis] * bg_chroma_norm
597
+
598
+ # Apply adaptive strength per pixel
599
+ adaptive_strength_2d = adaptive_strength[:, np.newaxis]
600
+ corrected_chroma = pixel_chroma - projection * adaptive_strength_2d
601
+
602
+ # Converge toward foreground representative color with adaptive strength
603
+ convergence_factor = adaptive_strength_2d * 0.6
604
+ corrected_chroma = (corrected_chroma * (1 - convergence_factor) +
605
+ fg_rep_color_lab[1:3] * convergence_factor)
606
+
607
+ # Adaptive luminance matching
608
+ adaptive_l_strength = adaptive_strength * (L_MATCH_STRENGTH / SPILL_STRENGTH)
609
+ corrected_l = (contaminated_pixels[:, 0] * (1 - adaptive_l_strength) +
610
+ fg_rep_color_lab[0] * adaptive_l_strength)
611
+
612
+ # Update Lab values
613
+ ring_pixels_lab[contaminated_mask, 0] = corrected_l
614
+ ring_pixels_lab[contaminated_mask, 1:3] = corrected_chroma
615
+
616
+ # Write back to original image
617
+ orig_lab[ring_zone] = ring_pixels_lab
618
+
619
+ # Dark edge protection
620
+ if HARD_EDGE_PROTECT:
621
+ gray = np.mean(orig_array, axis=2)
622
+ # Detect dark and high gradient areas
623
+ sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
624
+ sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
625
+ gradient_mag = np.sqrt(sobel_x**2 + sobel_y**2)
626
+
627
+ dark_edge_zone = ring_zone & (gray < 60) & (gradient_mag > 20)
628
+ # Protect these areas from excessive modification, copy directly from original
629
+ if np.any(dark_edge_zone):
630
+ orig_lab[dark_edge_zone] = cv2.cvtColor(orig_array, cv2.COLOR_RGB2LAB)[dark_edge_zone]
631
+
632
+ # Multi-pass correction for stubborn spill
633
+ if MULTI_PASS_CORRECTION:
634
+ # Second pass for remaining contamination
635
+ ring_pixels_lab_pass2 = orig_lab[ring_zone]
636
+ delta_l_pass2 = ring_pixels_lab_pass2[:, 0] - orig_bg_color_lab[0]
637
+ delta_a_pass2 = ring_pixels_lab_pass2[:, 1] - orig_bg_color_lab[1]
638
+ delta_b_pass2 = ring_pixels_lab_pass2[:, 2] - orig_bg_color_lab[2]
639
+ delta_e_pass2 = np.sqrt(delta_l_pass2**2 + delta_a_pass2**2 + delta_b_pass2**2)
640
+
641
+ still_contaminated = delta_e_pass2 < (DELTAE_THRESHOLD * 0.8)
642
+
643
+ if np.any(still_contaminated):
644
+ # Apply stronger correction to remaining contaminated pixels
645
+ remaining_pixels = ring_pixels_lab_pass2[still_contaminated]
646
+
647
+ # More aggressive chroma neutralization
648
+ remaining_chroma = remaining_pixels[:, 1:3]
649
+ neutralized_chroma = remaining_chroma * 0.3 + fg_rep_color_lab[1:3] * 0.7
650
+
651
+ # Stronger luminance matching
652
+ neutralized_l = remaining_pixels[:, 0] * 0.4 + fg_rep_color_lab[0] * 0.6
653
+
654
+ ring_pixels_lab_pass2[still_contaminated, 0] = neutralized_l
655
+ ring_pixels_lab_pass2[still_contaminated, 1:3] = neutralized_chroma
656
+ orig_lab[ring_zone] = ring_pixels_lab_pass2
657
+
658
+ # Convert back to RGB
659
+ orig_lab_clipped = np.clip(orig_lab, 0, 255).astype(np.uint8)
660
+ orig_array_corrected = cv2.cvtColor(orig_lab_clipped, cv2.COLOR_LAB2RGB)
661
+
662
+ # inpaint fallback repair
663
+ if INPAINT_FALLBACK:
664
+ # inpaint still contaminated outermost pixels
665
+ final_contaminated = ring_zone.copy()
666
+
667
+ # Check if there's still contamination after repair
668
+ final_lab = cv2.cvtColor(orig_array_corrected, cv2.COLOR_RGB2LAB).astype(np.float32)
669
+ final_ring_lab = final_lab[ring_zone]
670
+ final_delta_l = final_ring_lab[:, 0] - orig_bg_color_lab[0]
671
+ final_delta_a = final_ring_lab[:, 1] - orig_bg_color_lab[1]
672
+ final_delta_b = final_ring_lab[:, 2] - orig_bg_color_lab[2]
673
+ final_delta_e = np.sqrt(final_delta_l**2 + final_delta_a**2 + final_delta_b**2)
674
+
675
+ still_contaminated = final_delta_e < (DELTAE_THRESHOLD * 0.5)
676
+ if np.any(still_contaminated):
677
+ # Create inpaint mask
678
+ inpaint_mask = np.zeros((height, width), dtype=np.uint8)
679
+ ring_coords = np.where(ring_zone)
680
+ inpaint_coords = (ring_coords[0][still_contaminated], ring_coords[1][still_contaminated])
681
+ inpaint_mask[inpaint_coords] = 255
682
+
683
+ # Execute inpaint
684
+ try:
685
+ orig_array_corrected = cv2.inpaint(orig_array_corrected, inpaint_mask, 3, cv2.INPAINT_TELEA)
686
+ except:
687
+ # Fallback: directly cover with foreground representative color
688
+ orig_array_corrected[inpaint_coords] = fg_rep_color_rgb
689
+
690
+ orig_array = orig_array_corrected
691
+
692
+ # === Linear space blending (keep original logic) ===
693
+ def srgb_to_linear(img):
694
+ img_f = img.astype(np.float32) / 255.0
695
+ return np.where(img_f <= 0.04045, img_f / 12.92, np.power((img_f + 0.055) / 1.055, 2.4))
696
+
697
+ def linear_to_srgb(img):
698
+ img_clipped = np.clip(img, 0, 1)
699
+ return np.where(img_clipped <= 0.0031308,
700
+ 12.92 * img_clipped,
701
+ 1.055 * np.power(img_clipped, 1/2.4) - 0.055)
702
+
703
+ orig_linear = srgb_to_linear(orig_array)
704
+ bg_linear = srgb_to_linear(bg_array)
705
+
706
+ # Cartoon-optimized Alpha calculation
707
+ alpha = mask_array.astype(np.float32) / 255.0
708
+
709
+ # Core foreground region - fully opaque
710
+ alpha[fg_core] = 1.0
711
+
712
+ # Background region - fully transparent
713
+ alpha[bg_zone] = 0.0
714
+
715
+ # [Key Fix] Force pixels with mask≥160 to α=1.0, avoiding white fill areas being limited to 0.9
716
+ high_confidence_pixels = mask_array >= 160
717
+ alpha[high_confidence_pixels] = 1.0
718
+ logger.info(f"💯 High confidence pixels set to full opacity: {high_confidence_pixels.sum()}")
719
+
720
+ # Ring area can be dehaloed, but doesn't affect already set high confidence pixels
721
+ ring_without_high_conf = ring_zone & (~high_confidence_pixels)
722
+ alpha[ring_without_high_conf] = np.clip(alpha[ring_without_high_conf], 0.2, 0.9)
723
+
724
+ # Retain existing black outline/strong edge protection
725
+ orig_gray = np.mean(orig_array, axis=2)
726
+
727
+ # Detect strong edge areas
728
+ sobel_x = cv2.Sobel(orig_gray, cv2.CV_64F, 1, 0, ksize=3)
729
+ sobel_y = cv2.Sobel(orig_gray, cv2.CV_64F, 0, 1, ksize=3)
730
+ gradient_mag = np.sqrt(sobel_x**2 + sobel_y**2)
731
+
732
+ # Black outline/strong edge protection: nearly fully opaque
733
+ black_edge_threshold = 60 # black edge threshold
734
+ gradient_threshold = 25 # gradient threshold
735
+ strong_edges = (orig_gray < black_edge_threshold) & (gradient_mag > gradient_threshold) & (mask_array > 10)
736
+ alpha[strong_edges] = np.maximum(alpha[strong_edges], 0.995) # black edge alpha
737
+
738
+ logger.info(f"🛡️ Protection applied - High conf: {high_confidence_pixels.sum()}, Strong edges: {strong_edges.sum()}")
739
+
740
+ # Apply edge alpha binarization to eliminate semi-transparent artifacts
741
+ alpha = self._binarize_edge_alpha(
742
+ alpha,
743
+ mask_array,
744
+ orig_array,
745
+ threshold=self.ALPHA_BINARIZE_THRESHOLD
746
+ )
747
+
748
+ # Final blending
749
+ alpha_3d = alpha[:, :, np.newaxis]
750
+ result_linear = orig_linear * alpha_3d + bg_linear * (1 - alpha_3d)
751
+ result_srgb = linear_to_srgb(result_linear)
752
+ result_array = (result_srgb * 255).astype(np.uint8)
753
+
754
+ # Final edge cleanup pass
755
+ result_array = self._apply_edge_cleanup(result_array, bg_array, alpha)
756
+
757
+ # Protect core foreground from any background influence
758
+ # This ensures faces and bodies retain original colors
759
+ result_array = self._protect_foreground_core(
760
+ result_array,
761
+ np.array(original_image, dtype=np.uint8), # Use original unprocessed image
762
+ mask_array,
763
+ protection_threshold=self.FOREGROUND_PROTECTION_THRESHOLD
764
+ )
765
+
766
+ # Store debug information (for debug output)
767
+ self._debug_info = {
768
+ 'orig_bg_color_rgb': orig_bg_color_rgb,
769
+ 'fg_rep_color_rgb': fg_rep_color_rgb,
770
+ 'orig_bg_color_lab': orig_bg_color_lab,
771
+ 'fg_rep_color_lab': fg_rep_color_lab,
772
+ 'ring_zone': ring_zone,
773
+ 'fg_core': fg_core,
774
+ 'alpha_final': alpha
775
+ }
776
+
777
+ return Image.fromarray(result_array)
778
+
779
+ def create_debug_images(
780
+ self,
781
+ original_image: Image.Image,
782
+ generated_background: Image.Image,
783
+ combination_mask: Image.Image,
784
+ combined_image: Image.Image
785
+ ) -> Dict[str, Image.Image]:
786
+ """
787
+ Generate debug images: (a) Final mask grayscale (b) Alpha heatmap (c) Ring visualization overlay
788
+ """
789
+ debug_images = {}
790
+
791
+ # Final Mask grayscale
792
+ debug_images["mask_gray"] = combination_mask.convert('L')
793
+
794
+ # Alpha Heatmap
795
+ mask_array = np.array(combination_mask.convert('L'))
796
+ heatmap_colored = cv2.applyColorMap(mask_array, cv2.COLORMAP_JET)
797
+ heatmap_rgb = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
798
+ debug_images["alpha_heatmap"] = Image.fromarray(heatmap_rgb)
799
+
800
+ # Ring visualization overlay - show ring areas on original image
801
+ if hasattr(self, '_debug_info') and 'ring_zone' in self._debug_info:
802
+ ring_zone = self._debug_info['ring_zone']
803
+ orig_array = np.array(original_image)
804
+ ring_overlay = orig_array.copy()
805
+
806
+ # Mark ring areas with red semi-transparent overlay
807
+ ring_overlay[ring_zone] = ring_overlay[ring_zone] * 0.7 + np.array([255, 0, 0]) * 0.3
808
+ debug_images["ring_visualization"] = Image.fromarray(ring_overlay.astype(np.uint8))
809
+ else:
810
+ # If no ring information, use original image
811
+ debug_images["ring_visualization"] = original_image
812
+
813
+ # Adaptive strength heatmap - visualize per-pixel correction strength
814
+ if hasattr(self, '_adaptive_strength_map') and self._adaptive_strength_map is not None:
815
+ # Normalize adaptive strength to 0-255 for visualization
816
+ strength_map = self._adaptive_strength_map
817
+ if strength_map.max() > 0:
818
+ normalized_strength = (strength_map / strength_map.max() * 255).astype(np.uint8)
819
+ else:
820
+ normalized_strength = np.zeros_like(strength_map, dtype=np.uint8)
821
+
822
+ # Apply colormap
823
+ strength_heatmap = cv2.applyColorMap(normalized_strength, cv2.COLORMAP_VIRIDIS)
824
+ strength_heatmap_rgb = cv2.cvtColor(strength_heatmap, cv2.COLOR_BGR2RGB)
825
+ debug_images["adaptive_strength_heatmap"] = Image.fromarray(strength_heatmap_rgb)
826
+
827
+ return debug_images
828
+
829
+ # INPAINTING-SPECIFIC BLENDING METHODS
830
+ def blend_inpainting(
831
+ self,
832
+ original: Image.Image,
833
+ generated: Image.Image,
834
+ mask: Image.Image,
835
+ feather_radius: int = 8,
836
+ apply_color_correction: bool = True
837
+ ) -> Image.Image:
838
+ """
839
+ Blend inpainted region with original image.
840
+
841
+ Specialized blending for inpainting that focuses on seamless integration
842
+ rather than foreground protection. Performs blending in linear color space
843
+ with optional adaptive color correction at boundaries.
844
+
845
+ Parameters
846
+ ----------
847
+ original : PIL.Image
848
+ Original image before inpainting
849
+ generated : PIL.Image
850
+ Generated/inpainted result from the model
851
+ mask : PIL.Image
852
+ Inpainting mask (white = inpainted area)
853
+ feather_radius : int
854
+ Feathering radius for smooth transitions
855
+ apply_color_correction : bool
856
+ Whether to apply adaptive color correction at boundaries
857
+
858
+ Returns
859
+ -------
860
+ PIL.Image
861
+ Blended result
862
+ """
863
+ logger.info(f"Inpainting blend: feather={feather_radius}, color_correction={apply_color_correction}")
864
+
865
+ # Ensure same size
866
+ if generated.size != original.size:
867
+ generated = generated.resize(original.size, Image.LANCZOS)
868
+ if mask.size != original.size:
869
+ mask = mask.resize(original.size, Image.LANCZOS)
870
+
871
+ # Convert to arrays
872
+ orig_array = np.array(original.convert('RGB')).astype(np.float32)
873
+ gen_array = np.array(generated.convert('RGB')).astype(np.float32)
874
+ mask_array = np.array(mask.convert('L')).astype(np.float32) / 255.0
875
+
876
+ # Apply feathering to mask
877
+ if feather_radius > 0:
878
+ scaled_radius = int(feather_radius * self.INPAINT_FEATHER_SCALE)
879
+ kernel_size = scaled_radius * 2 + 1
880
+ mask_array = cv2.GaussianBlur(
881
+ mask_array,
882
+ (kernel_size, kernel_size),
883
+ scaled_radius / 2
884
+ )
885
+
886
+ # Apply adaptive color correction if enabled
887
+ if apply_color_correction:
888
+ gen_array = self._apply_inpaint_color_correction(
889
+ orig_array, gen_array, mask_array
890
+ )
891
+
892
+ # sRGB to linear conversion for accurate blending
893
+ def srgb_to_linear(img):
894
+ img_norm = img / 255.0
895
+ return np.where(
896
+ img_norm <= 0.04045,
897
+ img_norm / 12.92,
898
+ np.power((img_norm + 0.055) / 1.055, 2.4)
899
+ )
900
+
901
+ def linear_to_srgb(img):
902
+ img_clipped = np.clip(img, 0, 1)
903
+ return np.where(
904
+ img_clipped <= 0.0031308,
905
+ 12.92 * img_clipped,
906
+ 1.055 * np.power(img_clipped, 1/2.4) - 0.055
907
+ )
908
+
909
+ # Convert to linear space
910
+ orig_linear = srgb_to_linear(orig_array)
911
+ gen_linear = srgb_to_linear(gen_array)
912
+
913
+ # Alpha blending in linear space
914
+ alpha = mask_array[:, :, np.newaxis]
915
+ result_linear = gen_linear * alpha + orig_linear * (1 - alpha)
916
+
917
+ # Convert back to sRGB
918
+ result_srgb = linear_to_srgb(result_linear)
919
+ result_array = (result_srgb * 255).astype(np.uint8)
920
+
921
+ logger.debug("Inpainting blend completed in linear color space")
922
+
923
+ return Image.fromarray(result_array)
924
+
925
    def _apply_inpaint_color_correction(
        self,
        original: np.ndarray,
        generated: np.ndarray,
        mask: np.ndarray
    ) -> np.ndarray:
        """
        Apply adaptive color correction to match generated region with surroundings.

        Analyzes the boundary region and adjusts the generated content's
        luminance and color to better match the original context.

        Parameters
        ----------
        original : np.ndarray
            Original image (float32, 0-255)
        generated : np.ndarray
            Generated image (float32, 0-255)
        mask : np.ndarray
            Blend mask (float32, 0-1)

        Returns
        -------
        np.ndarray
            Color-corrected generated image
        """
        # Find boundary region: dilate the binary mask, then keep only pixels
        # that lie outside the inpainted area (mask < 0.3) — i.e. the ring of
        # original content just around the inpaint region.
        mask_binary = (mask > 0.5).astype(np.uint8)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE,
            (self.INPAINT_COLOR_BLEND_RADIUS * 2 + 1, self.INPAINT_COLOR_BLEND_RADIUS * 2 + 1)
        )
        dilated = cv2.dilate(mask_binary, kernel, iterations=1)
        boundary_zone = (dilated > 0) & (mask < 0.3)

        # No surrounding context to match against — nothing to correct.
        if not np.any(boundary_zone):
            return generated

        # Convert to Lab for perceptual color matching
        orig_lab = cv2.cvtColor(
            original.astype(np.uint8), cv2.COLOR_RGB2LAB
        ).astype(np.float32)
        gen_lab = cv2.cvtColor(
            generated.astype(np.uint8), cv2.COLOR_RGB2LAB
        ).astype(np.float32)

        # Calculate statistics in boundary zone (original).
        # Median (not mean) is used so outlier pixels don't skew the target.
        boundary_orig_l = orig_lab[boundary_zone, 0]
        boundary_orig_a = orig_lab[boundary_zone, 1]
        boundary_orig_b = orig_lab[boundary_zone, 2]

        orig_mean_l = np.median(boundary_orig_l)
        orig_mean_a = np.median(boundary_orig_a)
        orig_mean_b = np.median(boundary_orig_b)

        # Calculate statistics in generated inpaint region
        inpaint_zone = mask > 0.5
        if not np.any(inpaint_zone):
            return generated

        gen_inpaint_l = gen_lab[inpaint_zone, 0]
        gen_inpaint_a = gen_lab[inpaint_zone, 1]
        gen_inpaint_b = gen_lab[inpaint_zone, 2]

        gen_mean_l = np.median(gen_inpaint_l)
        gen_mean_a = np.median(gen_inpaint_a)
        gen_mean_b = np.median(gen_inpaint_b)

        # Calculate correction deltas (how far the generated region's medians
        # are from the surrounding original content)
        delta_l = orig_mean_l - gen_mean_l
        delta_a = orig_mean_a - gen_mean_a
        delta_b = orig_mean_b - gen_mean_b

        # Limit correction to avoid over-adjustment; chroma channels (a, b)
        # are clamped to half the luminance budget to avoid visible color casts.
        max_correction = 15
        delta_l = np.clip(delta_l, -max_correction, max_correction)
        delta_a = np.clip(delta_a, -max_correction * 0.5, max_correction * 0.5)
        delta_b = np.clip(delta_b, -max_correction * 0.5, max_correction * 0.5)

        logger.debug(f"Color correction deltas: L={delta_l:.1f}, a={delta_a:.1f}, b={delta_b:.1f}")

        # Apply correction with spatial falloff from boundary:
        # distance transform gives each inpaint pixel its distance from the
        # mask edge, so correction is strongest near the boundary and fades
        # toward the center of the inpainted region.
        distance = cv2.distanceTransform(
            mask_binary, cv2.DIST_L2, 5
        )
        max_dist = np.max(distance)
        if max_dist > 0:
            # Correction strength falls off from boundary toward center
            correction_strength = 1.0 - np.clip(distance / (max_dist * 0.5), 0, 1)
        else:
            correction_strength = np.ones_like(distance)

        # Apply correction to Lab channels; the 0.7 / 0.5 factors further
        # soften the adjustment (full delta is never applied).
        corrected_lab = gen_lab.copy()
        corrected_lab[:, :, 0] += delta_l * correction_strength * 0.7
        corrected_lab[:, :, 1] += delta_a * correction_strength * 0.5
        corrected_lab[:, :, 2] += delta_b * correction_strength * 0.5

        # Clip to valid Lab ranges (OpenCV 8-bit Lab uses 0-255 per channel)
        corrected_lab[:, :, 0] = np.clip(corrected_lab[:, :, 0], 0, 255)
        corrected_lab[:, :, 1] = np.clip(corrected_lab[:, :, 1], 0, 255)
        corrected_lab[:, :, 2] = np.clip(corrected_lab[:, :, 2], 0, 255)

        # Convert back to RGB
        corrected_rgb = cv2.cvtColor(
            corrected_lab.astype(np.uint8), cv2.COLOR_LAB2RGB
        ).astype(np.float32)

        return corrected_rgb
1035
+
1036
+ def blend_inpainting_with_guided_filter(
1037
+ self,
1038
+ original: Image.Image,
1039
+ generated: Image.Image,
1040
+ mask: Image.Image,
1041
+ feather_radius: int = 8,
1042
+ guide_radius: int = 8,
1043
+ guide_eps: float = 0.01
1044
+ ) -> Image.Image:
1045
+ """
1046
+ Blend inpainted region using guided filter for edge-aware transitions.
1047
+
1048
+ Combines standard alpha blending with guided filtering to preserve
1049
+ edges in the original image while seamlessly integrating new content.
1050
+
1051
+ Parameters
1052
+ ----------
1053
+ original : PIL.Image
1054
+ Original image
1055
+ generated : PIL.Image
1056
+ Generated/inpainted result
1057
+ mask : PIL.Image
1058
+ Inpainting mask
1059
+ feather_radius : int
1060
+ Base feathering radius
1061
+ guide_radius : int
1062
+ Guided filter radius
1063
+ guide_eps : float
1064
+ Guided filter regularization
1065
+
1066
+ Returns
1067
+ -------
1068
+ PIL.Image
1069
+ Blended result with edge-aware transitions
1070
+ """
1071
+ logger.info("Applying guided filter inpainting blend")
1072
+
1073
+ # Ensure same size
1074
+ if generated.size != original.size:
1075
+ generated = generated.resize(original.size, Image.LANCZOS)
1076
+ if mask.size != original.size:
1077
+ mask = mask.resize(original.size, Image.LANCZOS)
1078
+
1079
+ # Convert to arrays
1080
+ orig_array = np.array(original.convert('RGB')).astype(np.float32)
1081
+ gen_array = np.array(generated.convert('RGB')).astype(np.float32)
1082
+ mask_array = np.array(mask.convert('L')).astype(np.float32) / 255.0
1083
+
1084
+ # Apply base feathering
1085
+ if feather_radius > 0:
1086
+ kernel_size = feather_radius * 2 + 1
1087
+ mask_feathered = cv2.GaussianBlur(
1088
+ mask_array,
1089
+ (kernel_size, kernel_size),
1090
+ feather_radius / 2
1091
+ )
1092
+ else:
1093
+ mask_feathered = mask_array
1094
+
1095
+ # Use original image as guide for the filter
1096
+ guide = cv2.cvtColor(orig_array.astype(np.uint8), cv2.COLOR_RGB2GRAY)
1097
+ guide = guide.astype(np.float32) / 255.0
1098
+
1099
+ # Apply guided filter to the mask
1100
+ try:
1101
+ mask_guided = cv2.ximgproc.guidedFilter(
1102
+ guide=guide,
1103
+ src=mask_feathered,
1104
+ radius=guide_radius,
1105
+ eps=guide_eps
1106
+ )
1107
+ logger.debug("Guided filter applied successfully")
1108
+ except Exception as e:
1109
+ logger.warning(f"Guided filter failed: {e}, using standard feathering")
1110
+ mask_guided = mask_feathered
1111
+
1112
+ # Alpha blending
1113
+ alpha = mask_guided[:, :, np.newaxis]
1114
+ result = gen_array * alpha + orig_array * (1 - alpha)
1115
+ result = np.clip(result, 0, 255).astype(np.uint8)
1116
+
1117
+ return Image.fromarray(result)
mask_generator.py ADDED
@@ -0,0 +1,648 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import traceback
4
+ from PIL import Image, ImageFilter, ImageDraw
5
+ import logging
6
+ from typing import Optional, Tuple
7
+ from scipy.ndimage import binary_erosion, binary_dilation
8
+ import io
9
+ import gc
10
+ import torch
11
+ from transformers import AutoModelForImageSegmentation
12
+ from torchvision import transforms
13
+ from rembg import remove, new_session
14
+
15
+ logger = logging.getLogger(__name__)
16
+ logger.setLevel(logging.INFO)
17
+
18
class MaskGenerator:
    """
    Intelligent mask generation using deep learning models with traditional fallback.
    Priority: BiRefNet > U²-Net (rembg) > Traditional gradient-based methods
    """

    def __init__(self, max_image_size: int = 1024, device: str = "auto"):
        """
        Initialize the mask generator.

        Parameters
        ----------
        max_image_size : int
            Upper bound on image edge length.
            NOTE(review): stored but not referenced by the methods in this
            class — confirm whether callers rely on it.
        device : str
            Torch device name, or "auto" to pick cuda/mps/cpu automatically.
        """
        self.max_image_size = max_image_size
        self.device = self._setup_device(device)

        # BiRefNet model (lazy loading) — populated on first use by
        # _load_birefnet_model(), released by _unload_birefnet_model().
        self._birefnet_model = None
        self._birefnet_transform = None

        # Log initialization
        logger.info(f"🎭 MaskGenerator initialized on {self.device}")
+
35
+ def _setup_device(self, device: str) -> str:
36
+ """Setup computation device"""
37
+ if device == "auto":
38
+ if torch.cuda.is_available():
39
+ return "cuda"
40
+ elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
41
+ return "mps"
42
+ return "cpu"
43
+ return device
44
+
45
+ def _load_birefnet_model(self) -> bool:
46
+ """
47
+ Lazy load BiRefNet model for memory efficiency.
48
+ Returns True if model loaded successfully, False otherwise.
49
+ """
50
+ if self._birefnet_model is not None:
51
+ return True
52
+
53
+ try:
54
+ logger.info("📥 Loading BiRefNet model (ZhengPeng7/BiRefNet)...")
55
+
56
+ # Load model with fp16 for memory efficiency on GPU
57
+ dtype = torch.float16 if self.device == "cuda" else torch.float32
58
+
59
+ self._birefnet_model = AutoModelForImageSegmentation.from_pretrained(
60
+ "ZhengPeng7/BiRefNet",
61
+ trust_remote_code=True,
62
+ torch_dtype=dtype
63
+ )
64
+ self._birefnet_model.to(self.device)
65
+ self._birefnet_model.eval()
66
+
67
+ # Define preprocessing transform
68
+ self._birefnet_transform = transforms.Compose([
69
+ transforms.Resize((1024, 1024)),
70
+ transforms.ToTensor(),
71
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
72
+ ])
73
+
74
+ logger.info("✅ BiRefNet model loaded successfully")
75
+ return True
76
+
77
+ except Exception as e:
78
+ logger.error(f"❌ Failed to load BiRefNet: {e}")
79
+ self._birefnet_model = None
80
+ self._birefnet_transform = None
81
+ return False
82
+
83
+ def _unload_birefnet_model(self):
84
+ """Unload BiRefNet model to free memory"""
85
+ if self._birefnet_model is not None:
86
+ del self._birefnet_model
87
+ self._birefnet_model = None
88
+ self._birefnet_transform = None
89
+
90
+ if torch.cuda.is_available():
91
+ torch.cuda.empty_cache()
92
+ gc.collect()
93
+ logger.info("🧹 BiRefNet model unloaded")
94
+
95
    def apply_guided_filter(
        self,
        mask: np.ndarray,
        guide_image: Image.Image,
        radius: int = 8,
        eps: float = 0.01
    ) -> np.ndarray:
        """
        Apply guided filter to mask for edge-preserving smoothing.
        Falls back to returning the input mask unchanged if the guided
        filter is unavailable or fails (cv2.ximgproc requires the
        opencv-contrib build).

        Args:
            mask: Input mask as numpy array (0-255)
            guide_image: Original image to use as guide
            radius: Filter radius (larger = more smoothing)
            eps: Regularization parameter (smaller = more edge-preserving)

        Returns:
            Filtered mask as numpy array (0-255), or the original mask on failure
        """
        try:
            # Convert guide image to grayscale; both guide and mask are
            # normalized to [0, 1] floats as the filter expects.
            guide_gray = np.array(guide_image.convert('L')).astype(np.float32) / 255.0
            mask_float = mask.astype(np.float32) / 255.0

            logger.info(f"🔧 Applying guided filter (radius={radius}, eps={eps})")

            # Apply guided filter
            filtered = cv2.ximgproc.guidedFilter(
                guide=guide_gray,
                src=mask_float,
                radius=radius,
                eps=eps
            )

            # Convert back to 0-255 range
            result = (np.clip(filtered, 0, 1) * 255).astype(np.uint8)
            logger.info("✅ Guided filter applied successfully")
            return result

        except Exception as e:
            # Best-effort: smoothing is an enhancement, not a requirement.
            logger.error(f"❌ Guided filter failed: {e}, using original mask")
            return mask
+
139
    def try_birefnet_mask(self, original_image: Image.Image) -> Optional[Image.Image]:
        """
        Generate foreground mask using BiRefNet model.
        BiRefNet provides high-quality segmentation with clean edges.

        Args:
            original_image: Input PIL Image

        Returns:
            PIL Image (L mode) mask or None if failed (caller falls back
            to rembg / traditional methods)
        """
        try:
            # Lazy load model; None return triggers the fallback chain.
            if not self._load_birefnet_model():
                return None

            logger.info("🤖 Starting BiRefNet foreground extraction...")
            original_size = original_image.size

            # Convert to RGB if needed
            if original_image.mode != 'RGB':
                image_rgb = original_image.convert('RGB')
            else:
                image_rgb = original_image

            # Preprocess image (resize to 1024x1024 + ImageNet normalization)
            input_tensor = self._birefnet_transform(image_rgb).unsqueeze(0)

            # Move to device; dtype must match the fp16 weights on CUDA.
            if self.device == "cuda":
                input_tensor = input_tensor.to(self.device, dtype=torch.float16)
            else:
                input_tensor = input_tensor.to(self.device)

            # Run inference
            with torch.no_grad():
                outputs = self._birefnet_model(input_tensor)

            # BiRefNet outputs a list, get the final prediction
            if isinstance(outputs, (list, tuple)):
                pred = outputs[-1]
            else:
                pred = outputs

            # Sigmoid to get probability map
            pred = torch.sigmoid(pred)

            # Convert to numpy
            pred_np = pred.squeeze().cpu().numpy()

            # Convert to 0-255 range
            mask_array = (pred_np * 255).astype(np.uint8)

            # Resize back to original size
            mask_pil = Image.fromarray(mask_array, mode='L')
            mask_pil = mask_pil.resize(original_size, Image.LANCZOS)
            mask_array = np.array(mask_pil)

            # Quality check: reject near-empty or tiny-coverage masks so the
            # fallback extractor gets a chance instead.
            mean_val = mask_array.mean()
            nonzero_ratio = np.count_nonzero(mask_array > 50) / mask_array.size

            logger.info(f"📊 BiRefNet mask stats - Mean: {mean_val:.1f}, Coverage: {nonzero_ratio:.1%}")

            if mean_val < 10:
                logger.warning("⚠️ BiRefNet mask too weak, falling back")
                return None

            if nonzero_ratio < 0.03:
                logger.warning("⚠️ BiRefNet foreground coverage too low, falling back")
                return None

            # Light post-processing for edge refinement:
            # morphological close fills pinholes without fattening edges.
            kernel_small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_small)

            logger.info("✅ BiRefNet mask generation successful!")
            return Image.fromarray(mask_array, mode='L')

        except torch.cuda.OutOfMemoryError:
            # Free the model so subsequent operations have VRAM to work with.
            logger.error("❌ BiRefNet: GPU memory exhausted")
            self._unload_birefnet_model()
            return None

        except Exception as e:
            logger.error(f"❌ BiRefNet mask generation failed: {e}")
            logger.error(f"📍 Traceback: {traceback.format_exc()}")
            return None
+
229
    def try_deep_learning_mask(self, original_image: Image.Image) -> Optional[Image.Image]:
        """
        Intelligent foreground extraction with model priority:
        1. BiRefNet (best quality, clean edges)
        2. U²-Net via rembg (good fallback)
        3. Return None to trigger traditional methods

        Args:
            original_image: Input PIL Image

        Returns:
            PIL Image (L mode) mask or None if all methods failed
        """
        # Priority 1: Try BiRefNet first
        logger.info("🤖 Attempting BiRefNet mask generation...")
        birefnet_mask = self.try_birefnet_mask(original_image)
        if birefnet_mask is not None:
            logger.info("✅ Using BiRefNet generated mask")
            return birefnet_mask

        # Priority 2: Fallback to rembg (U²-Net)
        logger.info("🔄 BiRefNet unavailable/failed, trying rembg...")
        try:
            logger.info("🤖 Starting rembg foreground extraction")

            # Try u2net first (better for cartoons/objects like Snoopy)
            try:
                session = new_session('u2net')
                logger.info("✅ Using u2net model")
            except Exception as e:
                logger.warning(f"u2net failed ({e}), trying u2net_human_seg")
                try:
                    session = new_session('u2net_human_seg')
                    logger.info("✅ Using u2net_human_seg model")
                except Exception as e2:
                    logger.error(f"All rembg models failed: {e2}")
                    return None

            # Convert image to bytes for rembg (its API takes encoded bytes)
            img_byte_arr = io.BytesIO()
            original_image.save(img_byte_arr, format='PNG')
            img_byte_arr = img_byte_arr.getvalue()
            logger.info(f"📷 Image size: {len(img_byte_arr)} bytes")

            # Perform background removal; the alpha channel of the RGBA
            # result is the raw foreground confidence map.
            result = remove(img_byte_arr, session=session)
            result_img = Image.open(io.BytesIO(result)).convert('RGBA')
            alpha_channel = result_img.split()[-1]
            alpha_array = np.array(alpha_channel)

            logger.info(f"📊 Raw alpha stats - Mean: {alpha_array.mean():.1f}, Min: {alpha_array.min()}, Max: {alpha_array.max()}")

            # Step 1: Light smoothing to reduce noise but preserve edges
            alpha_smoothed = cv2.GaussianBlur(alpha_array, (3, 3), 0.8)

            # Step 2: Contrast stretching to utilize full range
            alpha_stretched = cv2.normalize(alpha_smoothed, None, 0, 255, cv2.NORM_MINMAX)

            # Step 3: CRITICAL FIX - More aggressive foreground preservation
            # Instead of hard threshold, use adaptive approach

            # Find the main subject area (high confidence regions)
            high_confidence = alpha_stretched > 180
            medium_confidence = (alpha_stretched > 60) & (alpha_stretched <= 180)
            low_confidence = (alpha_stretched > 15) & (alpha_stretched <= 60)

            # Create final mask with better extremity handling
            final_alpha = np.zeros_like(alpha_stretched)

            # High confidence areas - keep at full opacity
            final_alpha[high_confidence] = 255

            # Medium confidence - boost significantly
            final_alpha[medium_confidence] = np.clip(alpha_stretched[medium_confidence] * 1.8, 200, 255)

            # Low confidence - moderate boost (catches faint extremities)
            final_alpha[low_confidence] = np.clip(alpha_stretched[low_confidence] * 2.5, 120, 199)

            # Morphological operations to connect disconnected parts (hands, feet, tail)
            kernel_small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            # NOTE(review): kernel_medium is created but never used below.
            kernel_medium = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

            # Close small gaps (helps connect separated body parts)
            final_alpha = cv2.morphologyEx(final_alpha, cv2.MORPH_CLOSE, kernel_small, iterations=1)

            # Light dilation to ensure nothing gets cut off
            final_alpha = cv2.dilate(final_alpha, kernel_small, iterations=1)

            logger.info(f"📊 Final alpha stats - Mean: {final_alpha.mean():.1f}, Min: {final_alpha.min()}, Max: {final_alpha.max()}")

            # Quality check - but be more lenient for cartoon characters
            if final_alpha.mean() < 10:
                logger.warning("⚠️ Alpha still too weak, falling back to traditional method")
                return None

            # Enhanced post-processing for cartoon characters
            is_cartoon = self._detect_cartoon_character(original_image, final_alpha)

            if is_cartoon:
                logger.info("🎭 Detected cartoon/character image, applying specialized processing")
                final_alpha = self._enhance_cartoon_mask(original_image, final_alpha)

            # Count non-zero pixels to ensure we have substantial foreground
            foreground_pixels = np.count_nonzero(final_alpha > 50)
            total_pixels = final_alpha.size
            foreground_ratio = foreground_pixels / total_pixels
            logger.info(f"📊 Foreground coverage: {foreground_ratio:.1%} of image")

            if foreground_ratio < 0.05:  # Less than 5% is probably too little
                logger.warning("⚠️ Very low foreground coverage, falling back to traditional method")
                return None

            mask = Image.fromarray(final_alpha.astype(np.uint8), mode='L')
            logger.info("✅ Enhanced rembg mask generation successful!")
            return mask

        except Exception as e:
            logger.error(f"❌ Deep learning mask extraction failed: {e}")
            return None
+
349
+ def _detect_cartoon_character(self, original_image: Image.Image, alpha_mask: np.ndarray) -> bool:
350
+ """
351
+ Detect if image is cartoon/line art (heuristic approach)
352
+ """
353
+ try:
354
+ img_array = np.array(original_image.convert('RGB'))
355
+ gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
356
+
357
+ # Calculate edge density (cartoons usually have more clear edges)
358
+ edges = cv2.Canny(gray, 50, 150)
359
+ edge_density = np.count_nonzero(edges) / max(edges.size, 1) # Avoid division by zero
360
+
361
+ # Calculate color complexity (cartoons usually have fewer colors) - optimize memory usage
362
+ h, w, c = img_array.shape
363
+ if h * w > 100000: # If image is too large, resize for processing
364
+ small_img = cv2.resize(img_array, (200, 200))
365
+ else:
366
+ small_img = img_array
367
+
368
+ unique_colors = len(np.unique(small_img.reshape(-1, 3), axis=0))
369
+ total_pixels = small_img.shape[0] * small_img.shape[1]
370
+ color_simplicity = unique_colors < (total_pixels * 0.1)
371
+
372
+ # Check for obvious black outlines
373
+ dark_pixels_ratio = np.count_nonzero(gray < 50) / max(gray.size, 1) # Avoid division by zero
374
+ has_black_outline = dark_pixels_ratio > 0.05
375
+
376
+ # Comprehensive judgment: high edge density + color simplicity + black outline = likely cartoon
377
+ is_cartoon = (edge_density > 0.05) and (color_simplicity or has_black_outline)
378
+
379
+ logger.info(f"🔍 Cartoon detection - Edge density: {edge_density:.3f}, Color simplicity: {color_simplicity}, Black outline: {has_black_outline} -> Cartoon: {is_cartoon}")
380
+ return is_cartoon
381
+
382
+ except Exception as e:
383
+ logger.error(f"❌ Cartoon detection failed: {e}")
384
+ logger.error(f"📍 Traceback: {traceback.format_exc()}")
385
+ print(f"❌ CARTOON DETECTION ERROR: {e}")
386
+ print(f"Traceback: {traceback.format_exc()}")
387
+ return False
388
+
389
+ def _enhance_cartoon_mask(self, original_image: Image.Image, alpha_mask: np.ndarray) -> np.ndarray:
390
+ """
391
+ Enhanced mask processing for cartoon characters
392
+ """
393
+ try:
394
+ img_array = np.array(original_image.convert('RGB'))
395
+ gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
396
+ enhanced_alpha = alpha_mask.copy()
397
+
398
+ # Step 1: Black outline enhancement - find black outlines and enhance their alpha
399
+ th_dark = 80 # Adjustable parameter: black threshold
400
+ black_outline = gray < th_dark
401
+
402
+ # Dilate black outline region by 1px
403
+ kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) # Adjustable parameter: dilation kernel size
404
+ black_outline_dilated = cv2.dilate(black_outline.astype(np.uint8), kernel_dilate, iterations=1)
405
+
406
+ # Set black outline region alpha directly to 255
407
+ enhanced_alpha[black_outline_dilated > 0] = 255
408
+ logger.info(f"🖤 Black outline enhanced: {np.count_nonzero(black_outline_dilated)} pixels")
409
+
410
+ # Step 2: Simplified internal enhancement - process white fill areas within outlines
411
+ # Find high confidence regions (alpha ≥ 160)
412
+ high_confidence = enhanced_alpha >= 160
413
+
414
+ # Apply close operation on high confidence regions to connect separated parts
415
+ kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # Adjustable parameter: close kernel size
416
+ high_confidence_closed = cv2.morphologyEx(high_confidence.astype(np.uint8), cv2.MORPH_CLOSE, kernel_close, iterations=1)
417
+
418
+ # Simplified approach: directly enhance medium confidence regions without complex flood fill
419
+ # Find medium/low confidence regions surrounded by high confidence regions
420
+ medium_confidence = (enhanced_alpha >= 80) & (enhanced_alpha < 160)
421
+
422
+ # Dilate high confidence region to include more internal areas
423
+ kernel_dilate_internal = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
424
+ high_confidence_expanded = cv2.dilate(high_confidence_closed, kernel_dilate_internal, iterations=1)
425
+
426
+ # Medium confidence pixels within expanded high confidence areas are considered internal fill
427
+ internal_fill_regions = medium_confidence & (high_confidence_expanded > 0)
428
+
429
+ # Enhance alpha of these internal fill regions to at least 220
430
+ min_alpha_for_fill = 220 # Adjustable parameter: minimum alpha for internal fill
431
+ enhanced_alpha[internal_fill_regions] = np.maximum(enhanced_alpha[internal_fill_regions], min_alpha_for_fill)
432
+
433
+ logger.info(f"🤍 Internal fill regions enhanced: {np.count_nonzero(internal_fill_regions)} pixels")
434
+ logger.info(f"📊 Enhanced alpha stats - Mean: {enhanced_alpha.mean():.1f}, Min: {enhanced_alpha.min()}, Max: {enhanced_alpha.max()}")
435
+
436
+ return enhanced_alpha
437
+
438
+ except Exception as e:
439
+ logger.error(f"❌ Cartoon mask enhancement failed: {e}")
440
+ logger.error(f"📍 Traceback: {traceback.format_exc()}")
441
+ print(f"❌ CARTOON MASK ENHANCEMENT ERROR: {e}")
442
+ print(f"Traceback: {traceback.format_exc()}")
443
+ return alpha_mask
444
+
445
    def _adjust_mask_for_scene_focus(self, mask: Image.Image, original_image: Image.Image) -> Image.Image:
        """
        Adjust mask for scene focus mode to include nearby objects like chairs, furniture.

        Expands the subject mask outward, detects edges in the newly covered
        ring, and merges any edge-defined objects there into the mask.

        Args:
            mask: Foreground mask (L mode)
            original_image: Source image used for edge detection

        Returns:
            Expanded mask; the input mask is returned unchanged on error.
        """
        try:
            logger.info("🏠 Adjusting mask for scene focus mode...")

            mask_array = np.array(mask)
            img_array = np.array(original_image.convert('RGB'))

            # Expand mask to include nearby objects
            # Use larger dilation kernel to include furniture/objects
            kernel_large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
            expanded_mask = cv2.dilate(mask_array, kernel_large, iterations=2)

            # Find contours in the expanded area to detect objects
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            edges = cv2.Canny(gray, 30, 100)

            # Apply edge detection only in the expanded ring (newly covered
            # area that was not part of the original subject mask).
            expanded_region = (expanded_mask > 0) & (mask_array == 0)
            object_edges = np.zeros_like(edges)
            object_edges[expanded_region] = edges[expanded_region]

            # Close gaps to form complete objects
            kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
            object_mask = cv2.morphologyEx(object_edges, cv2.MORPH_CLOSE, kernel_close)
            object_mask = cv2.dilate(object_mask, kernel_close, iterations=1)

            # Combine with original mask (per-pixel max keeps both regions)
            final_mask = np.maximum(mask_array, object_mask)

            logger.info("✅ Scene focus adjustment completed")
            return Image.fromarray(final_mask)

        except Exception as e:
            # Best-effort: on failure the unmodified subject mask is still usable.
            logger.error(f"❌ Scene focus adjustment failed: {e}")
            return mask
+
484
    def create_gradient_based_mask(self, original_image: Image.Image, mode: str = "center", focus_mode: str = "person") -> Image.Image:
        """
        Intelligent foreground extraction: prioritize deep learning models, fallback to traditional methods.

        Args:
            original_image: Input PIL Image
            mode: Mask layout — "center" (subject extraction with DL +
                gradient fallback), "left_half" / "right_half" (hard split
                with a feathered transition), or "full" (small center ellipse).
            focus_mode: 'person' for tight crop around person, 'scene' for
                including nearby objects.

        Returns:
            PIL Image (L mode) mask.
        """
        width, height = original_image.size
        logger.info(f"🎯 Creating mask for {width}x{height} image, mode: {mode}, focus: {focus_mode}")

        if mode == "center":
            # Try using deep learning models for intelligent foreground extraction
            logger.info("🤖 Attempting deep learning mask generation...")
            dl_mask = self.try_deep_learning_mask(original_image)
            if dl_mask is not None:
                logger.info("✅ Using deep learning generated mask")
                # Apply focus mode adjustments to deep learning mask
                if focus_mode == "scene":
                    dl_mask = self._adjust_mask_for_scene_focus(dl_mask, original_image)
                return dl_mask

            # Fallback to traditional method
            logger.info("🔄 Deep learning failed, using traditional gradient-based method")
            img_array = np.array(original_image.convert('RGB'))
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

            # First-order derivatives: use Sobel operator for edge detection
            grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
            grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
            gradient_magnitude = np.sqrt(grad_x**2 + grad_y**2)

            # Second-order derivatives: use Laplacian operator for texture change detection
            laplacian = cv2.Laplacian(gray, cv2.CV_64F, ksize=3)
            laplacian_abs = np.abs(laplacian)

            # Combine first and second order derivatives
            combined_edges = gradient_magnitude * 0.7 + laplacian_abs * 0.3
            # NOTE(review): np.max(combined_edges) is 0 for a perfectly flat
            # image, which would divide by zero here — confirm inputs can
            # never be uniform, or guard this.
            combined_edges = (combined_edges / np.max(combined_edges)) * 255

            # Threshold processing to find strong edges
            _, edge_binary = cv2.threshold(combined_edges.astype(np.uint8), 20, 255, cv2.THRESH_BINARY)

            # Morphological operations to connect edges
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
            edge_binary = cv2.morphologyEx(edge_binary, cv2.MORPH_CLOSE, kernel)

            # Find contours and create mask
            contours, _ = cv2.findContours(edge_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Find largest contour (main subject)
                largest_contour = max(contours, key=cv2.contourArea)
                contour_mask = np.zeros((height, width), dtype=np.uint8)
                cv2.fillPoly(contour_mask, [largest_contour], 255)

                # Create foreground enhancement mask: specially protect dark regions
                dark_mask = (gray < 90).astype(np.uint8) * 255
                morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
                dark_mask = cv2.morphologyEx(dark_mask, cv2.MORPH_CLOSE, morph_kernel, iterations=1)
                dark_mask = cv2.dilate(dark_mask, morph_kernel, iterations=2)
                contour_mask = cv2.bitwise_or(contour_mask, dark_mask)

                # Get core foreground: clean holes and fill gaps
                close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
                core_mask = cv2.morphologyEx(contour_mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)

                open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
                core_mask = cv2.morphologyEx(core_mask, cv2.MORPH_OPEN, open_kernel, iterations=1)

                # Convert to binary core (0/255)
                _, core_binary = cv2.threshold(core_mask, 127, 255, cv2.THRESH_BINARY)

                # Keep only slight dilation to avoid foreground being eaten
                dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
                core_binary = cv2.dilate(core_binary, dilate_kernel, iterations=1)

                # Distance transform feathering: shrink feathering range for sharp edges
                FEATHER_PX = 4

                # Calculate distance transform (distance of each background
                # pixel from the core foreground)
                core_float = core_binary.astype(np.float32) / 255.0
                distances = cv2.distanceTransform((1 - core_float).astype(np.uint8), cv2.DIST_L2, 5)

                # Create feathering mask: 0→FEATHER_PX linear mapping to 1→0
                feather_mask = np.ones_like(distances)
                edge_region = (distances > 0) & (distances <= FEATHER_PX)
                feather_mask[edge_region] = 1.0 - (distances[edge_region] / FEATHER_PX)
                feather_mask[distances > FEATHER_PX] = 0.0

                # Apply double-smoothstep curve: make transition steeper, reduce semi-transparent halos
                def double_smoothstep(t):
                    t = np.clip(t, 0, 1)
                    s1 = t * t * (3 - 2 * t)
                    return s1 * s1 * (3 - 2 * s1)  # Equivalent to t^3 (10 - 15t + 6t^2)

                # Combine core with feathering: core area keeps 255, edges use double_smoothstep feathering
                final_alpha = np.zeros_like(distances)
                final_alpha[core_binary > 127] = 1.0  # Core area
                final_alpha[edge_region] = double_smoothstep(feather_mask[edge_region])  # Feathering area

                # Convert to 0-255 range
                final_mask = (final_alpha * 255).astype(np.uint8)

                # Apply guided filter for edge-preserving smoothing
                final_mask = self.apply_guided_filter(final_mask, original_image, radius=8, eps=0.01)

                mask = Image.fromarray(final_mask)
            else:
                # Backup plan: use large ellipse
                mask = Image.new('L', (width, height), 0)
                draw = ImageDraw.Draw(mask)
                center_x, center_y = width // 2, height // 2
                width_radius = int(width * 0.45)
                # NOTE(review): height_radius is derived from width, not
                # height — looks like a typo (width * 0.48 vs height * 0.48);
                # confirm whether the near-circular ellipse is intentional.
                height_radius = int(width * 0.48)
                draw.ellipse([
                    center_x - width_radius, center_y - height_radius,
                    center_x + width_radius, center_y + height_radius
                ], fill=255)
                # Apply guided filter instead of Gaussian blur
                mask_array = np.array(mask)
                mask_array = self.apply_guided_filter(mask_array, original_image, radius=10, eps=0.02)
                mask = Image.fromarray(mask_array)

        elif mode == "left_half":
            # Keep original logic unchanged - ensure Snoopy and other functions work normally
            mask = Image.new('L', (width, height), 0)
            mask_array = np.array(mask)
            mask_array[:, :width//2] = 255

            # Linear alpha ramp over a 10%-of-width transition zone
            transition_zone = width // 10
            for i in range(transition_zone):
                x_pos = width//2 + i
                if x_pos < width:
                    alpha = 255 * (1 - i / transition_zone)
                    mask_array[:, x_pos] = int(alpha)

            mask = Image.fromarray(mask_array)

        elif mode == "right_half":
            # Keep original logic unchanged - ensure Snoopy and other functions work normally
            mask = Image.new('L', (width, height), 0)
            mask_array = np.array(mask)
            mask_array[:, width//2:] = 255

            # Linear alpha ramp over a 10%-of-width transition zone
            transition_zone = width // 10
            for i in range(transition_zone):
                x_pos = width//2 - i - 1
                if x_pos >= 0:
                    alpha = 255 * (1 - i / transition_zone)
                    mask_array[:, x_pos] = int(alpha)

            mask = Image.fromarray(mask_array)

        elif mode == "full":
            # Small blurred ellipse at the image center
            mask = Image.new('L', (width, height), 0)
            draw = ImageDraw.Draw(mask)
            center_x, center_y = width // 2, height // 2
            radius = min(width, height) // 8

            draw.ellipse([
                center_x - radius, center_y - radius,
                center_x + radius, center_y + radius
            ], fill=255)

            mask = mask.filter(ImageFilter.GaussianBlur(radius=5))

        # NOTE(review): an unrecognized mode raises UnboundLocalError on
        # `mask` — confirm callers only pass the four supported modes.
        return mask
requirements.txt CHANGED
@@ -1,9 +1,8 @@
1
- #(Apache 2.0 授權,包含 Wan2.2 LoRA 修復)
2
  git+https://github.com/linoytsaban/diffusers.git@wan22-loras
3
-
4
  gradio
5
- transformers
6
- accelerate
7
  safetensors
8
  sentencepiece
9
  peft
@@ -12,4 +11,15 @@ imageio-ffmpeg
12
  opencv-python
13
  pillow
14
  spaces
15
- torchao
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VividFlow I2V Dependencies
2
  git+https://github.com/linoytsaban/diffusers.git@wan22-loras
 
3
  gradio
4
+ transformers>=4.46.0
5
+ accelerate>=1.1.1
6
  safetensors
7
  sentencepiece
8
  peft
 
11
  opencv-python
12
  pillow
13
  spaces
14
+ torchao
15
+
16
+ # Background Generation Dependencies (SceneWeaver)
17
+ open_clip_torch
18
+ sentence-transformers
19
+ rembg[gpu]
20
+ scipy
21
+ opencv-contrib-python
22
+
23
+ # Core Dependencies
24
+ torch>=2.5.0
25
+ numpy
scene_templates.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, List, Optional
3
+ from dataclasses import dataclass
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
@dataclass
class SceneTemplate:
    """Data class representing a scene template"""
    key: str                      # stable registry identifier, e.g. "office_modern"
    name: str                     # human-readable display name shown in the UI
    prompt: str                   # positive text-to-image prompt describing the scene
    negative_extra: str           # template-specific terms appended to the base negative prompt
    category: str                 # gallery grouping, e.g. "Professional", "Nature"
    icon: str                     # emoji rendered next to the name in dropdowns/cards
    guidance_scale: float = 7.5   # recommended guidance for this scene (7.5 = balanced default)
17
+
18
+
19
+ class SceneTemplateManager:
20
+ """
21
+ Manages curated scene templates for background generation.
22
+ Provides categorized presets that users can select with one click.
23
+ """
24
+
25
+ # Scene template definitions
26
+ TEMPLATES: Dict[str, SceneTemplate] = {
27
+ # Professional Category
28
+ "office_modern": SceneTemplate(
29
+ key="office_modern",
30
+ name="Modern Office",
31
+ prompt="modern minimalist office interior, clean white desk, large floor-to-ceiling windows, natural daylight, professional corporate environment, soft shadows, contemporary furniture",
32
+ negative_extra="messy, cluttered, dark, old",
33
+ category="Professional",
34
+ icon="🏢",
35
+ guidance_scale=7.5
36
+ ),
37
+ "office_executive": SceneTemplate(
38
+ key="office_executive",
39
+ name="Executive Suite",
40
+ prompt="luxurious executive office, mahogany desk, leather chair, city skyline view through windows, warm ambient lighting, bookshelf, elegant professional setting",
41
+ negative_extra="cheap, cramped, messy",
42
+ category="Professional",
43
+ icon="👔",
44
+ guidance_scale=7.5
45
+ ),
46
+ "studio_white": SceneTemplate(
47
+ key="studio_white",
48
+ name="White Studio",
49
+ prompt="clean white photography studio background, professional lighting setup, seamless white backdrop, soft diffused light, minimal shadows",
50
+ negative_extra="colored, textured, dirty",
51
+ category="Professional",
52
+ icon="📷",
53
+ guidance_scale=8.0
54
+ ),
55
+ "coworking": SceneTemplate(
56
+ key="coworking",
57
+ name="Coworking Space",
58
+ prompt="modern coworking space, open plan office, plants, exposed brick, industrial chic design, natural light, collaborative environment",
59
+ negative_extra="empty, dark, boring",
60
+ category="Professional",
61
+ icon="💼",
62
+ guidance_scale=7.0
63
+ ),
64
+ "conference": SceneTemplate(
65
+ key="conference",
66
+ name="Conference Room",
67
+ prompt="modern conference room, large meeting table, glass walls, professional presentation screen, bright corporate lighting, clean minimal design",
68
+ negative_extra="small, cramped, outdated",
69
+ category="Professional",
70
+ icon="🤝",
71
+ guidance_scale=7.5
72
+ ),
73
+
74
+ # Nature Category
75
+ "beach_sunset": SceneTemplate(
76
+ key="beach_sunset",
77
+ name="Sunset Beach",
78
+ prompt="beautiful tropical beach at golden hour sunset, palm trees silhouette, calm turquoise ocean waves, warm orange and pink sky, soft sand, paradise vacation vibes",
79
+ negative_extra="storm, rain, crowded, trash",
80
+ category="Nature",
81
+ icon="🏖️",
82
+ guidance_scale=7.0
83
+ ),
84
+ "forest_enchanted": SceneTemplate(
85
+ key="forest_enchanted",
86
+ name="Enchanted Forest",
87
+ prompt="magical enchanted forest, sunlight streaming through tall trees, lush green foliage, mystical atmosphere, morning mist, fairy tale woodland",
88
+ negative_extra="dead trees, dark, scary, barren",
89
+ category="Nature",
90
+ icon="🌲",
91
+ guidance_scale=7.0
92
+ ),
93
+ "mountain_scenic": SceneTemplate(
94
+ key="mountain_scenic",
95
+ name="Mountain Vista",
96
+ prompt="breathtaking mountain landscape, snow-capped peaks, alpine meadow, clear blue sky, majestic scenic view, pristine nature, peaceful atmosphere",
97
+ negative_extra="industrial, polluted, crowded",
98
+ category="Nature",
99
+ icon="🏔️",
100
+ guidance_scale=7.5
101
+ ),
102
+ "garden_spring": SceneTemplate(
103
+ key="garden_spring",
104
+ name="Spring Garden",
105
+ prompt="beautiful spring flower garden, colorful blooming flowers, roses and tulips, manicured hedges, sunny day, botanical paradise, fresh and vibrant",
106
+ negative_extra="dead, winter, wilted, dry",
107
+ category="Nature",
108
+ icon="🌸",
109
+ guidance_scale=7.0
110
+ ),
111
+ "lake_serene": SceneTemplate(
112
+ key="lake_serene",
113
+ name="Serene Lake",
114
+ prompt="peaceful serene lake at dawn, mirror-like water reflection, surrounding mountains, soft morning light, tranquil atmosphere, pristine natural beauty",
115
+ negative_extra="stormy, polluted, industrial",
116
+ category="Nature",
117
+ icon="🏞️",
118
+ guidance_scale=7.0
119
+ ),
120
+ "cherry_blossom": SceneTemplate(
121
+ key="cherry_blossom",
122
+ name="Cherry Blossom",
123
+ prompt="stunning cherry blossom trees in full bloom, pink sakura petals falling gently, Japanese garden aesthetic, soft spring sunlight, romantic atmosphere",
124
+ negative_extra="winter, dead, brown, wilted",
125
+ category="Nature",
126
+ icon="🌸",
127
+ guidance_scale=7.0
128
+ ),
129
+
130
+ # Urban Category
131
+ "city_skyline": SceneTemplate(
132
+ key="city_skyline",
133
+ name="City Skyline",
134
+ prompt="modern city skyline at blue hour, impressive skyscrapers, glass buildings reflecting sunset, urban metropolitan view, cinematic atmosphere",
135
+ negative_extra="slums, dirty, abandoned, ruins",
136
+ category="Urban",
137
+ icon="🌆",
138
+ guidance_scale=7.5
139
+ ),
140
+ "cafe_cozy": SceneTemplate(
141
+ key="cafe_cozy",
142
+ name="Cozy Cafe",
143
+ prompt="warm cozy coffee shop interior, wooden furniture, ambient lighting, exposed brick walls, plants, comfortable atmosphere, artisan cafe vibes",
144
+ negative_extra="fast food, plastic, harsh lighting",
145
+ category="Urban",
146
+ icon="☕",
147
+ guidance_scale=7.0
148
+ ),
149
+ "street_european": SceneTemplate(
150
+ key="street_european",
151
+ name="European Street",
152
+ prompt="charming European cobblestone street, historic buildings, outdoor cafe, flowers on balconies, warm afternoon light, romantic Paris or Rome vibes",
153
+ negative_extra="modern, industrial, ugly, dirty",
154
+ category="Urban",
155
+ icon="🏛️",
156
+ guidance_scale=7.0
157
+ ),
158
+ "night_neon": SceneTemplate(
159
+ key="night_neon",
160
+ name="Neon Nightlife",
161
+ prompt="vibrant city nightlife scene, neon lights and signs, urban night atmosphere, colorful reflections on wet street, cyberpunk aesthetic, electric energy",
162
+ negative_extra="daytime, boring, plain",
163
+ category="Urban",
164
+ icon="🌃",
165
+ guidance_scale=6.5
166
+ ),
167
+ "rooftop_view": SceneTemplate(
168
+ key="rooftop_view",
169
+ name="Rooftop Terrace",
170
+ prompt="luxury rooftop terrace, city panoramic view, modern outdoor furniture, string lights, sunset golden hour, sophisticated urban oasis",
171
+ negative_extra="cheap, dirty, crowded",
172
+ category="Urban",
173
+ icon="🏙️",
174
+ guidance_scale=7.5
175
+ ),
176
+
177
+ # Artistic Category
178
+ "gradient_soft": SceneTemplate(
179
+ key="gradient_soft",
180
+ name="Soft Gradient",
181
+ prompt="smooth soft gradient background, pastel colors blending beautifully, pink to blue to purple transition, dreamy aesthetic, professional portrait backdrop",
182
+ negative_extra="harsh, noisy, textured, busy",
183
+ category="Artistic",
184
+ icon="🎨",
185
+ guidance_scale=8.0
186
+ ),
187
+ "abstract_modern": SceneTemplate(
188
+ key="abstract_modern",
189
+ name="Modern Abstract",
190
+ prompt="modern abstract art background, geometric shapes, bold colors, contemporary design, artistic composition, museum gallery aesthetic",
191
+ negative_extra="realistic, plain, boring",
192
+ category="Artistic",
193
+ icon="🖼️",
194
+ guidance_scale=6.5
195
+ ),
196
+ "vintage_retro": SceneTemplate(
197
+ key="vintage_retro",
198
+ name="Vintage Retro",
199
+ prompt="vintage retro aesthetic background, warm sepia tones, nostalgic 70s vibes, film grain texture, classic photography style, timeless elegance",
200
+ negative_extra="modern, digital, cold, harsh",
201
+ category="Artistic",
202
+ icon="📻",
203
+ guidance_scale=7.0
204
+ ),
205
+ "watercolor_dream": SceneTemplate(
206
+ key="watercolor_dream",
207
+ name="Watercolor Dream",
208
+ prompt="beautiful watercolor painting background, soft flowing colors, artistic brush strokes, dreamy ethereal atmosphere, delicate artistic aesthetic",
209
+ negative_extra="digital, sharp, photorealistic",
210
+ category="Artistic",
211
+ icon="🖌️",
212
+ guidance_scale=6.5
213
+ ),
214
+
215
+ # Seasonal Category
216
+ "autumn_foliage": SceneTemplate(
217
+ key="autumn_foliage",
218
+ name="Autumn Foliage",
219
+ prompt="beautiful autumn scenery, vibrant fall foliage, orange red and golden leaves, maple trees, warm sunlight filtering through, cozy seasonal atmosphere",
220
+ negative_extra="spring, summer, green, snow",
221
+ category="Seasonal",
222
+ icon="🍂",
223
+ guidance_scale=7.0
224
+ ),
225
+ "winter_snow": SceneTemplate(
226
+ key="winter_snow",
227
+ name="Winter Wonderland",
228
+ prompt="magical winter wonderland, fresh white snow covering everything, snow-laden pine trees, soft snowfall, peaceful cold atmosphere, holiday season vibes",
229
+ negative_extra="summer, green, rain, mud",
230
+ category="Seasonal",
231
+ icon="❄️",
232
+ guidance_scale=7.0
233
+ ),
234
+ "summer_tropical": SceneTemplate(
235
+ key="summer_tropical",
236
+ name="Tropical Summer",
237
+ prompt="vibrant tropical summer scene, lush palm trees, bright sunny day, exotic flowers, paradise vacation destination, warm and inviting atmosphere",
238
+ negative_extra="winter, cold, snow, gray",
239
+ category="Seasonal",
240
+ icon="🌴",
241
+ guidance_scale=7.0
242
+ ),
243
+ "spring_meadow": SceneTemplate(
244
+ key="spring_meadow",
245
+ name="Spring Meadow",
246
+ prompt="beautiful spring meadow, wildflowers blooming, fresh green grass, butterflies, soft warm sunlight, renewal and new beginnings, pastoral beauty",
247
+ negative_extra="winter, autumn, dead, dry",
248
+ category="Seasonal",
249
+ icon="🌷",
250
+ guidance_scale=7.0
251
+ ),
252
+ }
253
+
254
+ # Category display order
255
+ CATEGORIES = ["Professional", "Nature", "Urban", "Artistic", "Seasonal"]
256
+
257
    def __init__(self):
        """Initialize the scene template manager"""
        # Templates and categories live on the class itself; the constructor
        # only records how many presets are registered.
        logger.info(f"SceneTemplateManager initialized with {len(self.TEMPLATES)} templates")
260
+
261
    def get_all_templates(self) -> Dict[str, SceneTemplate]:
        """Return the full template registry, keyed by template key.

        NOTE(review): this returns the shared class-level dict, not a copy —
        callers must treat it as read-only.
        """
        return self.TEMPLATES
264
+
265
+ def get_template(self, key: str) -> Optional[SceneTemplate]:
266
+ """Get a specific template by key"""
267
+ return self.TEMPLATES.get(key)
268
+
269
+ def get_templates_by_category(self, category: str) -> List[SceneTemplate]:
270
+ """Get all templates in a specific category"""
271
+ return [t for t in self.TEMPLATES.values() if t.category == category]
272
+
273
    def get_categories(self) -> List[str]:
        """Get list of all categories in display order.

        NOTE(review): returns the shared class-level list — callers must not
        mutate it.
        """
        return self.CATEGORIES
276
+
277
+ def get_template_choices_sorted(self) -> List[str]:
278
+ """
279
+ Get template choices formatted for Gradio dropdown.
280
+ Returns list of display strings sorted A-Z: "🏢 Modern Office"
281
+ """
282
+ display_list = []
283
+ for key, template in self.TEMPLATES.items():
284
+ display_name = f"{template.icon} {template.name}"
285
+ display_list.append(display_name)
286
+
287
+ # Sort alphabetically by name (ignoring emoji)
288
+ display_list.sort(key=lambda x: x.split(' ', 1)[1] if ' ' in x else x)
289
+ return display_list
290
+
291
+ def get_template_key_from_display(self, display_name: str) -> Optional[str]:
292
+ """
293
+ Get template key from display name.
294
+ Example: "🏢 Modern Office" -> "office_modern"
295
+ """
296
+ if not display_name:
297
+ return None
298
+
299
+ for key, template in self.TEMPLATES.items():
300
+ if f"{template.icon} {template.name}" == display_name:
301
+ return key
302
+ return None
303
+
304
+ def get_prompt_for_template(self, key: str) -> Optional[str]:
305
+ """Get the prompt string for a template"""
306
+ template = self.get_template(key)
307
+ return template.prompt if template else None
308
+
309
+ def get_negative_prompt_for_template(
310
+ self,
311
+ key: str,
312
+ base_negative: str = "blurry, low quality, distorted, people, characters"
313
+ ) -> str:
314
+ """Get combined negative prompt for a template"""
315
+ template = self.get_template(key)
316
+ if template and template.negative_extra:
317
+ return f"{base_negative}, {template.negative_extra}"
318
+ return base_negative
319
+
320
+ def get_guidance_scale_for_template(self, key: str) -> float:
321
+ """Get the recommended guidance scale for a template"""
322
+ template = self.get_template(key)
323
+ return template.guidance_scale if template else 7.5
324
+
325
    def build_gallery_html(self) -> str:
        """
        Build HTML for the scene template gallery.
        Returns HTML string for display in Gradio.

        Categories render in CATEGORIES order; empty categories are skipped.
        NOTE(review): each card's onclick calls a global JS `selectTemplate` —
        confirm that function is injected somewhere in the page, otherwise the
        cards do nothing when clicked.
        """
        html_parts = ['<div class="scene-gallery">']

        for category in self.CATEGORIES:
            templates = self.get_templates_by_category(category)
            if not templates:
                continue  # nothing registered under this category

            # Open the category section and its card grid.
            html_parts.append(f'''
            <div class="scene-category">
                <h4 class="scene-category-title">{category}</h4>
                <div class="scene-grid">
            ''')

            # One clickable card per template; the key is carried both in a
            # data attribute and in the onclick argument.
            for template in templates:
                html_parts.append(f'''
                <button class="scene-card" data-template="{template.key}" onclick="selectTemplate('{template.key}')">
                    <span class="scene-icon">{template.icon}</span>
                    <span class="scene-name">{template.name}</span>
                </button>
                ''')

            html_parts.append('</div></div>')

        html_parts.append('</div>')
        return ''.join(html_parts)
355
+
356
    def get_gallery_css(self) -> str:
        """Get CSS styles for the scene gallery.

        Returned verbatim for injection into the Gradio page; class names
        match those emitted by build_gallery_html.
        """
        return """
        /* Scene Gallery Styles */
        .scene-gallery {
            margin: 16px 0;
        }

        .scene-category {
            margin-bottom: 20px;
        }

        .scene-category-title {
            font-size: 0.9rem;
            font-weight: 600;
            color: #475569;
            margin-bottom: 12px;
            padding-bottom: 8px;
            border-bottom: 1px solid #e2e8f0;
        }

        .scene-grid {
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(100px, 1fr));
            gap: 8px;
        }

        .scene-card {
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            padding: 12px 8px;
            background: #f8fafc;
            border: 1px solid #e2e8f0;
            border-radius: 8px;
            cursor: pointer;
            transition: all 0.2s ease;
            min-height: 70px;
        }

        .scene-card:hover {
            background: #dbeafe;
            border-color: #3b82f6;
            transform: translateY(-2px);
            box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
        }

        .scene-card.selected {
            background: #dbeafe;
            border-color: #3b82f6;
            box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3);
        }

        .scene-icon {
            font-size: 1.5rem;
            margin-bottom: 4px;
        }

        .scene-name {
            font-size: 0.75rem;
            font-weight: 500;
            color: #1e293b;
            text-align: center;
            line-height: 1.2;
        }

        @media (max-width: 768px) {
            .scene-grid {
                grid-template-columns: repeat(3, 1fr);
            }
        }
        """
ui_manager.py CHANGED
@@ -1,20 +1,35 @@
1
  import gradio as gr
2
  from PIL import Image
3
- from typing import Tuple
 
 
 
4
  from FlowFacade import FlowFacade
 
 
5
  from css_style import DELTAFLOW_CSS
6
  from prompt_examples import PROMPT_EXAMPLES
7
 
 
 
 
 
 
 
 
 
8
 
9
  class UIManager:
10
- def __init__(self, facade: FlowFacade):
11
  self.facade = facade
 
 
12
 
13
  def create_interface(self) -> gr.Blocks:
14
  with gr.Blocks(
15
  theme=gr.themes.Soft(),
16
  css=DELTAFLOW_CSS,
17
- title="VividFlow - Fast AI Image to Video"
18
  ) as interface:
19
 
20
  # Header
@@ -22,276 +37,523 @@ class UIManager:
22
  <div class="header-container">
23
  <h1 class="header-title">🌊 VividFlow</h1>
24
  <p class="header-subtitle">
25
- Bring Your Images to Life with AI Magic ✨<br>
26
- Transform any still image into dynamic, cinematic videos
27
  </p>
28
  </div>
29
  """)
30
 
31
- with gr.Row():
32
- # Left Panel: Input
33
- with gr.Column(scale=1, elem_classes="input-card"):
34
- gr.Markdown("### 📤 Input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- image_input = gr.Image(
37
- label="Upload Image (any type: photo, art, cartoon, etc.)",
38
- type="pil",
39
- elem_classes="image-upload",
40
- height=320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
 
43
- resolution_info = gr.Markdown(
44
- value="",
45
- visible=False,
46
- elem_classes="info-text"
 
 
47
  )
48
 
49
- prompt_input = gr.Textbox(
50
- label="Motion Instruction",
51
- placeholder="Describe camera movements (zoom, pan, orbit) and subject actions (head turn, hair flow, expression change). Be specific and cinematic! Example: 'Camera slowly zooms in, subject's eyes sparkle, hair flows gently in wind'",
52
- lines=3,
53
- max_lines=6
 
54
  )
55
 
56
- # Quick preset selector
57
- category_dropdown = gr.Dropdown(
58
- choices=list(PROMPT_EXAMPLES.keys()),
59
- label="💡 Quick Prompt Category",
60
- value="💃 Fashion / Beauty (Facial Only)",
61
- interactive=True
62
  )
63
 
64
- example_dropdown = gr.Dropdown(
65
- choices=PROMPT_EXAMPLES["💃 Fashion / Beauty (Facial Only)"],
66
- label="Example Prompts (click to use)",
67
- value=None,
68
- interactive=True
69
  )
70
 
71
- # Quality tips banner (blue)
72
- gr.HTML("""
73
- <div class="quality-banner">
74
- <strong>💡 Choose the Right Prompt Category:</strong><br>
75
- • <strong>💃 Facial Only:</strong> Safe for headshots and portraits without visible hands<br>
76
- • <strong>🙌 Hands Visible Required:</strong> Only use if hands are fully visible in your image (prevents artifacts)<br>
77
- <strong>🌄 Scenery/Objects:</strong> For landscapes, products, and abstract content
78
- </div>
79
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- # Generate button with patience banner
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  gr.HTML("""
83
- <div class="patience-banner">
84
- <strong>⏱️ Models are Initializing!</strong><br>
85
- This first-time generation may take a moment while high-fidelity assets load into memory.<br>
86
- Grab a coffee ☕, and watch the magic happen! Subsequent runs will be significantly faster.
87
  </div>
88
  """)
89
 
90
- generate_btn = gr.Button(
91
- "🎬 Generate Video",
92
- variant="primary",
93
- elem_classes="primary-button",
94
- size="lg"
 
 
95
  )
96
 
97
- # Advanced settings
98
- with gr.Accordion("⚙️ Advanced Settings", open=False):
99
- duration_slider = gr.Slider(
100
- minimum=0.5,
101
- maximum=5.0,
102
- step=0.5,
103
- value=3.0,
104
- label="Duration (seconds)",
105
- info="3.0s = 49 frames, 5.0s = 81 frames (16fps)"
106
- )
107
-
108
- steps_slider = gr.Slider(
109
- minimum=4,
110
- maximum=12,
111
- step=1,
112
- value=4,
113
- label="Inference Steps",
114
- info="4-6 recommended • Higher steps = longer generation time"
115
- )
116
-
117
- with gr.Row():
118
- guidance_scale = gr.Slider(
119
- minimum=0.0,
120
- maximum=5.0,
121
- step=0.5,
122
- value=1.0,
123
- label="Guidance Scale (high noise)"
124
- )
125
-
126
- guidance_scale_2 = gr.Slider(
127
- minimum=0.0,
128
- maximum=5.0,
129
- step=0.5,
130
- value=1.0,
131
- label="Guidance Scale (low noise)"
132
- )
133
-
134
- with gr.Row():
135
- seed_input = gr.Number(
136
- label="Seed",
137
- value=42,
138
- precision=0,
139
- minimum=0,
140
- maximum=2147483647,
141
- info="Use same seed for reproducible results"
142
- )
143
-
144
- randomize_seed = gr.Checkbox(
145
- label="Randomize Seed",
146
- value=True,
147
- info="Generate different results each time"
148
- )
149
-
150
- enable_ai_prompt = gr.Checkbox(
151
- label="🤖 Enable AI Prompt Expansion (Qwen2.5)",
152
- value=False,
153
- info="Use AI to enhance your prompt (adds ~30s)"
154
- )
155
 
156
- # Right Panel: Output
157
- with gr.Column(scale=1, elem_classes="output-card"):
158
- gr.Markdown("### 🎥 Output")
 
 
 
 
 
159
 
160
- video_output = gr.Video(
161
- label="Generated Video",
162
- height=400,
163
- autoplay=True
 
 
 
164
  )
165
 
166
- with gr.Row():
167
- prompt_output = gr.Textbox(
168
- label="Final Prompt Used",
169
- lines=3,
170
- interactive=False,
171
- scale=3
172
- )
173
 
174
- seed_output = gr.Number(
175
- label="Seed Used",
176
- precision=0,
177
- interactive=False,
178
- scale=1
179
- )
180
 
181
- # Info section
182
- with gr.Row():
183
  gr.HTML("""
184
- <div class="info-box">
185
- <strong>ℹ️ Tips for Best Results:</strong><br>
186
- <strong>Use example prompts:</strong> Select a category above and click an example to get started<br>
187
- • <strong>Works with ANY image:</strong> Fashion portraits, anime, landscapes, products, abstract art, etc.<br>
188
- • <strong>For dramatic effects:</strong> Choose prompts with words like "explosive", "dramatic", "swirls", "transforms"<br>
189
- • <strong>Image quality matters:</strong> Higher resolution and clear subjects produce better results
190
  </div>
191
  """)
192
 
193
- # Footer
194
- gr.HTML("""
195
- <div class="footer">
196
- <p style="font-size: 0.9rem;">
197
- <strong>Powered by:</strong><br>
198
- <a href="https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers" target="_blank" style="color: #6366f1; text-decoration: none;">Wan2.2-I2V-A14B</a> (Wan-AI, optimized by <a href="https://huggingface.co/cbensimon" target="_blank" style="color: #6366f1; text-decoration: none;">cbensimon</a>)
199
- · Lightning LoRA (<a href="https://huggingface.co/Kijai/WanVideo_comfy" target="_blank" style="color: #6366f1; text-decoration: none;">Lightx2v</a>)
200
- · <a href="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct" target="_blank" style="color: #6366f1; text-decoration: none;">Qwen2.5-0.5B</a>
201
- </p>
202
- </div>
203
- """)
204
-
205
- def update_examples(category):
206
- return gr.update(choices=PROMPT_EXAMPLES[category], value=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- def fill_prompt(selected_example):
209
- return selected_example if selected_example else ""
210
-
211
- def show_resolution_info(image):
212
- if image is None:
213
- return gr.update(value="", visible=False)
214
-
215
- from PIL import Image
216
- original_w, original_h = image.size
217
- resized_image = self.facade.video_engine.resize_image(image)
218
- output_w, output_h = resized_image.width, resized_image.height
219
-
220
- info = f"**📐 Resolution:** Input: {original_w}×{original_h} → Output: {output_w}×{output_h}"
221
- return gr.update(value=info, visible=True)
222
-
223
- category_dropdown.change(fn=update_examples, inputs=[category_dropdown],
224
- outputs=[example_dropdown])
225
- example_dropdown.change(fn=fill_prompt, inputs=[example_dropdown],
226
- outputs=[prompt_input])
227
- image_input.change(fn=show_resolution_info, inputs=[image_input],
228
- outputs=[resolution_info])
229
-
230
- generate_btn.click(
231
- fn=self._handle_generation,
232
- inputs=[
233
- image_input,
234
- prompt_input,
235
- duration_slider,
236
- steps_slider,
237
- guidance_scale,
238
- guidance_scale_2,
239
- seed_input,
240
- randomize_seed,
241
- enable_ai_prompt
242
- ],
243
- outputs=[video_output, prompt_output, seed_output],
244
- show_progress=True
245
- )
246
 
247
- return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
- def _handle_generation(self, image: Image.Image, prompt: str, duration: float,
250
- steps: int, guidance_1: float, guidance_2: float, seed: int,
251
- randomize: bool, enable_ai: bool,
252
- progress=gr.Progress()) -> Tuple[str, str, int]:
253
  try:
254
- if image is None:
255
- raise gr.Error("❌ Please upload an image")
256
- if not prompt or prompt.strip() == "":
257
- raise gr.Error("❌ Please provide a motion instruction")
258
- if not self.facade.validate_image(image):
259
- raise gr.Error("❌ Image dimensions invalid (256-4096px)")
260
 
261
- video_path, final_prompt, seed_used = self.facade.generate_video_from_image(
262
- image=image,
263
- user_instruction=prompt,
264
- duration_seconds=duration,
265
- num_inference_steps=steps,
266
- guidance_scale=guidance_1,
267
- guidance_scale_2=guidance_2,
268
- seed=int(seed),
269
- randomize_seed=randomize,
270
- enable_prompt_expansion=enable_ai,
271
- progress=progress
272
  )
273
 
274
- return video_path, final_prompt, seed_used
275
-
276
- except gr.Error:
277
- raise
 
 
 
 
 
 
 
278
 
279
  except Exception as e:
280
- import traceback
281
- import os
282
- error_msg = str(e)
283
-
284
- if os.environ.get('DEBUG'):
285
- print(f"\n✗ UI Error: {type(e).__name__}")
286
- print(traceback.format_exc())
287
-
288
- if "CUDA out of memory" in error_msg or "OutOfMemoryError" in error_msg:
289
- raise gr.Error("❌ GPU memory insufficient. Try reducing duration/steps or restart.")
290
- else:
291
- raise gr.Error(f"❌ Generation failed: {error_msg}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
- def launch(self, share: bool = False, server_name: str = "0.0.0.0",
294
- server_port: int = None, **kwargs) -> None:
295
- interface = self.create_interface()
296
- interface.launch(share=share, server_name=server_name,
297
- server_port=server_port, **kwargs)
 
1
  import gradio as gr
2
  from PIL import Image
3
+ from typing import Tuple, Optional, Dict, Any
4
+ import os
5
+ import logging
6
+
7
  from FlowFacade import FlowFacade
8
+ from BackgroundEngine import BackgroundEngine
9
+ from scene_templates import SceneTemplateManager
10
  from css_style import DELTAFLOW_CSS
11
  from prompt_examples import PROMPT_EXAMPLES
12
 
13
+ try:
14
+ import spaces
15
+ SPACES_AVAILABLE = True
16
+ except ImportError:
17
+ SPACES_AVAILABLE = False
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
 
22
  class UIManager:
23
    def __init__(self, facade: FlowFacade, background_engine: BackgroundEngine):
        """Wire the UI layer to its backing engines.

        Args:
            facade: Orchestrates image-to-video generation.
            background_engine: Background-replacement engine for the second tab.
        """
        self.facade = facade
        self.background_engine = background_engine
        # Scene presets consumed by the Background Generation tab.
        self.template_manager = SceneTemplateManager()
27
 
28
  def create_interface(self) -> gr.Blocks:
29
  with gr.Blocks(
30
  theme=gr.themes.Soft(),
31
  css=DELTAFLOW_CSS,
32
+ title="VividFlow - AI Image Enhancement & Video Generation"
33
  ) as interface:
34
 
35
  # Header
 
37
  <div class="header-container">
38
  <h1 class="header-title">🌊 VividFlow</h1>
39
  <p class="header-subtitle">
40
+ AI-Powered Image Enhancement & Video Generation<br>
41
+ Transform images with background replacement, then bring them to life with AI
42
  </p>
43
  </div>
44
  """)
45
 
46
+ # Main Tabs
47
+ with gr.Tabs() as main_tabs:
48
+
49
+ # Tab 1: Image to Video (Original Functionality)
50
+ with gr.Tab("🎬 Image to Video"):
51
+ self._create_i2v_tab()
52
+
53
+ # Tab 2: Background Generation (New Feature)
54
+ with gr.Tab("🎨 Background Generation"):
55
+ self._create_background_tab()
56
+
57
+ # Footer
58
+ gr.HTML("""
59
+ <div class="footer">
60
+ <p>Powered by Wan2.2-I2V-A14B, SDXL, and OpenCLIP | Built with Gradio</p>
61
+ </div>
62
+ """)
63
+
64
+ return interface
65
 
66
+ def _create_i2v_tab(self):
67
+ """Create Image to Video tab (original VividFlow functionality)"""
68
+ with gr.Row():
69
+ # Left Panel: Input
70
+ with gr.Column(scale=1, elem_classes="input-card"):
71
+ gr.Markdown("### 📤 Input")
72
+
73
+ image_input = gr.Image(
74
+ label="Upload Image (any type: photo, art, cartoon, etc.)",
75
+ type="pil",
76
+ elem_classes="image-upload",
77
+ height=320
78
+ )
79
+
80
+ resolution_info = gr.Markdown(
81
+ value="",
82
+ visible=False,
83
+ elem_classes="info-text"
84
+ )
85
+
86
+ prompt_input = gr.Textbox(
87
+ label="Motion Instruction",
88
+ placeholder="Describe camera movements and subject actions...",
89
+ lines=3,
90
+ max_lines=6
91
+ )
92
+
93
+ category_dropdown = gr.Dropdown(
94
+ choices=list(PROMPT_EXAMPLES.keys()),
95
+ label="💡 Quick Prompt Category",
96
+ value="💃 Fashion / Beauty (Facial Only)",
97
+ interactive=True
98
+ )
99
+
100
+ example_dropdown = gr.Dropdown(
101
+ choices=PROMPT_EXAMPLES["💃 Fashion / Beauty (Facial Only)"],
102
+ label="Example Prompts (click to use)",
103
+ value=None,
104
+ interactive=True
105
+ )
106
+
107
+ gr.HTML("""
108
+ <div class="quality-banner">
109
+ <strong>💡 Choose the Right Prompt Category:</strong><br>
110
+ • <strong>💃 Facial Only:</strong> Safe for headshots without visible hands<br>
111
+ • <strong>🙌 Hands Visible Required:</strong> Only use if hands are fully visible<br>
112
+ • <strong>🌄 Scenery/Objects:</strong> For landscapes, products, abstract content
113
+ </div>
114
+ """)
115
+
116
+ gr.HTML("""
117
+ <div class="patience-banner">
118
+ <strong>⏱️ First-time loading may take a moment!</strong><br>
119
+ Subsequent runs will be much faster.
120
+ </div>
121
+ """)
122
+
123
+ generate_btn = gr.Button(
124
+ "🎬 Generate Video",
125
+ variant="primary",
126
+ elem_classes="primary-button",
127
+ size="lg"
128
+ )
129
+
130
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
131
+ duration_slider = gr.Slider(
132
+ minimum=0.5,
133
+ maximum=5.0,
134
+ value=3.0,
135
+ step=0.5,
136
+ label="Video Duration (seconds)"
137
  )
138
 
139
+ steps_slider = gr.Slider(
140
+ minimum=4,
141
+ maximum=25,
142
+ value=4,
143
+ step=1,
144
+ label="Quality Steps (4=Lightning Fast, 8-25=Higher Quality)"
145
  )
146
 
147
+ fps_slider = gr.Slider(
148
+ minimum=8,
149
+ maximum=24,
150
+ value=16,
151
+ step=1,
152
+ label="Frames Per Second"
153
  )
154
 
155
+ expand_prompt = gr.Checkbox(
156
+ label="AI Prompt Expansion (experimental)",
157
+ value=False
 
 
 
158
  )
159
 
160
+ randomize_seed = gr.Checkbox(
161
+ label="Randomize Seed",
162
+ value=True
 
 
163
  )
164
 
165
+ seed_input = gr.Number(
166
+ label="Manual Seed (if not randomized)",
167
+ value=42,
168
+ precision=0
169
+ )
170
+
171
+ # Right Panel: Output
172
+ with gr.Column(scale=1, elem_classes="output-card"):
173
+ gr.Markdown("### 🎥 Output")
174
+
175
+ video_output = gr.Video(
176
+ label="Generated Video",
177
+ elem_classes="video-player"
178
+ )
179
+
180
+ final_prompt_output = gr.Textbox(
181
+ label="Final Prompt Used",
182
+ interactive=False,
183
+ lines=2
184
+ )
185
+
186
+ seed_output = gr.Number(
187
+ label="Seed Used",
188
+ interactive=False,
189
+ precision=0
190
+ )
191
+
192
+ # Event handlers for I2V tab
193
def update_resolution_display(img):
    """Show how the uploaded image maps to the output resolution.

    Output dimensions are snapped down to multiples of 16; hidden when
    no image is present.
    """
    if img is None:
        return gr.update(visible=False)
    width, height = img.size
    # floor each dimension to the nearest multiple of 16
    out_w = width - width % 16
    out_h = height - height % 16
    return gr.update(
        value=f"📐 **Resolution:** Input: {width}×{height} → Output: {out_w}×{out_h}",
        visible=True
    )
203
+
204
def category_changed(category):
    """Refresh the example dropdown to list prompts for the chosen category."""
    # Unknown category: leave the dropdown untouched.
    if category not in PROMPT_EXAMPLES:
        return gr.update()
    # Known category: swap in its examples and clear the current selection.
    return gr.update(choices=PROMPT_EXAMPLES[category], value=None)
208
+
209
def example_selected(example):
    """Copy the chosen example into the prompt box; blank when nothing is chosen."""
    # `or` collapses every falsy selection (None, "") to the empty string.
    return example or ""
211
+
212
+ image_input.change(
213
+ fn=update_resolution_display,
214
+ inputs=[image_input],
215
+ outputs=[resolution_info]
216
+ )
217
+
218
+ category_dropdown.change(
219
+ fn=category_changed,
220
+ inputs=[category_dropdown],
221
+ outputs=[example_dropdown]
222
+ )
223
+
224
+ example_dropdown.change(
225
+ fn=example_selected,
226
+ inputs=[example_dropdown],
227
+ outputs=[prompt_input]
228
+ )
229
+
230
+ generate_btn.click(
231
+ fn=self._generate_video_handler,
232
+ inputs=[
233
+ image_input, prompt_input, duration_slider,
234
+ steps_slider, fps_slider, expand_prompt,
235
+ randomize_seed, seed_input
236
+ ],
237
+ outputs=[video_output, final_prompt_output, seed_output]
238
+ )
239
+
240
+ def _generate_video_handler(
241
+ self,
242
+ image: Image.Image,
243
+ prompt: str,
244
+ duration: float,
245
+ steps: int,
246
+ fps: int,
247
+ expand_prompt: bool,
248
+ randomize_seed: bool,
249
+ seed: int
250
+ ) -> Tuple[str, str, int]:
251
+ """Handler for video generation"""
252
+ if image is None:
253
+ return None, "Please upload an image", 0
254
+
255
+ if not prompt.strip():
256
+ return None, "Please provide a motion prompt", 0
257
+
258
+ try:
259
+ video_path, final_prompt, seed_used = self.facade.generate_video_from_image(
260
+ image=image,
261
+ user_instruction=prompt,
262
+ duration_seconds=duration,
263
+ num_inference_steps=steps,
264
+ enable_prompt_expansion=expand_prompt,
265
+ randomize_seed=randomize_seed,
266
+ seed=seed
267
+ )
268
+ return video_path, final_prompt, seed_used
269
+
270
+ except Exception as e:
271
+ logger.error(f"Video generation failed: {e}")
272
+ return None, f"Error: {str(e)}", 0
273
+
274
+
275
def _create_background_tab(self):
    """Create Background Generation tab (SceneWeaver functionality).

    Builds a two-column layout — input controls on the left, results
    gallery on the right — then registers the tab's event handlers.
    All widget handles are locals; they are referenced only by the
    .change()/.click() bindings at the bottom of this method.
    """
    with gr.Row():
        # Left Panel: Input
        with gr.Column(scale=1, elem_classes="feature-card"):
            gr.Markdown("### 📸 Upload & Configure")

            gr.HTML("""
                <div class="quality-banner">
                    <strong>💡 Best Results Tips:</strong><br>
                    • Clean portrait photos with simple backgrounds work best<br>
                    • Complex scenes (e.g., pets with grass) may need parameter adjustments<br>
                    • Use Advanced Options below to fine-tune edge blending
                </div>
            """)

            bg_image_input = gr.Image(
                label="Upload Your Image",
                type="pil",  # hand the handler a PIL image, not a numpy array
                height=280
            )

            # Scene Template Selector
            template_dropdown = gr.Dropdown(
                label="Scene Templates (24 curated scenes A-Z)",
                # leading "" entry = "no template selected"
                choices=[""] + self.template_manager.get_template_choices_sorted(),
                value="",
                info="Optional: Select a preset or describe your own",
                elem_classes=["template-dropdown"]
            )

            bg_prompt_input = gr.Textbox(
                label="Background Scene Description",
                placeholder="Select a template above or describe your own scene...",
                lines=3
            )

            combination_mode = gr.Dropdown(
                label="Composition Mode",
                choices=["center", "left_half", "right_half", "full"],
                value="center",
                info="center=Smart Center | full=Full Image"
            )

            focus_mode = gr.Dropdown(
                label="Focus Mode",
                choices=["person", "scene"],
                value="person",
                info="person=Tight Crop | scene=Include Surrounding"
            )

            with gr.Accordion("Advanced Options", open=False):
                gr.HTML("""
                    <div style="padding: 8px; background: #f0f4ff; border-radius: 6px; margin-bottom: 12px; font-size: 13px;">
                        <strong>💡 When to Adjust:</strong><br>
                        <strong>Feather Radius:</strong> Use 5-10 for complex scenes with fine details (hair, fur, foliage). 0 = sharp edges for clean portraits.<br>
                        <strong>Mask Preview:</strong> Check the "Mask Preview" tab after generation. White = kept, Black = replaced. Helps diagnose edge issues.
                    </div>
                """)

                feather_radius_slider = gr.Slider(
                    label="Feather Radius (Edge Softness)",
                    minimum=0,
                    maximum=20,
                    value=0,
                    step=1,
                    info="Softens mask edges. Try 5-10 if edges look harsh."
                )

                bg_negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="blurry, low quality, distorted, people, characters",
                    lines=2,
                    info="Prevents unwanted elements in background"
                )

                bg_steps_slider = gr.Slider(
                    label="Quality Steps",
                    minimum=15,
                    maximum=50,
                    value=25,
                    step=5,
                    info="Higher = better quality but slower"
                )

                bg_guidance_slider = gr.Slider(
                    label="Guidance Scale",
                    minimum=5.0,
                    maximum=15.0,
                    value=7.5,
                    step=0.5,
                    info="How strictly to follow prompt"
                )

            generate_bg_btn = gr.Button(
                "🎨 Generate Background",
                variant="primary",
                elem_classes="primary-button",
                size="lg"
            )

        # Right Panel: Output
        with gr.Column(scale=2, elem_classes="feature-card"):
            gr.Markdown("### 🎭 Results Gallery")

            gr.HTML("""
                <div class="patience-banner">
                    <strong>⏱️ First-time users:</strong> Initial model loading takes 1-2 minutes.
                    Subsequent generations are much faster (~30s).
                </div>
            """)

            with gr.Tabs():
                with gr.TabItem("Final Result"):
                    bg_combined_output = gr.Image(
                        label="Your Generated Image",
                        elem_classes=["result-gallery"]
                    )
                with gr.TabItem("Background"):
                    bg_generated_output = gr.Image(
                        label="Generated Background",
                        elem_classes=["result-gallery"]
                    )
                with gr.TabItem("Original"):
                    bg_original_output = gr.Image(
                        label="Processed Original",
                        elem_classes=["result-gallery"]
                    )
                with gr.TabItem("Mask Preview"):
                    gr.HTML("""
                        <div style="padding: 8px; background: #f0f4ff; border-radius: 6px; margin-bottom: 8px; font-size: 13px;">
                            <strong>📐 How to Read:</strong> White = Original kept | Black = Background replaced<br>
                            Use this to diagnose edge quality. If edges are too harsh, increase Feather Radius.
                        </div>
                    """)
                    bg_mask_output = gr.Image(
                        label="Blending Mask",
                        elem_classes=["result-gallery"]
                    )

            bg_status_output = gr.Textbox(
                label="Status",
                value="Ready to create! Upload an image and describe your vision.",
                interactive=False,
                elem_classes=["status-panel"]
            )

            with gr.Row():
                clear_bg_btn = gr.Button(
                    "Clear All",
                    elem_classes=["secondary-button"]
                )
                memory_btn = gr.Button(
                    "Clean Memory",
                    elem_classes=["secondary-button"]
                )

    # Event handlers for Background Generation tab
    def apply_template(display_name: str, current_negative: str) -> Tuple[str, str, float]:
        # Map the dropdown's display name to a scene template and return
        # (prompt, negative_prompt, guidance_scale) for the three outputs.
        # Falls back to an empty prompt, the user's current negative prompt,
        # and the default guidance (7.5) when nothing is selected or any
        # lookup step fails.
        if not display_name:
            return "", current_negative, 7.5

        template_key = self.template_manager.get_template_key_from_display(display_name)
        if not template_key:
            return "", current_negative, 7.5

        template = self.template_manager.get_template(template_key)
        if template:
            prompt = template.prompt
            negative = self.template_manager.get_negative_prompt_for_template(
                template_key, current_negative
            )
            guidance = template.guidance_scale
            return prompt, negative, guidance

        return "", current_negative, 7.5

    template_dropdown.change(
        fn=apply_template,
        inputs=[template_dropdown, bg_negative_prompt],
        outputs=[bg_prompt_input, bg_negative_prompt, bg_guidance_slider]
    )

    generate_bg_btn.click(
        fn=self._generate_background_handler,
        inputs=[
            bg_image_input, bg_prompt_input, combination_mode,
            focus_mode, bg_negative_prompt, bg_steps_slider, bg_guidance_slider,
            feather_radius_slider
        ],
        outputs=[
            bg_combined_output, bg_generated_output,
            bg_original_output, bg_mask_output, bg_status_output
        ]
    )

    clear_bg_btn.click(
        fn=lambda: (None, None, None, None, "Ready to create!"),
        outputs=[
            bg_combined_output, bg_generated_output,
            bg_original_output, bg_mask_output, bg_status_output
        ]
    )

    memory_btn.click(
        # `or` yields the status string when _memory_cleanup() returns a
        # falsy value (presumably None — confirm against BackgroundEngine)
        fn=lambda: self.background_engine._memory_cleanup() or "Memory cleaned!",
        outputs=[bg_status_output]
    )
483
+
484
+ def _generate_background_handler(
485
+ self,
486
+ image: Image.Image,
487
+ prompt: str,
488
+ combination_mode: str,
489
+ focus_mode: str,
490
+ negative_prompt: str,
491
+ steps: int,
492
+ guidance: float,
493
+ feather_radius: int
494
+ ) -> Tuple[Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], Optional[Image.Image], str]:
495
+ """Handler for background generation"""
496
+ if image is None:
497
+ return None, None, None, None, "Please upload an image to get started!"
498
+
499
+ if not prompt.strip():
500
+ return None, None, None, None, "Please describe the background scene you'd like!"
501
 
 
 
 
 
502
  try:
503
+ # Apply ZeroGPU decorator if available
504
+ if SPACES_AVAILABLE:
505
+ generate_fn = spaces.GPU(duration=60)(self._background_generate_core)
506
+ else:
507
+ generate_fn = self._background_generate_core
 
508
 
509
+ result = generate_fn(
510
+ image, prompt, combination_mode, focus_mode,
511
+ negative_prompt, steps, guidance, feather_radius
 
 
 
 
 
 
 
 
512
  )
513
 
514
+ if result["success"]:
515
+ return (
516
+ result["combined_image"],
517
+ result["generated_scene"],
518
+ result["original_image"],
519
+ result["mask"],
520
+ "Image created successfully!"
521
+ )
522
+ else:
523
+ error_msg = result.get("error", "Something went wrong")
524
+ return None, None, None, None, f"Error: {error_msg}"
525
 
526
  except Exception as e:
527
+ logger.error(f"Background generation failed: {e}")
528
+ return None, None, None, None, f"Error: {str(e)}"
529
+
530
+ def _background_generate_core(
531
+ self,
532
+ image: Image.Image,
533
+ prompt: str,
534
+ combination_mode: str,
535
+ focus_mode: str,
536
+ negative_prompt: str,
537
+ steps: int,
538
+ guidance: float,
539
+ feather_radius: int
540
+ ) -> Dict[str, Any]:
541
+ """Core background generation with models"""
542
+ if not self.background_engine.is_initialized:
543
+ logger.info("Loading background generation models...")
544
+ self.background_engine.load_models()
545
+
546
+ result = self.background_engine.generate_and_combine(
547
+ original_image=image,
548
+ prompt=prompt,
549
+ combination_mode=combination_mode,
550
+ focus_mode=focus_mode,
551
+ negative_prompt=negative_prompt,
552
+ num_inference_steps=int(steps),
553
+ guidance_scale=float(guidance),
554
+ enable_prompt_enhancement=True,
555
+ feather_radius=int(feather_radius)
556
+ )
557
+
558
+ return result
559