ayushpfullstack committed
Commit c494a9e · verified · 1 parent: 893006f

Update main.py

Files changed (1): main.py (+51 -35)
main.py CHANGED
@@ -1,4 +1,5 @@
 import torch
+import torch.nn.functional as F
 import numpy as np
 import cv2
 from PIL import Image
@@ -11,8 +12,8 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from contextlib import asynccontextmanager
 
-# Diffusers & Transformers Libraries
-from transformers import pipeline
+# Diffusers & Transformers Libraries - UPDATED IMPORTS
+from transformers import DPTForSemanticSegmentation, DPTImageProcessor, DPTForDepthEstimation
 from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel, UniPCMultistepScheduler
 
 # --- API Data Models ---
@@ -32,11 +33,17 @@ async def lifespan(app: FastAPI):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
-    models['segmentation_pipeline'] = pipeline("image-segmentation", model="Intel/dpt-large-ade", device=device)
-    models['depth_estimator'] = pipeline("depth-estimation", model="Intel/dpt-hybrid-midas", device=device)
+    # --- UPDATED: Load processors and models separately ---
+    # Segmentation model
+    models['seg_processor'] = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
+    models['seg_model'] = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade").to(device)
+
+    # Depth estimation model
+    models['depth_processor'] = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
+    models['depth_model'] = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
 
+    # ControlNet and Inpainting Pipeline
     controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth", torch_dtype=torch_dtype)
-
     models['inpainting_pipe'] = StableDiffusionControlNetInpaintPipeline.from_pretrained(
         "runwayml/stable-diffusion-v1-5",
         controlnet=controlnet,
@@ -55,40 +62,49 @@ app = FastAPI(lifespan=lifespan)
 
 # --- Helper Functions (Core Logic) ---
 def create_precise_mask(image_pil: Image.Image) -> Image.Image:
-    # REVERTED CHANGE: Pass the PIL image directly to the pipeline
-    segments = models['segmentation_pipeline'](image_pil)
-    W, H = image_pil.size
-    inclusion_mask_np = np.zeros((H, W), dtype=np.uint8)
-    exclusion_mask_np = np.zeros((H, W), dtype=np.uint8)
-    inclusion_labels = {"wall", "floor", "ceiling"}
-    base_exclusion_labels = {"door", "window", "windowpane", "window blind"}
-    insert_labels = {"painting", "picture", "shelf", "showcase", "cabinet", "mirror", "television", "radiator"}
-    walls, inserts = [], []
-    for segment in segments:
-        label, mask = segment['label'], np.array(segment['mask'])
-        if label in inclusion_labels:
-            inclusion_mask_np = np.maximum(inclusion_mask_np, mask)
-            if label == "wall": walls.append(mask)
-        if label in base_exclusion_labels:
-            exclusion_mask_np = np.maximum(exclusion_mask_np, mask)
-        if label in insert_labels:
-            inserts.append(mask)
-    for insert_mask in inserts:
-        for wall_mask in walls:
-            if np.all((wall_mask >= insert_mask)[insert_mask > 0]):
-                exclusion_mask_np = np.maximum(exclusion_mask_np, insert_mask)
-                break
-    raw_mask_np = np.copy(inclusion_mask_np); raw_mask_np[exclusion_mask_np > 0] = 0
+    # --- UPDATED: Manual processing and inference ---
+    processor = models['seg_processor']
+    model = models['seg_model']
+
+    inputs = processor(images=image_pil, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    logits = outputs.logits
+    # ADE20k has 150 classes
+    upsampled_logits = F.interpolate(logits, size=image_pil.size[::-1], mode="bilinear", align_corners=False)
+    pred_seg = upsampled_logits.argmax(dim=1)[0].cpu().numpy().astype(np.uint8)
+
+    # Use a simplified mapping for room structure labels
+    # Wall=2, Floor=3, Ceiling=5 (based on common ADE20k indices)
+    inclusion_indices = {2, 3, 5}
+    # Door=14, Window=17
+    exclusion_indices = {14, 17}
+
+    inclusion_mask_np = np.isin(pred_seg, list(inclusion_indices)).astype(np.uint8) * 255
+    exclusion_mask_np = np.isin(pred_seg, list(exclusion_indices)).astype(np.uint8) * 255
+
+    raw_mask_np = np.copy(inclusion_mask_np)
+    raw_mask_np[exclusion_mask_np > 0] = 0
     mask_filled_np = cv2.morphologyEx(raw_mask_np, cv2.MORPH_CLOSE, np.ones((10,10),np.uint8))
     return Image.fromarray(mask_filled_np)
 
 def generate_depth_map(image_pil: Image.Image) -> Image.Image:
-    # REVERTED CHANGE: Pass the PIL image directly to the pipeline
-    predicted_depth = models['depth_estimator'](image_pil)['predicted_depth']
-    depth_map_np = predicted_depth.cpu().numpy()
-    depth_map_np = (depth_map_np - depth_map_np.min()) / (depth_map_np.max() - depth_map_np.min()) * 255.0
-    depth_map_np = depth_map_np.astype(np.uint8)
-    return Image.fromarray(np.concatenate([depth_map_np[..., None]] * 3, axis=-1))
+    # --- UPDATED: Manual processing and inference ---
+    processor = models['depth_processor']
+    model = models['depth_model']
+
+    inputs = processor(images=image_pil, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    predicted_depth = outputs.predicted_depth
+    prediction = F.interpolate(predicted_depth.unsqueeze(1), size=image_pil.size[::-1], mode="bicubic", align_corners=False)
+
+    depth_map = prediction.squeeze().cpu().numpy()
+    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0
+    depth_map = depth_map.astype(np.uint8)
+    return Image.fromarray(np.concatenate([depth_map[..., None]] * 3, axis=-1))
 
 # --- API Endpoints ---
 @app.get("/")
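Note on the new create_precise_mask: the class indices are hardcoded and only described as "based on common ADE20k indices" in the commit itself. The checkpoint's config.id2label is the authoritative mapping, so a safer variant looks the indices up by label name. A minimal sketch, not part of this commit; the indices_for helper is hypothetical:

# Sketch: derive ADE20K class indices from the checkpoint's own id2label
# mapping instead of hardcoding them. The helper name indices_for is
# hypothetical and not part of this commit.
from transformers import DPTForSemanticSegmentation

seg_model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")
label2id = {label: idx for idx, label in seg_model.config.id2label.items()}

def indices_for(names):
    # Keep only labels that actually exist in this checkpoint's mapping.
    return {label2id[n] for n in names if n in label2id}

inclusion_indices = indices_for(["wall", "floor", "ceiling"])  # room structure
exclusion_indices = indices_for(["door", "windowpane"])        # openings to mask out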
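For reference, the two helpers feed the ControlNet inpainting pipeline loaded in lifespan roughly as follows. This is a hypothetical usage sketch rather than code from this commit; the input path, prompt, and step count are placeholders:

from PIL import Image

image = Image.open("room.jpg").convert("RGB")  # placeholder input path
mask = create_precise_mask(image)              # white where inpainting is allowed
control = generate_depth_map(image)            # 3-channel depth image for ControlNet

result = models['inpainting_pipe'](
    prompt="placeholder style prompt",
    image=image,
    mask_image=mask,
    control_image=control,
    num_inference_steps=30,
).images[0]
result.save("restyled_room.png")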