Spaces:

maldons77
/

ai-storyboard-creator

Sleeping

App Files Files Community

maldons77 committed on Aug 14

Commit

eba2043

verified ·

1 Parent(s): d1f998c

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -81

app.py CHANGED Viewed

@@ -1,123 +1,213 @@
-import os
 import gradio as gr
-from transformers import pipeline
-from diffusers import StableDiffusionPipeline
 from PIL import Image, ImageDraw, ImageFont
 import torch
-import tempfile
-from fpdf import FPDF
-import spaces
-# Global caches
-_txtgen = None
-_t2i_pipe = None
-# GPU function for image generation
-@spaces.GPU(duration=60)
-def t2i_generate(prompt, style, width, height, num_images, seed=None):
-    global _t2i_pipe
-    if _t2i_pipe is None:
-        model_id = "stabilityai/sd-turbo"
-        _t2i_pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-        _t2i_pipe.to("cuda")
-    generator = torch.Generator(device="cuda")
-    if seed:
-        generator = generator.manual_seed(int(seed))
-    images = _t2i_pipe(
-        f"{style} style, {prompt}",
-        num_inference_steps=2,
-        guidance_scale=0.0,
-        width=width,
-        height=height,
-        generator=generator,
-        num_images_per_prompt=num_images
-    ).images
-    return images
-# CPU function for text generation (always on CPU for ZeroGPU compatibility)
-def get_txtgen():
     global _txtgen
     if _txtgen is None:
         _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
     return _txtgen
-def generate_captions(prompt, n=3):
-    gen = get_txtgen()
     outputs = []
-    for _ in range(n):
-        text = gen(f"{prompt} - scene description:", max_length=30, num_return_sequences=1)[0]['generated_text']
-        outputs.append(text.strip())
     return outputs
-# Storyboard PDF creation
-def create_storyboard(user_prompt, style, num_panels, width, height, seed):
-    # Generate captions
-    captions = generate_captions(user_prompt, n=num_panels)
-    # Generate images on GPU
-    images = t2i_generate(user_prompt, style, width, height, num_panels, seed)
-    # Add captions to images
-    final_images = []
-    for img, text in zip(images, captions):
-        img_with_text = img.copy()
-        draw = ImageDraw.Draw(img_with_text)
-        try:
-            font = ImageFont.truetype("arial.ttf", 20)
-        except:
-            font = ImageFont.load_default()
-        # FIX: textsize -> textbbox for Pillow >= 10
-        bbox = draw.textbbox((0, 0), text, font=font)
-        text_w = bbox[2] - bbox[0]
-        text_h = bbox[3] - bbox[1]
-        x = (img_with_text.width - text_w) / 2
-        y = img_with_text.height - text_h - 10
-        draw.rectangle([x-5, y-5, x+text_w+5, y+text_h+5], fill=(0, 0, 0, 127))
-        draw.text((x, y), text, font=font, fill=(255, 255, 255))
-        final_images.append(img_with_text)
-    # Save PDF
     pdf_path = tempfile.mktemp(suffix=".pdf")
     pdf = FPDF()
-    for img in final_images:
-        temp_path = tempfile.mktemp(suffix=".png")
-        img.save(temp_path)
         pdf.add_page()
-        pdf.image(temp_path, x=10, y=10, w=180)
     pdf.output(pdf_path)
     return final_images, pdf_path
-# Gradio UI
-with gr.Blocks() as demo:
     gr.Markdown(
         """
         # 🎬 AI Storyboard Creator
-        Generate a short storyboard from a single prompt, with captions and a downloadable PDF.
-        Works with CPU basic and ZeroGPU.
         """
     )
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
-            style = gr.Dropdown(["Realistic", "Anime", "Comic", "Watercolor"], value="Realistic", label="Style")
             num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
             width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
             height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
             seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
             run_btn = gr.Button("Create Storyboard")
         with gr.Column():
-            gallery = gr.Gallery(label="Preview (grid)").style(grid=[2], height="auto")
             pdf_file = gr.File(label="Download PDF")
     run_btn.click(
         create_storyboard,
         inputs=[prompt, style, num_panels, width, height, seed],
-        outputs=[gallery, pdf_file]
     )
 if __name__ == "__main__":

+import os, io, math, tempfile
 import gradio as gr
 from PIL import Image, ImageDraw, ImageFont
 import torch
+from transformers import pipeline
+from diffusers import StableDiffusionPipeline
+from spaces import GPU  # ZeroGPU support
+from fpdf import FPDF   # make sure requirements.txt includes: fpdf==1.7.2
+# Avoid tokenizers parallelism warning after fork
+# setdefault() leaves any value already present in the environment untouched.
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+# ------------------ Globals (CPU-safe) ------------------
+# Lazily-filled caches: both start as None and are populated by the loader
+# functions (get_txtgen_cpu / get_t2i_cpu) on first use.
+_txtgen = None           # text generator stays on CPU
+_t2i_cpu = None          # CPU fallback pipeline
+# Style name -> descriptive phrase inserted into each panel prompt by
+# build_prompt(); the keys also populate the "Style" dropdown in the UI.
+STYLE_PRESETS = {
+    "Realistic": "realistic photography, finely detailed, natural lighting, 35mm",
+    "Anime": "anime, vibrant colors, cel shading, clean lineart",
+    "Comic": "comic book style, halftone, bold lines, dramatic shading",
+    "Watercolor": "watercolor painting, soft edges, gentle colors, textured paper",
+    "Sketch": "pencil sketch, cross-hatching, grayscale, paper texture",
+}
+# Negative prompt passed unchanged to the diffusion pipeline for every panel
+# (both the GPU path and the CPU fallback).
+NEGATIVE = "nsfw, nudity, gore, deformed, extra limbs, low quality, blurry, worst quality, lowres, text artifacts, watermark, logo"
+# ------------------ Loaders ------------------
+def get_txtgen_cpu():
+    """Load text generator on CPU (ZeroGPU-safe).
+
+    The pipeline is built on the first call and stored in the module-level
+    ``_txtgen``; later calls return that same object without reloading.
+
+    Returns:
+        The cached ``distilgpt2`` text-generation pipeline.
+    """
     global _txtgen
     if _txtgen is None:
+        # device=-1 is the transformers convention for "run on CPU".
         _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
     return _txtgen
+def get_t2i_cpu():
+    """CPU Stable Diffusion pipeline (fallback)."""
+    global _t2i_cpu
+    if _t2i_cpu is None:
+        _t2i_cpu = StableDiffusionPipeline.from_pretrained(
+            "stabilityai/sd-turbo",
+            torch_dtype=torch.float32,
+            safety_checker=None,
+        )
+        _t2i_cpu.enable_attention_slicing()
+    return _t2i_cpu
+# ------------------ GPU path (ZeroGPU) ------------------
+@GPU(duration=120)
+def t2i_generate_batch_gpu(prompts, width, height, steps, guidance, negative_prompt, seed=None):
+    """Runs inside a GPU-allocated context (ZeroGPU)."""
+    pipe = StableDiffusionPipeline.from_pretrained(
+        "stabilityai/sd-turbo",
+        torch_dtype=torch.float16,
+        safety_checker=None,
+    ).to("cuda")
+    generator = torch.Generator(device="cuda")
+    if seed is not None and str(seed).strip().isdigit():
+        generator = generator.manual_seed(int(seed))
+    images = []
+    for p in prompts:
+        img = pipe(
+            prompt=p,
+            negative_prompt=negative_prompt,
+            num_inference_steps=steps,
+            guidance_scale=guidance,
+            width=width,
+            height=height,
+            generator=generator,
+        ).images[0]
+        images.append(img)
+    return images
+# ------------------ Helpers ------------------
+def build_prompt(user_prompt: str, style: str, panel_idx: int, num_panels: int) -> str:
+    style_desc = STYLE_PRESETS.get(style, "")
+    beat = ["opening shot", "rising action", "key moment", "twist", "resolution"]
+    beat_text = beat[min(panel_idx, len(beat) - 1)]
+    return f"{user_prompt}, {style_desc}, storyboard panel {panel_idx+1} of {num_panels}, {beat_text}, cinematic composition, wide shot"
+def generate_captions(user_prompt: str, n: int = 3):
+    gen = get_txtgen_cpu()
+    # Simple, fast prompts; keep it short
     outputs = []
+    for i in range(n):
+        text = gen(
+            f"Write a very short scene caption (<=10 words) about: {user_prompt}",
+            max_new_tokens=30,
+            do_sample=True,
+            temperature=0.9,
+            top_p=0.95,
+            num_return_sequences=1,
+        )[0]["generated_text"].strip()
+        # Fallback if something weird comes out
+        if not text or len(text.split()) < 2:
+            text = f"Scene {i+1}"
+        outputs.append(text[:80])
     return outputs
+def add_caption_strip(img: Image.Image, text: str, width_hint: int) -> Image.Image:
+    """Add a black strip with white text at the bottom. Uses textbbox (Pillow>=10)."""
+    out = img.copy()
+    draw = ImageDraw.Draw(out)
+    try:
+        font = ImageFont.truetype("DejaVuSans.ttf", size=max(16, width_hint // 28))
+    except Exception:
+        font = ImageFont.load_default()
+    bbox = draw.textbbox((0, 0), text, font=font)
+    text_w = bbox[2] - bbox[0]
+    text_h = bbox[3] - bbox[1]
+    strip_h = text_h + 14
+    strip = Image.new("RGB", (out.width, strip_h), (0, 0, 0))
+    d2 = ImageDraw.Draw(strip)
+    d2.text(((out.width - text_w) // 2, 7), text, font=font, fill=(255, 255, 255))
+    combined = Image.new("RGB", (out.width, out.height + strip_h), (0, 0, 0))
+    combined.paste(out, (0, 0))
+    combined.paste(strip, (0, out.height))
+    return combined
+def images_to_pdf_with_fpdf(images):
+    """Write a simple multipage PDF using FPDF."""
+    if not images:
+        return None
     pdf_path = tempfile.mktemp(suffix=".pdf")
     pdf = FPDF()
+    for img in images:
+        # Save temp PNG to insert in PDF
+        tmp = tempfile.mktemp(suffix=".png")
+        img.save(tmp)
         pdf.add_page()
+        # Fit the image nicely within margins
+        pdf.image(tmp, x=10, y=10, w=190)
     pdf.output(pdf_path)
+    return pdf_path
+# ------------------ Core logic ------------------
+def create_storyboard(user_prompt, style, num_panels, width, height, seed):
+    if not user_prompt or not user_prompt.strip():
+        return [], None
+    # Build prompts + captions
+    captions = generate_captions(user_prompt, n=num_panels)
+    prompts = [build_prompt(user_prompt, style, i, num_panels) for i in range(num_panels)]
+    # Try GPU (ZeroGPU). If it fails (no GPU), fallback to CPU.
+    images = None
+    try:
+        images = t2i_generate_batch_gpu(prompts, width, height, steps=2, guidance=0.0,
+                                        negative_prompt=NEGATIVE, seed=seed)
+    except Exception:
+        # GPU not available → CPU fallback (slower)
+        pipe = get_t2i_cpu()
+        images = []
+        # No seed control on CPU path by default; can be added with torch.Generator("cpu")
+        for p in prompts:
+            img = pipe(
+                prompt=p,
+                negative_prompt=NEGATIVE,
+                num_inference_steps=4,
+                guidance_scale=0.0,
+                width=width,
+                height=height,
+            ).images[0]
+            images.append(img)
+    # Add caption strips
+    final_images = [add_caption_strip(img, cap, width_hint=width) for img, cap in zip(images, captions)]
+    # Build PDF
+    pdf_path = images_to_pdf_with_fpdf(final_images)
     return final_images, pdf_path
+# ------------------ UI ------------------
+# Declarative Gradio layout: inputs in the left column, results in the right,
+# with the button wired to create_storyboard().
+with gr.Blocks(title="AI Storyboard Creator") as demo:
     gr.Markdown(
         """
         # 🎬 AI Storyboard Creator
+        Turn a single prompt into a mini storyboard: 3–6 panels, captions, and a downloadable PDF.
+        Works on **CPU basic** and supports **ZeroGPU** (GPU on-demand).
         """
     )
     with gr.Row():
+        # Left column: user inputs, in the same order create_storyboard() expects them.
         with gr.Column():
             prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
+            style = gr.Dropdown(choices=list(STYLE_PRESETS.keys()), value="Comic", label="Style")
             num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
             width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
             height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
             seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
             run_btn = gr.Button("Create Storyboard")
+        # Right column: outputs filled from create_storyboard()'s two return values.
         with gr.Column():
+            # NOTE: no .style(); use columns=2 instead
+            gallery = gr.Gallery(label="Preview (grid)", columns=2, height="auto")
             pdf_file = gr.File(label="Download PDF")
+    # Clicking the button runs the whole pipeline; the (images, pdf_path) result
+    # maps onto (gallery, pdf_file).
     run_btn.click(
         create_storyboard,
         inputs=[prompt, style, num_panels, width, height, seed],
+        outputs=[gallery, pdf_file],
     )
 if __name__ == "__main__":