maldons77 committed on
Commit
eba2043
·
verified ·
1 Parent(s): d1f998c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -81
app.py CHANGED
@@ -1,123 +1,213 @@
1
- import os
2
  import gradio as gr
3
- from transformers import pipeline
4
- from diffusers import StableDiffusionPipeline
5
  from PIL import Image, ImageDraw, ImageFont
6
  import torch
7
- import tempfile
8
- from fpdf import FPDF
9
- import spaces
10
-
11
- # Global caches
12
- _txtgen = None
13
- _t2i_pipe = None
14
-
15
- # GPU function for image generation
16
- @spaces.GPU(duration=60)
17
- def t2i_generate(prompt, style, width, height, num_images, seed=None):
18
- global _t2i_pipe
19
- if _t2i_pipe is None:
20
- model_id = "stabilityai/sd-turbo"
21
- _t2i_pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
22
- _t2i_pipe.to("cuda")
23
- generator = torch.Generator(device="cuda")
24
- if seed:
25
- generator = generator.manual_seed(int(seed))
26
- images = _t2i_pipe(
27
- f"{style} style, {prompt}",
28
- num_inference_steps=2,
29
- guidance_scale=0.0,
30
- width=width,
31
- height=height,
32
- generator=generator,
33
- num_images_per_prompt=num_images
34
- ).images
35
- return images
36
 
37
- # CPU function for text generation (always on CPU for ZeroGPU compatibility)
38
- def get_txtgen():
 
 
39
  global _txtgen
40
  if _txtgen is None:
41
  _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
42
  return _txtgen
43
 
44
- def generate_captions(prompt, n=3):
45
- gen = get_txtgen()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  outputs = []
47
- for _ in range(n):
48
- text = gen(f"{prompt} - scene description:", max_length=30, num_return_sequences=1)[0]['generated_text']
49
- outputs.append(text.strip())
 
 
 
 
 
 
 
 
 
 
50
  return outputs
51
 
52
- # Storyboard PDF creation
53
- def create_storyboard(user_prompt, style, num_panels, width, height, seed):
54
- # Generate captions
55
- captions = generate_captions(user_prompt, n=num_panels)
56
 
57
- # Generate images on GPU
58
- images = t2i_generate(user_prompt, style, width, height, num_panels, seed)
59
-
60
- # Add captions to images
61
- final_images = []
62
- for img, text in zip(images, captions):
63
- img_with_text = img.copy()
64
- draw = ImageDraw.Draw(img_with_text)
65
- try:
66
- font = ImageFont.truetype("arial.ttf", 20)
67
- except:
68
- font = ImageFont.load_default()
69
-
70
- # FIX: textsize -> textbbox for Pillow >= 10
71
- bbox = draw.textbbox((0, 0), text, font=font)
72
- text_w = bbox[2] - bbox[0]
73
- text_h = bbox[3] - bbox[1]
74
-
75
- x = (img_with_text.width - text_w) / 2
76
- y = img_with_text.height - text_h - 10
77
- draw.rectangle([x-5, y-5, x+text_w+5, y+text_h+5], fill=(0, 0, 0, 127))
78
- draw.text((x, y), text, font=font, fill=(255, 255, 255))
79
- final_images.append(img_with_text)
80
-
81
- # Save PDF
 
 
 
82
  pdf_path = tempfile.mktemp(suffix=".pdf")
83
  pdf = FPDF()
84
- for img in final_images:
85
- temp_path = tempfile.mktemp(suffix=".png")
86
- img.save(temp_path)
 
87
  pdf.add_page()
88
- pdf.image(temp_path, x=10, y=10, w=180)
 
89
  pdf.output(pdf_path)
 
 
 
 
 
 
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  return final_images, pdf_path
92
 
93
- # Gradio UI
94
- with gr.Blocks() as demo:
 
95
  gr.Markdown(
96
  """
97
  # 🎬 AI Storyboard Creator
98
- Generate a short storyboard from a single prompt, with captions and a downloadable PDF.
99
- Works with CPU basic and ZeroGPU.
100
  """
101
  )
102
-
103
  with gr.Row():
104
  with gr.Column():
105
  prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
106
- style = gr.Dropdown(["Realistic", "Anime", "Comic", "Watercolor"], value="Realistic", label="Style")
107
  num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
108
  width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
109
  height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
110
  seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
111
  run_btn = gr.Button("Create Storyboard")
112
-
113
  with gr.Column():
114
- gallery = gr.Gallery(label="Preview (grid)").style(grid=[2], height="auto")
 
115
  pdf_file = gr.File(label="Download PDF")
116
 
117
  run_btn.click(
118
  create_storyboard,
119
  inputs=[prompt, style, num_panels, width, height, seed],
120
- outputs=[gallery, pdf_file]
121
  )
122
 
123
  if __name__ == "__main__":
 
1
+ import os, io, math, tempfile
2
  import gradio as gr
 
 
3
  from PIL import Image, ImageDraw, ImageFont
4
  import torch
5
+ from transformers import pipeline
6
+ from diffusers import StableDiffusionPipeline
7
+ from spaces import GPU # ZeroGPU support
8
+ from fpdf import FPDF # make sure requirements.txt includes: fpdf==1.7.2
9
+
10
# Silence the tokenizers "parallelism after fork" warning, unless the
# environment has already picked a value for this variable.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# ------------------ Globals (CPU-safe) ------------------
_txtgen = None  # text-generation pipeline, created on first use
_t2i_cpu = None  # CPU fallback image pipeline, created on first use

# UI style name -> prompt fragment describing that visual style.
STYLE_PRESETS = dict(
    Realistic="realistic photography, finely detailed, natural lighting, 35mm",
    Anime="anime, vibrant colors, cel shading, clean lineart",
    Comic="comic book style, halftone, bold lines, dramatic shading",
    Watercolor="watercolor painting, soft edges, gentle colors, textured paper",
    Sketch="pencil sketch, cross-hatching, grayscale, paper texture",
)

# Negative prompt handed to every image-generation call.
NEGATIVE = ", ".join(
    (
        "nsfw",
        "nudity",
        "gore",
        "deformed",
        "extra limbs",
        "low quality",
        "blurry",
        "worst quality",
        "lowres",
        "text artifacts",
        "watermark",
        "logo",
    )
)
 
 
 
 
 
 
 
 
 
25
 
26
+
27
+ # ------------------ Loaders ------------------
28
def get_txtgen_cpu():
    """Return the shared distilgpt2 text-generation pipeline (ZeroGPU-safe).

    The pipeline is created with ``device=-1`` on the first call and cached in
    the module-level ``_txtgen``; later calls return the cached object.
    """
    global _txtgen
    if _txtgen is not None:
        return _txtgen
    _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
    return _txtgen
34
 
35
+
36
def get_t2i_cpu():
    """Return the cached CPU fallback Stable Diffusion pipeline.

    On the first call the ``stabilityai/sd-turbo`` weights are loaded in
    float32 without a safety checker and attention slicing is enabled; the
    result is stored in the module-level ``_t2i_cpu`` and reused afterwards.
    """
    global _t2i_cpu
    if _t2i_cpu is not None:
        return _t2i_cpu
    _t2i_cpu = StableDiffusionPipeline.from_pretrained(
        "stabilityai/sd-turbo",
        torch_dtype=torch.float32,
        safety_checker=None,
    )
    _t2i_cpu.enable_attention_slicing()
    return _t2i_cpu
47
+
48
+
49
+ # ------------------ GPU path (ZeroGPU) ------------------
50
@GPU(duration=120)
def t2i_generate_batch_gpu(prompts, width, height, steps, guidance, negative_prompt, seed=None):
    """Generate one image per prompt inside a GPU-allocated (ZeroGPU) context.

    Args:
        prompts: iterable of prompt strings; one image is produced per entry.
        width, height: output size passed straight to the pipeline.
        steps: value for ``num_inference_steps``.
        guidance: value for ``guidance_scale``.
        negative_prompt: negative prompt applied to every image.
        seed: optional seed (int or digit string). Anything else, including
            ``None`` or an empty string, means "no fixed seed".

    Returns:
        A list of images in the same order as ``prompts``.
    """
    pipe = StableDiffusionPipeline.from_pretrained(
        "stabilityai/sd-turbo",
        torch_dtype=torch.float16,
        safety_checker=None,
    ).to("cuda")

    # Only build a generator when a usable seed was supplied. A freshly
    # constructed torch.Generator starts from a fixed default seed, so passing
    # an unseeded one would make "no seed" runs produce identical images.
    generator = None
    seed_text = "" if seed is None else str(seed).strip()
    if seed_text.isdigit():
        generator = torch.Generator(device="cuda").manual_seed(int(seed_text))

    images = []
    for p in prompts:
        # The generator (if any) is shared across panels so that each panel
        # draws different noise while the whole batch stays reproducible.
        img = pipe(
            prompt=p,
            negative_prompt=negative_prompt,
            num_inference_steps=steps,
            guidance_scale=guidance,
            width=width,
            height=height,
            generator=generator,
        ).images[0]
        images.append(img)
    return images
76
+
77
+
78
+ # ------------------ Helpers ------------------
79
def build_prompt(user_prompt: str, style: str, panel_idx: int, num_panels: int) -> str:
    """Compose the image prompt for one storyboard panel.

    The user's prompt is combined with the style preset text (empty when the
    style is not in ``STYLE_PRESETS``), the panel position, and a narrative
    beat chosen by ``panel_idx``; indices past the last beat reuse the last one.
    """
    beats = ("opening shot", "rising action", "key moment", "twist", "resolution")
    final_beat = len(beats) - 1
    beat_text = beats[min(panel_idx, final_beat)]
    style_desc = STYLE_PRESETS.get(style, "")
    panel_label = f"storyboard panel {panel_idx+1} of {num_panels}"
    return f"{user_prompt}, {style_desc}, {panel_label}, {beat_text}, cinematic composition, wide shot"
84
+
85
+
86
def generate_captions(user_prompt: str, n: int = 3):
    """Generate ``n`` short captions for the storyboard panels.

    Args:
        user_prompt: the story prompt the captions should be about.
        n: number of captions to produce (coerced to ``int``).

    Returns:
        A list of ``n`` single-line strings, each at most 80 characters. A
        caption falls back to ``"Scene <k>"`` when the model output is empty
        or shorter than two words.
    """
    gen = get_txtgen_cpu()
    instruction = f"Write a very short scene caption (<=10 words) about: {user_prompt}"
    outputs = []
    for i in range(int(n)):
        result = gen(
            instruction,
            max_new_tokens=30,
            do_sample=True,
            temperature=0.9,
            top_p=0.95,
            num_return_sequences=1,
            # By default the pipeline returns prompt + continuation, which
            # would make every caption start with the instruction text itself.
            return_full_text=False,
        )
        # Collapse newlines/runs of whitespace: the caption is drawn on one line.
        text = " ".join(result[0]["generated_text"].split())
        # Fallback if something weird comes out
        if not text or len(text.split()) < 2:
            text = f"Scene {i+1}"
        outputs.append(text[:80])
    return outputs
104
 
 
 
 
 
105
 
106
def add_caption_strip(img: Image.Image, text: str, width_hint: int) -> Image.Image:
    """Return a new image: ``img`` with a black caption strip appended below it.

    Args:
        img: source panel; it is not modified.
        text: caption drawn in white, horizontally centred in the strip.
        width_hint: panel width used to scale the font (size is
            ``max(16, width_hint // 28)``).

    Returns:
        An RGB image as wide as ``img`` and taller by the strip height
        (text height plus 7 px padding above and below).
    """
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", size=max(16, int(width_hint) // 28))
    except OSError:
        # Font file not available on this machine: use Pillow's built-in font.
        font = ImageFont.load_default()

    pad = 7
    # Measure on a scratch canvas; textbbox (Pillow >= 10 replacement for
    # textsize) does not need the real image.
    probe = ImageDraw.Draw(Image.new("RGB", (1, 1)))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_w = right - left
    text_h = bottom - top
    strip_h = text_h + 2 * pad

    combined = Image.new("RGB", (img.width, img.height + strip_h), (0, 0, 0))
    combined.paste(img, (0, 0))

    # textbbox reports where the ink starts relative to the drawing origin;
    # subtracting left/top puts the visible glyphs exactly inside the padding
    # instead of shifted down/right by the font's bearing.
    x = (img.width - text_w) // 2 - left
    y = img.height + pad - top
    ImageDraw.Draw(combined).text((x, y), text, font=font, fill=(255, 255, 255))
    return combined
128
+
129
+
130
def images_to_pdf_with_fpdf(images):
    """Write ``images`` to a multipage PDF (one image per page) using FPDF.

    Args:
        images: sequence of PIL images; ``None`` or empty yields no PDF.

    Returns:
        Path of the generated PDF file, or ``None`` when there are no images.
        The caller owns the returned file.
    """
    if not images:
        return None

    # mkstemp creates the file atomically; tempfile.mktemp only hands back a
    # name, which is race-prone and deprecated.
    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)

    # Printable box in mm, assuming FPDF's default A4 portrait page
    # (210 x 297) with a 10 mm margin on every side.
    max_w, max_h = 190.0, 277.0

    pdf = FPDF()
    try:
        # Intermediate PNGs live in a scratch directory that is removed once
        # the PDF has been written, so nothing is left behind per request.
        with tempfile.TemporaryDirectory() as scratch:
            for idx, img in enumerate(images):
                png_path = os.path.join(scratch, f"panel_{idx}.png")
                img.save(png_path)
                # Scale to fit the box while keeping the aspect ratio; wide or
                # square panels still end up 190 mm wide as before.
                scale = min(max_w / img.width, max_h / img.height)
                pdf.add_page()
                pdf.image(png_path, x=10, y=10, w=img.width * scale, h=img.height * scale)
            pdf.output(pdf_path)
    except Exception:
        # Do not leave an empty/partial PDF behind on failure.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        raise
    return pdf_path
145
+
146
+
147
+ # ------------------ Core logic ------------------
148
def create_storyboard(user_prompt, style, num_panels, width, height, seed):
    """Build the storyboard: captions, panel images with caption strips, and a PDF.

    Args:
        user_prompt: story prompt; blank or ``None`` short-circuits.
        style: style name looked up by ``build_prompt``.
        num_panels, width, height: UI slider values (coerced to ``int``).
        seed: optional seed (int or digit string); ignored when not numeric.

    Returns:
        ``(final_images, pdf_path)``, or ``([], None)`` when the prompt is blank.
    """
    import logging

    if not user_prompt or not user_prompt.strip():
        return [], None

    # Slider values can arrive as floats; range() and image sizes need ints.
    num_panels, width, height = int(num_panels), int(width), int(height)

    # Build prompts + captions
    captions = generate_captions(user_prompt, n=num_panels)
    prompts = [build_prompt(user_prompt, style, i, num_panels) for i in range(num_panels)]

    # Try GPU (ZeroGPU). If it fails (no GPU), fall back to CPU.
    try:
        images = t2i_generate_batch_gpu(
            prompts, width, height, steps=2, guidance=0.0,
            negative_prompt=NEGATIVE, seed=seed,
        )
    except Exception:
        # Keep the best-effort fallback, but record why the GPU path failed
        # instead of hiding the error completely.
        logging.getLogger(__name__).exception(
            "GPU generation failed; falling back to the CPU pipeline"
        )
        pipe = get_t2i_cpu()

        # Honour the seed on the CPU path too. No generator is passed when
        # there is no seed, so unseeded runs stay random.
        generator = None
        seed_text = "" if seed is None else str(seed).strip()
        if seed_text.isdigit():
            generator = torch.Generator(device="cpu").manual_seed(int(seed_text))

        images = []
        for p in prompts:
            img = pipe(
                prompt=p,
                negative_prompt=NEGATIVE,
                num_inference_steps=4,
                guidance_scale=0.0,
                width=width,
                height=height,
                generator=generator,
            ).images[0]
            images.append(img)

    # Add caption strips
    final_images = [
        add_caption_strip(img, cap, width_hint=width)
        for img, cap in zip(images, captions)
    ]
    # Build PDF
    pdf_path = images_to_pdf_with_fpdf(final_images)
    return final_images, pdf_path
182
 
183
+
184
+ # ------------------ UI ------------------
185
with gr.Blocks(title="AI Storyboard Creator") as demo:
    # Page header.
    gr.Markdown(
        """
        # 🎬 AI Storyboard Creator
        Turn a single prompt into a mini storyboard: 3–6 panels, captions, and a downloadable PDF.
        Works on **CPU basic** and supports **ZeroGPU** (GPU on-demand).
        """
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
            style = gr.Dropdown(choices=list(STYLE_PRESETS.keys()), value="Comic", label="Style")
            num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
            width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
            height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
            seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
            run_btn = gr.Button("Create Storyboard")

        # Right column: outputs.
        with gr.Column():
            # NOTE: no .style(); use columns=2 instead
            gallery = gr.Gallery(label="Preview (grid)", columns=2, height="auto")
            pdf_file = gr.File(label="Download PDF")

    # The button runs the whole pipeline; its two return values feed the
    # gallery and the PDF download, in that order.
    run_btn.click(
        create_storyboard,
        inputs=[prompt, style, num_panels, width, height, seed],
        outputs=[gallery, pdf_file],
    )
212
 
213
  if __name__ == "__main__":