shreyansjain committed on
Commit
ae41f1f
·
1 Parent(s): d9324f5

initial commit

Browse files
Files changed (2) hide show
  1. app.py +397 -0
  2. persona_annotator_sample.json +0 -0
app.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import random
4
+ import os
5
+ from typing import List, Dict, Any, Optional
6
+
7
+ # -----------------------------
8
+ # Available JSON files (persona datasets)
9
+ # -----------------------------
10
# Persona dataset files offered in the file-selection dropdown.
available_files = ["persona_annotator_sample.json"]

# Module-level navigation state; load_file() and show_entry() rebind these.
data, index, current_file = [], 0, None

# Emoji shown in the heading of each rendered Markdown section.
ICONS = dict(
    header="👤",
    categories="🏷️",
    presenting="🚩",
    clinical="🩺",
    history="📜",
    functioning="🔧",
    summary="🧾",
    context="🧩",
    metadata="🔖",
    other="🗂️",
)

# Entry keys grouped by the section that renders them. md_other_fields()
# treats every key not listed here as an "other" field.
SECTION_FIELDS = dict(
    header=[
        "name", "archetype", "age", "sex", "location",
        "education_level", "bachelors_field", "ethnic_background",
        "marital_status", "version",
    ],
    categories=["appearance_category", "behavior_category"],
    presenting=["presenting_problems"],
    clinical=[
        "appearance", "behavior", "mood_affect", "speech",
        "thought_content", "insight_judgment", "cognition",
    ],
    history=[
        "medical_developmental_history",
        "family_history",
        "educational_vocational_history",
    ],
    functioning=["emotional_behavioral_functioning", "social_functioning"],
    summary=["summary_of_psychological_profile"],
    context=["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
    metadata=["uid"],
)
47
+
48
+ # -----------------------------
49
+ # Persistent storage path
50
+ # -----------------------------
51
# Use the persistent storage directory when it exists; otherwise fall back to
# the current working directory.
PERSISTENT_DIR = "/home/user/app/storage"
STORAGE_DIR = PERSISTENT_DIR if os.path.exists(PERSISTENT_DIR) else "."
os.makedirs(STORAGE_DIR, exist_ok=True)

# Annotations are appended to this JSONL file by save_annotation().
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")
58
+
59
+ # -----------------------------
60
+ # Core functions
61
+ # -----------------------------
62
+
63
+ def _get(entry: Dict[str, Any], key: str, default: str = "—") -> str:
64
+ v = entry.get(key, default)
65
+ if v is None:
66
+ return default
67
+ if isinstance(v, (list, dict)):
68
+ try:
69
+ return json.dumps(v, ensure_ascii=False)
70
+ except Exception:
71
+ return str(v)
72
+ return str(v).strip()
73
+
74
+ def _truncate(s: str, limit: int = 2000) -> str:
75
+ s = s or ""
76
+ return (s[:limit] + " …") if len(s) > limit else s
77
+
78
+
79
def load_file(file_name):
    """Load the selected JSON file and display a randomly chosen entry.

    The file is parsed before any module state is touched, so a missing or
    malformed file leaves the previously loaded ``data`` and ``current_file``
    in step with each other.

    Args:
        file_name: Path of the JSON file to load.

    Returns:
        Whatever ``show_entry()`` returns for the newly selected entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    global data, index, current_file
    with open(file_name, "r", encoding="utf-8") as f:
        loaded = json.load(f)

    # Commit the new state only after a successful parse.
    data = loaded
    current_file = file_name
    # random.randint(0, -1) raises ValueError, so guard the empty dataset.
    index = random.randint(0, len(data) - 1) if data else 0
    return show_entry()
87
+
88
+
89
def save_annotation(p_hash, *scores_and_comments):
    """Append one annotation record to the JSONL annotation file.

    The record holds the currently loaded file name, the persona hash and the
    rubric values. Positional values are paired with the rubric fields in
    order; surplus values, or fields without a value, are dropped.

    Args:
        p_hash: Identifier of the persona being annotated.
        *scores_and_comments: Rubric values in rubric-field order.

    Returns:
        A status message for display in the UI.
    """
    # Without a persona identifier the record could not be tied back to any
    # entry, so skip the write rather than storing an unusable line.
    if not p_hash:
        return "⚠️ No persona is loaded, so nothing was saved."

    rubric_fields = (
        "clarity", "originality", "coherence", "diversity", "realism",
        "psychological_depth", "consistency", "informativeness",
        "ethical_considerations", "demographic_fidelity", "overall_score",
    )
    ann = {
        "file_name": current_file,
        "hash_id": p_hash,
        "annotations": dict(zip(rubric_fields, scores_and_comments)),
    }

    with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(ann, ensure_ascii=False) + "\n")

    return f"✅ Saved annotation for {p_hash} (from {current_file}) → {ANNOTATION_FILE}"
110
+
111
def export_annotations():
    """Return the annotation file path, creating an empty file if absent."""
    if not os.path.exists(ANNOTATION_FILE):
        # Create an empty file so there is always something to hand back.
        with open(ANNOTATION_FILE, "w", encoding="utf-8"):
            pass
    return ANNOTATION_FILE
119
+
120
+
121
def md_header(entry: Dict[str, Any]) -> str:
    """Render the persona's header fields as a Markdown section.

    Each field goes on its own line as ``**Label:** value``; values come from
    ``_get``, so missing fields show its default placeholder.

    Args:
        entry: One persona record.

    Returns:
        Markdown text starting with the "Persona" heading.
    """
    rows = [
        ("Name", "name"),
        ("Archetype", "archetype"),
        ("Age", "age"),
        ("Sex", "sex"),
        ("Location", "location"),
        ("Education Level", "education_level"),
        ("Bachelor’s Field", "bachelors_field"),
        ("Ethnic Background", "ethnic_background"),
        ("Marital Status", "marital_status"),
        ("Version", "version"),
    ]
    lines = [f"**{label}:** {_get(entry, key)}" for label, key in rows]
    # Two trailing spaces before the newline make a Markdown hard line break;
    # with a single space the fields would flow together into one paragraph.
    return f"## {ICONS['header']} Persona\n" + "  \n".join(lines)
145
+
146
def md_categories(entry: Dict[str, Any]) -> str:
    """Render the appearance and behavior categories as a Markdown section.

    Args:
        entry: One persona record.

    Returns:
        Markdown text starting with the "Categories" heading, one category
        per line.
    """
    rows = [
        ("Appearance Category", "appearance_category"),
        ("Behavior Category", "behavior_category"),
    ]
    lines = [f"**{label}:** {_get(entry, key)}" for label, key in rows]
    # Two trailing spaces before the newline make a Markdown hard line break;
    # with a single space both categories would share one paragraph line.
    return f"## {ICONS['categories']} Categories\n" + "  \n".join(lines)
154
+
155
def md_presenting(entry: Dict[str, Any]) -> str:
    """Render ``presenting_problems`` as a Markdown bullet list.

    The field may be a list, a JSON-encoded list held in a string, or a
    semicolon-separated string. Blank items are dropped, and an em dash is
    shown when nothing remains.
    """

    def _clean(values) -> List[str]:
        # Stringify, strip, and discard items that end up empty.
        return [text for text in (str(v).strip() for v in values) if text]

    raw = entry.get("presenting_problems")
    items: List[str] = []
    if isinstance(raw, list):
        items = _clean(raw)
    elif isinstance(raw, str) and raw.strip():
        try:
            parsed = json.loads(raw)
        except Exception:
            parsed = None
        # Anything that is not a JSON list is treated as ';'-separated text.
        items = _clean(parsed) if isinstance(parsed, list) else _clean(raw.split(";"))

    if items:
        bullets = "\n".join(f"- {item}" for item in items)
    else:
        bullets = "—"
    return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"
171
+
172
def md_clinical(entry: Dict[str, Any]) -> str:
    """Render the clinical observation fields as a Markdown section.

    Only fields holding a non-blank string are shown, each under a bold label
    and truncated by ``_truncate``. An em dash is shown when none qualify.
    """
    labelled_keys = (
        ("appearance", "Appearance"),
        ("behavior", "Behavior"),
        ("mood_affect", "Mood / Affect"),
        ("speech", "Speech"),
        ("thought_content", "Thought Content"),
        ("insight_judgment", "Insight & Judgment"),
        ("cognition", "Cognition"),
    )
    sections = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        sections.append(f"**{title}**\n{_truncate(text)}")

    body = "\n\n".join(sections) if sections else "—"
    return f"## {ICONS['clinical']} Clinical Observations\n" + body
188
+
189
def md_history(entry: Dict[str, Any]) -> str:
    """Render the life-history fields as a Markdown section.

    Only fields holding a non-blank string are shown, each under a bold label
    and truncated by ``_truncate``. An em dash is shown when none qualify.
    """
    labelled_keys = (
        ("medical_developmental_history", "Medical / Developmental History"),
        ("family_history", "Family History"),
        ("educational_vocational_history", "Educational / Vocational History"),
    )
    sections = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        sections.append(f"**{title}**\n{_truncate(text)}")

    body = "\n\n".join(sections) if sections else "—"
    return f"## {ICONS['history']} Life History\n" + body
201
+
202
def md_functioning(entry: Dict[str, Any]) -> str:
    """Render the functioning fields as a Markdown section.

    Only fields holding a non-blank string are shown, each under a bold label
    and truncated by ``_truncate``. An em dash is shown when none qualify.
    """
    labelled_keys = (
        ("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
        ("social_functioning", "Social Functioning"),
    )
    sections = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        sections.append(f"**{title}**\n{_truncate(text)}")

    body = "\n\n".join(sections) if sections else "—"
    return f"## {ICONS['functioning']} Functioning\n" + body
213
+
214
def md_summary(entry: Dict[str, Any]) -> str:
    """Render ``summary_of_psychological_profile`` as a Markdown section.

    A non-blank string is shown truncated by ``_truncate``; anything else is
    replaced by an em dash.
    """
    text = entry.get("summary_of_psychological_profile")
    if isinstance(text, str) and text.strip():
        body = _truncate(text)
    else:
        body = "—"
    return f"## {ICONS['summary']} Summary\n{body}"
218
+
219
def md_context(entry: Dict[str, Any]) -> str:
    """Render the archetype description and memoir fields as Markdown.

    The description comes from ``archetype_description``, falling back to
    ``archetype_summary``. The memoir title and summary appear only when they
    are non-blank strings; a missing narrative is shown as an em dash.
    """

    def _filled(value) -> bool:
        return isinstance(value, str) and bool(value.strip())

    description = (
        entry.get("archetype_description")
        or entry.get("archetype_summary")
        or "—"
    )
    # A truthy non-string description is not rendered; show the dash instead.
    description_text = _truncate(str(description)) if isinstance(description, str) else "—"

    parts = [
        f"## {ICONS['context']} Context\n",
        f"**Archetype Description**\n{description_text}\n\n",
    ]

    title = entry.get("memoir")
    if _filled(title):
        parts.append(f"**Memoir:** {title}\n\n")

    summary = entry.get("memoir_summary")
    if _filled(summary):
        parts.append(f"**Memoir Summary**\n{_truncate(summary)}\n\n")

    narrative = entry.get("memoir_narrative")
    if _filled(narrative):
        parts.append(f"**Memoir Narrative**\n{_truncate(narrative)}")
    else:
        parts.append("—")

    return "".join(parts)
234
+
235
def md_metadata(entry: Dict[str, Any]) -> str:
    """Render the entry's ``uid`` as a Markdown metadata section."""
    heading = f"## {ICONS['metadata']} Metadata\n"
    return heading + f"**UID:** {_get(entry, 'uid')}"
238
+
239
def md_other_fields(entry: Dict[str, Any]) -> str:
    """List every entry key that no other section covers.

    Keys absent from ``SECTION_FIELDS`` are shown in sorted order as bullet
    points. Dict and list values are JSON-encoded (falling back to ``str``),
    ``None`` becomes an empty string, and all values are truncated by
    ``_truncate``. An em dash is shown when there are no extra keys.
    """
    heading = f"## {ICONS['other']} Other Fields\n"
    covered = {field for fields in SECTION_FIELDS.values() for field in fields}
    extras = sorted(key for key in entry if key not in covered)
    if not extras:
        return heading + "—"

    lines = []
    for key in extras:
        value = entry[key]
        if value is None:
            text = ""
        elif isinstance(value, (dict, list)):
            try:
                text = json.dumps(value, ensure_ascii=False)
            except Exception:
                text = str(value)
        else:
            text = str(value)
        lines.append(f"- **{key}:** {_truncate(text)}")
    return heading + "\n".join(lines)
257
+
258
def show_entry(step=None):
    """Move to another entry if requested and render the current one.

    Args:
        step: ``"Next"``, ``"Previous"`` or ``"Random Shuffle"`` to change the
            current index (wrapping around the dataset); any other value
            re-renders the current entry.

    Returns:
        A list of 11 strings: the persona hash followed by the ten Markdown
        sections, matching the 11 output components wired to this function.
        With no data loaded every value is an empty string; for an empty
        entry the hash is empty and each section reads ``_No data_``.
    """
    global index, data
    section_count = 10  # Markdown sections that follow the hash value

    if not data:
        # Return one value per wired output, not a shorter tuple.
        return [""] * (1 + section_count)

    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randint(0, len(data) - 1)

    entry = data[index]
    # Check for an empty entry before calling any method on it.
    if not entry:
        return [""] + ["_No data_"] * section_count

    # NOTE(review): the hash is read from "uuid" while md_metadata and
    # SECTION_FIELDS use "uid" — confirm which key the dataset provides.
    p_hash = entry.get("uuid", f"persona_{index}")

    return [
        p_hash,
        md_header(entry),
        md_categories(entry),
        md_presenting(entry),
        md_clinical(entry),
        md_history(entry),
        md_functioning(entry),
        md_summary(entry),
        md_context(entry),
        md_metadata(entry),
        md_other_fields(entry),
    ]
292
+
293
+ # persona_str = entry.get("persona_string", "").replace("\n", "<br>")
294
+ # archetype = entry.get("archetype", "N/A")
295
+ # persona_md = f"### 👤 Persona Summary\n**Archetype:** {archetype}\n\n{persona_str}"
296
+
297
+ # -----------------------------
298
+ # Gradio UI
299
+ # -----------------------------
300
with gr.Blocks() as demo:
    gr.Markdown("## Persona Annotation Tool")

    # Dataset picker.
    file_dropdown = gr.Dropdown(
        label="Select Persona JSON File",
        choices=available_files,
        value=available_files[0],
    )

    # Navigation controls.
    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        shuffle_btn = gr.Button("Random Shuffle")

    # Persona display: the hash followed by one Markdown panel per section.
    phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)
    md_header_out = gr.Markdown()
    md_cats_out = gr.Markdown()
    md_present_out = gr.Markdown()
    md_clinical_out = gr.Markdown()
    md_history_out = gr.Markdown()
    md_function_out = gr.Markdown()
    md_summary_out = gr.Markdown()
    md_context_out = gr.Markdown()
    md_meta_out = gr.Markdown()
    md_other_out = gr.Markdown()

    gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")

    # Scores are offered as the strings "0" through "5".
    choices = [str(score) for score in range(6)]

    clarity = gr.Dropdown(label="Clarity", choices=choices, value=None)
    originality = gr.Dropdown(label="Originality", choices=choices, value=None)
    coherence = gr.Dropdown(label="Coherence", choices=choices, value=None)
    diversity = gr.Dropdown(label="Diversity", choices=choices, value=None)
    realism = gr.Dropdown(label="Realism", choices=choices, value=None)
    psychological_depth = gr.Dropdown(
        label="Psychological Depth (focus metric)", choices=choices, value=None
    )
    consistency = gr.Dropdown(label="Consistency", choices=choices, value=None)
    informativeness = gr.Dropdown(label="Informativeness", choices=choices, value=None)
    ethical_considerations = gr.Dropdown(
        label="Ethical Considerations (0–5)", choices=choices, value=None
    )
    demographic_fidelity = gr.Dropdown(
        label="Demographic Fidelity", choices=choices, value=None
    )
    overall_score = gr.Dropdown(label="Overall Score", choices=choices, value=None)

    save_btn = gr.Button("Save Annotation")
    save_status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        export_btn = gr.Button("Download All Annotations")
        export_file = gr.File(label="Exported Annotations", type="filepath")

    # Wiring. Every handler that renders a persona fills the same components,
    # in the order show_entry() returns its values.
    entry_outputs = [
        phash_out, md_header_out, md_cats_out, md_present_out, md_clinical_out,
        md_history_out, md_function_out, md_summary_out, md_context_out,
        md_meta_out, md_other_out,
    ]

    file_dropdown.change(load_file, inputs=file_dropdown, outputs=entry_outputs)

    # Each navigation button passes its own step name to show_entry().
    for nav_btn, nav_step in (
        (prev_btn, "Previous"),
        (next_btn, "Next"),
        (shuffle_btn, "Random Shuffle"),
    ):
        nav_btn.click(show_entry, inputs=gr.State(nav_step), outputs=entry_outputs)

    # Input order must match the rubric field order in save_annotation().
    save_btn.click(
        save_annotation,
        inputs=[
            phash_out, clarity, originality, coherence, diversity, realism,
            psychological_depth, consistency, informativeness,
            ethical_considerations, demographic_fidelity, overall_score,
        ],
        outputs=save_status,
    )

    export_btn.click(export_annotations, inputs=None, outputs=export_file)

    # Load the first listed file when the page opens.
    demo.load(
        load_file,
        inputs=gr.State(available_files[0]),
        outputs=entry_outputs,
    )

demo.launch()
persona_annotator_sample.json ADDED
The diff for this file is too large to render. See raw diff