Spaces:

TailsResearch
/

PersonaAnnotator

Sleeping

App Files Files Community

shreyansjain committed on Sep 30, 2025

Commit

ae41f1f

1 Parent(s): d9324f5

initial commit

Browse files

Files changed (2) hide show

app.py +397 -0
persona_annotator_sample.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,397 @@

+import gradio as gr
+import json
+import random
+import os
+from typing import List, Dict, Any, Optional
# -----------------------------
# Persona datasets, shared state and display configuration
# -----------------------------
# JSON files offered in the file-selection dropdown.
available_files = ["persona_annotator_sample.json"]

# Module-level state read and written by the callbacks below:
# the loaded entries, the position of the entry on screen, and the
# name of the file those entries came from.
data = []
index = 0
current_file = None

# Emoji shown in front of each Markdown section heading.
ICONS = dict(
    header="👤",
    categories="🏷️",
    presenting="🚩",
    clinical="🩺",
    history="📜",
    functioning="🔧",
    summary="🧾",
    context="🧩",
    metadata="🔖",
    other="🗂️",
)

# Entry keys grouped by the section that renders them; any key not listed
# here is reported by md_other_fields.
SECTION_FIELDS = {
    "header": [
        "name",
        "archetype",
        "age",
        "sex",
        "location",
        "education_level",
        "bachelors_field",
        "ethnic_background",
        "marital_status",
        "version",
    ],
    "categories": [
        "appearance_category",
        "behavior_category",
    ],
    "presenting": [
        "presenting_problems",
    ],
    "clinical": [
        "appearance",
        "behavior",
        "mood_affect",
        "speech",
        "thought_content",
        "insight_judgment",
        "cognition",
    ],
    "history": [
        "medical_developmental_history",
        "family_history",
        "educational_vocational_history",
    ],
    "functioning": [
        "emotional_behavioral_functioning",
        "social_functioning",
    ],
    "summary": [
        "summary_of_psychological_profile",
    ],
    "context": [
        "archetype_description",
        "memoir",
        "memoir_summary",
        "memoir_narrative",
    ],
    "metadata": [
        "uid",
    ],
}
# -----------------------------
# Where annotations are written
# -----------------------------
PERSISTENT_DIR = "/home/user/app/storage"
# Use the persistent directory when it is present; otherwise fall back to
# the current working directory.
STORAGE_DIR = PERSISTENT_DIR if os.path.exists(PERSISTENT_DIR) else "."
os.makedirs(STORAGE_DIR, exist_ok=True)
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")
+# -----------------------------
+# Core functions
+# -----------------------------
def _get(entry: Dict[str, Any], key: str, default: str = "—") -> str:
    """Return the value stored under ``key`` as display text.

    A ``None`` value yields ``default`` unchanged. A missing key is treated
    as if it held ``default``. Lists and dicts are serialised as JSON
    (falling back to ``str`` if that fails); every other value is converted
    with ``str`` and stripped of surrounding whitespace.
    """
    value = entry.get(key, default)
    if value is None:
        return default
    if not isinstance(value, (list, dict)):
        return str(value).strip()
    try:
        return json.dumps(value, ensure_ascii=False)
    except Exception:
        # Not JSON-serialisable: show the plain Python representation.
        return str(value)
def _truncate(s: str, limit: int = 2000) -> str:
    """Cut ``s`` to ``limit`` characters, appending " …" when it was shortened.

    Falsy input (``None`` or the empty string) is returned as "".
    """
    text = s if s else ""
    if len(text) <= limit:
        return text
    return text[:limit] + " …"
def load_file(file_name):
    """Load the selected JSON file and display a randomly chosen entry.

    The module-level ``data``, ``index`` and ``current_file`` are replaced
    only after the file has been read and parsed, so a failed load leaves
    the previously loaded dataset and its file name consistent with each
    other.

    Args:
        file_name: Path of the JSON file to open.

    Returns:
        Whatever ``show_entry()`` returns for the newly selected entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    global data, index, current_file
    with open(file_name, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    data = loaded
    current_file = file_name
    # random.randint(0, -1) raises ValueError, so an empty dataset is
    # pinned to position 0 instead of drawing a random one.
    index = random.randrange(len(data)) if data else 0
    return show_entry()
def save_annotation(p_hash, *scores_and_comments):
    """Append one annotation record to the JSONL annotation file.

    The positional values after ``p_hash`` are paired, in order, with the
    rubric field names; surplus values or unmatched field names are dropped
    by ``zip``. The record also stores the currently loaded file name.

    Returns:
        A status message naming the persona, the source file and the
        annotation file path.
    """
    rubric_fields = (
        "clarity",
        "originality",
        "coherence",
        "diversity",
        "realism",
        "psychological_depth",
        "consistency",
        "informativeness",
        "ethical_considerations",
        "demographic_fidelity",
        "overall_score",
    )
    record = {
        "file_name": current_file,
        "hash_id": p_hash,
        "annotations": dict(zip(rubric_fields, scores_and_comments)),
    }
    serialized = json.dumps(record, ensure_ascii=False)
    with open(ANNOTATION_FILE, "a", encoding="utf-8") as out:
        out.write(serialized + "\n")
    return f"✅ Saved annotation for {p_hash} (from {current_file}) → {ANNOTATION_FILE}"
def export_annotations():
    """Return the annotation file path, creating an empty file if none exists."""
    if not os.path.exists(ANNOTATION_FILE):
        # Nothing saved yet: create an empty file so there is something to return.
        with open(ANNOTATION_FILE, "w", encoding="utf-8"):
            pass
    return ANNOTATION_FILE
def md_header(entry: Dict[str, Any]) -> str:
    """Render the persona's identity and demographic fields as Markdown.

    Each field is looked up with ``_get``, so missing values appear as "—".
    Rows are separated by two trailing spaces plus a newline (a Markdown
    hard line break).
    """
    labelled_keys = [
        ("Name", "name"),
        ("Archetype", "archetype"),
        ("Age", "age"),
        ("Sex", "sex"),
        ("Location", "location"),
        ("Education Level", "education_level"),
        ("Bachelor’s Field", "bachelors_field"),
        ("Ethnic Background", "ethnic_background"),
        ("Marital Status", "marital_status"),
        ("Version", "version"),
    ]
    rows = [f"**{label}:** {_get(entry, key)}" for label, key in labelled_keys]
    return f"## {ICONS['header']} Persona\n" + "  \n".join(rows)
def md_categories(entry: Dict[str, Any]) -> str:
    """Render the appearance and behavior categories as a Markdown section."""
    appearance = _get(entry, "appearance_category")
    behavior = _get(entry, "behavior_category")
    section_lines = [
        f"## {ICONS['categories']} Categories",
        # Two trailing spaces make Markdown break the line here.
        f"**Appearance Category:** {appearance}  ",
        f"**Behavior Category:** {behavior}",
    ]
    return "\n".join(section_lines)
def md_presenting(entry: Dict[str, Any]) -> str:
    """Render ``presenting_problems`` as a Markdown bullet list.

    Accepts a list, a string holding a JSON list, or a ";"-separated string.
    Blank items are dropped; when nothing remains the body is "—".
    """

    def _non_blank(values) -> List[str]:
        # Stringify, strip, and keep only items that still have content.
        stripped = (str(v).strip() for v in values)
        return [text for text in stripped if text]

    raw = entry.get("presenting_problems")
    items: List[str] = []
    if isinstance(raw, list):
        items = _non_blank(raw)
    elif isinstance(raw, str) and raw.strip():
        try:
            parsed = json.loads(raw)
        except Exception:
            parsed = None
        # Anything that is not a JSON list is treated as ";"-separated text.
        if isinstance(parsed, list):
            items = _non_blank(parsed)
        else:
            items = _non_blank(raw.split(";"))

    if items:
        bullets = "\n".join("- " + item for item in items)
    else:
        bullets = "—"
    return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"
def md_clinical(entry: Dict[str, Any]) -> str:
    """Render the clinical observation fields as a Markdown section.

    Only fields holding a non-blank string are shown, each truncated with
    ``_truncate``; if none qualify the body is "—".
    """
    labelled_keys = (
        ("appearance", "Appearance"),
        ("behavior", "Behavior"),
        ("mood_affect", "Mood / Affect"),
        ("speech", "Speech"),
        ("thought_content", "Thought Content"),
        ("insight_judgment", "Insight & Judgment"),
        ("cognition", "Cognition"),
    )
    paragraphs = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        paragraphs.append(f"**{title}**\n{_truncate(text)}")
    body = "\n\n".join(paragraphs) if paragraphs else "—"
    return f"## {ICONS['clinical']} Clinical Observations\n" + body
def md_history(entry: Dict[str, Any]) -> str:
    """Render the medical, family and educational history as Markdown.

    Only fields holding a non-blank string are shown, each truncated with
    ``_truncate``; if none qualify the body is "—".
    """
    labelled_keys = (
        ("medical_developmental_history", "Medical / Developmental History"),
        ("family_history", "Family History"),
        ("educational_vocational_history", "Educational / Vocational History"),
    )
    paragraphs = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        paragraphs.append(f"**{title}**\n{_truncate(text)}")
    body = "\n\n".join(paragraphs) if paragraphs else "—"
    return f"## {ICONS['history']} Life History\n" + body
def md_functioning(entry: Dict[str, Any]) -> str:
    """Render the emotional/behavioral and social functioning fields.

    Only fields holding a non-blank string are shown, each truncated with
    ``_truncate``; if none qualify the body is "—".
    """
    labelled_keys = (
        ("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
        ("social_functioning", "Social Functioning"),
    )
    paragraphs = []
    for key, title in labelled_keys:
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        paragraphs.append(f"**{title}**\n{_truncate(text)}")
    body = "\n\n".join(paragraphs) if paragraphs else "—"
    return f"## {ICONS['functioning']} Functioning\n" + body
def md_summary(entry: Dict[str, Any]) -> str:
    """Render the psychological profile summary, or "—" when it is absent."""
    text = entry.get("summary_of_psychological_profile")
    if isinstance(text, str) and text.strip():
        body = _truncate(text)
    else:
        body = "—"
    return f"## {ICONS['summary']} Summary\n{body}"
def md_context(entry: Dict[str, Any]) -> str:
    """Render the archetype description and memoir fields as Markdown.

    The description comes from ``archetype_description``, falling back to
    ``archetype_summary``; a non-string value is shown as "—". The memoir
    title and summary are omitted when blank, while a blank narrative is
    shown as "—".
    """

    def _has_text(value) -> bool:
        # True only for strings with at least one non-whitespace character.
        return isinstance(value, str) and bool(value.strip())

    description = (
        entry.get("archetype_description")
        or entry.get("archetype_summary")
        or "—"
    )
    description_text = _truncate(description) if isinstance(description, str) else "—"

    title = entry.get("memoir")
    summary = entry.get("memoir_summary")
    narrative = entry.get("memoir_narrative")

    parts = [
        f"## {ICONS['context']} Context\n",
        f"**Archetype Description**\n{description_text}\n\n",
    ]
    if _has_text(title):
        parts.append(f"**Memoir:** {title}\n\n")
    if _has_text(summary):
        parts.append(f"**Memoir Summary**\n{_truncate(summary)}\n\n")
    if _has_text(narrative):
        parts.append(f"**Memoir Narrative**\n{_truncate(narrative)}")
    else:
        parts.append("—")
    return "".join(parts)
def md_metadata(entry: Dict[str, Any]) -> str:
    """Render the entry's ``uid`` as a Markdown metadata section."""
    return "## {} Metadata\n**UID:** {}".format(ICONS["metadata"], _get(entry, "uid"))
def md_other_fields(entry: Dict[str, Any]) -> str:
    """List every entry key that no section in ``SECTION_FIELDS`` covers.

    Keys are shown in sorted order. Dict and list values are serialised as
    JSON (falling back to ``str``), ``None`` becomes an empty string, and
    every value is truncated with ``_truncate``.
    """
    heading = f"## {ICONS['other']} Other Fields\n"

    covered = set()
    for field_names in SECTION_FIELDS.values():
        covered.update(field_names)

    extra_keys = [key for key in entry if key not in covered]
    if not extra_keys:
        return heading + "—"

    bullet_lines = []
    for key in sorted(extra_keys):
        value = entry[key]
        if isinstance(value, (dict, list)):
            try:
                text = json.dumps(value, ensure_ascii=False)
            except Exception:
                text = str(value)
        elif value is None:
            text = ""
        else:
            text = str(value)
        bullet_lines.append(f"- **{key}:** {_truncate(text)}")
    return heading + "\n".join(bullet_lines)
def show_entry(step=None):
    """Move to another entry if requested and render the current one.

    Args:
        step: "Next", "Previous" or "Random Shuffle" to move before
            rendering; any other value (including ``None``) re-renders the
            current position.

    Returns:
        A list of 11 values, matching the 11 output components every event
        is wired to: the persona hash ID followed by ten Markdown sections.
        When no data is loaded, or the selected entry is empty or not a
        dict, the hash ID is "" and every section is "_No data_".
    """
    global index, data
    empty = "_No data_"
    # One hash ID plus ten Markdown sections.
    placeholders = [""] + [empty] * 10

    if not data:
        return placeholders

    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randrange(len(data))
    else:
        # Keep a stale position within bounds of the current dataset.
        index %= len(data)

    entry = data[index]
    # Check the entry before calling .get on it: a null or non-dict entry
    # would otherwise raise instead of showing the placeholders.
    if not isinstance(entry, dict) or not entry:
        return placeholders

    # md_metadata displays the "uid" key, so fall back to it when "uuid" is
    # absent before resorting to a position-based ID.
    p_hash = str(entry.get("uuid") or entry.get("uid") or f"persona_{index}")
    return [
        p_hash,
        md_header(entry),
        md_categories(entry),
        md_presenting(entry),
        md_clinical(entry),
        md_history(entry),
        md_functioning(entry),
        md_summary(entry),
        md_context(entry),
        md_metadata(entry),
        md_other_fields(entry),
    ]
+# -----------------------------
+# Gradio UI
+# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Persona Annotation Tool")

    # Dataset picker
    file_dropdown = gr.Dropdown(
        label="Select Persona JSON File",
        choices=available_files,
        value=available_files[0],
    )

    # Navigation controls
    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        shuffle_btn = gr.Button("Random Shuffle")

    # Persona display: hash ID followed by one Markdown panel per section
    phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)
    md_header_out = gr.Markdown()
    md_cats_out = gr.Markdown()
    md_present_out = gr.Markdown()
    md_clinical_out = gr.Markdown()
    md_history_out = gr.Markdown()
    md_function_out = gr.Markdown()
    md_summary_out = gr.Markdown()
    md_context_out = gr.Markdown()
    md_meta_out = gr.Markdown()
    md_other_out = gr.Markdown()

    # Scoring rubric: every criterion is an unselected 0-5 dropdown
    gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
    choices = [str(score) for score in range(6)]

    def _score_dropdown(label):
        """Create one rubric dropdown with no initial selection."""
        return gr.Dropdown(choices=choices, label=label, value=None)

    clarity = _score_dropdown("Clarity")
    originality = _score_dropdown("Originality")
    coherence = _score_dropdown("Coherence")
    diversity = _score_dropdown("Diversity")
    realism = _score_dropdown("Realism")
    psychological_depth = _score_dropdown("Psychological Depth (focus metric)")
    consistency = _score_dropdown("Consistency")
    informativeness = _score_dropdown("Informativeness")
    ethical_considerations = _score_dropdown("Ethical Considerations (0–5)")
    demographic_fidelity = _score_dropdown("Demographic Fidelity")
    overall_score = _score_dropdown("Overall Score")

    save_btn = gr.Button("Save Annotation")
    save_status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        export_btn = gr.Button("Download All Annotations")
        export_file = gr.File(label="Exported Annotations", type="filepath")

    # Event wiring. Every persona-rendering callback fills the same
    # components, in the order show_entry returns its values.
    persona_outputs = [
        phash_out,
        md_header_out,
        md_cats_out,
        md_present_out,
        md_clinical_out,
        md_history_out,
        md_function_out,
        md_summary_out,
        md_context_out,
        md_meta_out,
        md_other_out,
    ]

    file_dropdown.change(load_file, inputs=file_dropdown, outputs=persona_outputs)

    # Each navigation button passes its own label to show_entry as the step.
    for nav_button, nav_step in (
        (prev_btn, "Previous"),
        (next_btn, "Next"),
        (shuffle_btn, "Random Shuffle"),
    ):
        nav_button.click(show_entry, inputs=gr.State(nav_step), outputs=persona_outputs)

    save_btn.click(
        save_annotation,
        inputs=[
            phash_out,
            clarity,
            originality,
            coherence,
            diversity,
            realism,
            psychological_depth,
            consistency,
            informativeness,
            ethical_considerations,
            demographic_fidelity,
            overall_score,
        ],
        outputs=save_status,
    )
    export_btn.click(export_annotations, inputs=None, outputs=export_file)

    # Show a persona from the default file as soon as the page loads.
    demo.load(load_file, inputs=gr.State(available_files[0]), outputs=persona_outputs)
demo.launch()

persona_annotator_sample.json ADDED Viewed

The diff for this file is too large to render. See raw diff