# app.py - Single-tab Gradio App with GPT-Orchestrated RAG Pipeline
"""
AgriScholarQA: Agricultural Research Assistant

Pipeline per user query:
1. GPT classifier checks if the question is an agricultural scholarly question.
2. If NOT agricultural scholarly:
    - No RAG retrieval.
    - The system just explains what AgriScholarQA is and what it can do.
3. If agricultural scholarly:
    - RAG pipeline (AgriCritiqueRAG) retrieves evidence + generates a raw answer.
    - The same RAG model self-validates (validate_answer) using the evidence.
    - GPT refines the answer:
        - Thinks about the question + raw answer + evidence + critique.
        - Removes repetition and noise.
        - Produces a clean, well-structured Markdown answer with:
            - main answer
            - evidence citations [1], [2], ...
            - a short, high-level reasoning section (no step-by-step chain-of-thought).
"""

import os
import json
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional

import gradio as gr
from rag_pipeline import AgriCritiqueRAG

# ---- OpenAI client (GPT) ----
try:
    from openai import OpenAI
except ImportError:
    OpenAI = None


# ----------------------------------------------------------------------
# Data structures for orchestration
# ----------------------------------------------------------------------
@dataclass
class GPTClassification:
    """Structured view of GPT's classification result."""
    is_agri_scholarly: bool
    intent_type: str      # "agri_scholarly" | "chit_chat" | "generic_qa" | "other"
    confidence: float
    brief_reason: str


@dataclass
class OrchestratorResult:
    """
    Unified result returned by the orchestrator to the UI.

    mode:
        - "rag"         : RAG pipeline was used
        - "system_chat" : only system explanation / light chat
        - "error"       : some error (GPT / RAG / OpenAI issue)

    answer:
        - final answer string to show to user in chat

    evidence:
        - list of evidence chunks (from RAG) if mode == "rag"
        - empty otherwise

    meta:
        - extra diagnostic info (classification, raw RAG output, critique, etc.)
    """
    mode: str
    answer: str
    evidence: List[Dict[str, Any]]
    meta: Dict[str, Any]


# ----------------------------------------------------------------------
# GPT + RAG Orchestrator
# ----------------------------------------------------------------------
class GPTAgriRAGOrchestrator:
    """
    Orchestrator that:
      1) Uses GPT to decide if a query is an agricultural scholarly question.
      2) For agri-scholarly queries:
          - runs the RAG pipeline (AgriCritiqueRAG) for evidence + answer,
          - validates that answer using the RAG model,
          - sends everything to GPT for polishing and formatting.
      3) For non-agri queries:
          - no RAG, just a friendly system explanation.
    """

    def __init__(
        self,
        rag_system: AgriCritiqueRAG,
        gpt_model_classify: str = "gpt-4.1-mini",
        gpt_model_refine: Optional[str] = None,
        openai_api_key_env: str = "OPENAI_API_KEY",
    ):
        """
        Args:
            rag_system: instance of AgriCritiqueRAG.
            gpt_model_classify: OpenAI model used for classification.
            gpt_model_refine: OpenAI model used for answer refinement (defaults to same).
            openai_api_key_env: env var for the OpenAI API key.
        """
        self.rag = rag_system
        self.gpt_model_classify = gpt_model_classify
        self.gpt_model_refine = gpt_model_refine or gpt_model_classify

        api_key = os.getenv(openai_api_key_env)
        if OpenAI is None:
            self.client = None
            self.gpt_available = False
        elif not api_key:
            self.client = None
            self.gpt_available = False
        else:
            self.client = OpenAI(api_key=api_key)
            self.gpt_available = True

    # ------------------------------------------------------------------
    # 1. GPT classification
    # ------------------------------------------------------------------
    def _classify_with_gpt(self, question: str) -> GPTClassification:
        """
        Ask GPT: is this an agricultural scholarly question?

        GPT should return JSON:
          {
            "is_agri_scholarly": true/false,
            "intent_type": "agri_scholarly" | "chit_chat" | "generic_qa" | "other",
            "confidence": 0-1,
            "brief_reason": "..."
          }
        """
        # If GPT not available, simple fallback: treat everything as agri_scholarly
        if not self.gpt_available:
            return GPTClassification(
                is_agri_scholarly=True,
                intent_type="agri_scholarly",
                confidence=0.5,
                brief_reason="GPT not available; falling back to always using RAG."
            )

        system_prompt = (
            "You are a classifier for an agricultural research assistant called AgriScholarQA.\n\n"
            "Your job: given a single user query, decide whether it is an "
            "**agricultural scholarly question** that should trigger a retrieval-augmented "
            "pipeline over agricultural research papers.\n\n"
            "Definitions:\n"
            "- Agricultural scholarly question: asks about crops, soils, climate impacts, "
            "  agronomy, plant physiology, agricultural experiments, yields, pests, diseases, "
            "  fertilizers, irrigation, crop models, etc., in a technically informed way.\n"
            "- Chit-chat / meta: greetings, what is this system, who are you, etc.\n"
            "- Generic QA: everyday knowledge or non-agricultural topics.\n"
            "- Other: anything else not clearly fitting above.\n\n"
            "Return a strict JSON object with fields:\n"
            "- is_agri_scholarly: boolean\n"
            "- intent_type: one of \"agri_scholarly\", \"chit_chat\", \"generic_qa\", \"other\"\n"
            "- confidence: float between 0 and 1\n"
            "- brief_reason: short natural language reason (1–2 sentences)\n\n"
            "Do not add extra keys. Do not write explanations outside the JSON."
        )

        user_prompt = f"User query:\n\"\"\"{question}\"\"\""

        resp = self.client.chat.completions.create(
            model=self.gpt_model_classify,
            temperature=0,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        raw = resp.choices[0].message.content.strip()
        try:
            data = json.loads(raw)
        except json.JSONDecodeError as e:
            # Fallback: if parsing fails, treat as agri_scholarly with low confidence
            return GPTClassification(
                is_agri_scholarly=True,
                intent_type="agri_scholarly",
                confidence=0.5,
                brief_reason=f"Failed to parse GPT JSON: {e} | raw={raw[:200]}",
            )

        return GPTClassification(
            is_agri_scholarly=bool(data.get("is_agri_scholarly", False)),
            intent_type=str(data.get("intent_type", "other")),
            confidence=float(data.get("confidence", 0.0)),
            brief_reason=str(data.get("brief_reason", "")),
        )

    # ------------------------------------------------------------------
    # 2. GPT refinement of answer (UPDATED TO BE "EDITOR" ONLY)
    # ------------------------------------------------------------------
    def _refine_answer_with_gpt(
        self,
        question: str,
        raw_answer: str,
        evidence: List[Dict[str, Any]],
        critique: str,
    ) -> str:
        """
        Use GPT to clean up and structure the RAG answer.

        IMPORTANT (as per design):
        - Treat the RAG draft answer as the primary source of content.
        - Do NOT delete or drop important points from the draft, except for duplicates.
        - Main job is to:
            * remove repetition,
            * merge overlapping points,
            * improve clarity and structure,
            * add light formatting (Markdown),
            * optionally reference evidence.
        - Do NOT invent new facts or numbers that are not in the draft answer.
        - If evidence is weak or not directly relevant, just ignore it
          instead of commenting on "lack of evidence".
        - Do NOT write sentences like "no evidence was available" or
          "the snippets do not contain direct results".
        - List down the extracted evidence as paper citation.
        """
        if not self.gpt_available:
            # If GPT not available, just return raw answer + short note
            return (
                "*(GPT refinement disabled – showing raw RAG answer.)*\n\n"
                + raw_answer
            )

        # Build compact evidence text (used only as soft support / for citations)
        ev_blocks = []
        for i, ev in enumerate(evidence[:5], 1):
            title = ev.get("paper_title") or ev.get("paper_id") or f"Doc {ev.get('idx', i)}"
            snippet = ev.get("text") or ev.get("text_preview") or ""
            snippet = " ".join(snippet.split())
            snippet = snippet[:800]  # cap per evidence block
            ev_blocks.append(f"[{i}] {title}\n{snippet}\n")

        evidence_text = "\n\n".join(ev_blocks) if ev_blocks else "(no evidence text provided)"

        system_prompt = (
            "You are an expert agricultural research assistant.\n\n"
            "You are given:\n"
            "1) The user's question.\n"
            "2) A draft answer produced by an internal RAG model (this is the MAIN content).\n"
            "3) Evidence snippets from research papers, each labeled [1], [2], etc.\n"
            "4) A critique from another checker model.\n\n"
            "Your role here is primarily an **editor and organizer**, not a critic:\n"
            "- Keep all important substantive points from the draft answer.\n"
            "- Do NOT delete major claims or sections unless they are clearly duplicate.\n"
            "- Do NOT introduce new claims, numbers, or experimental results that are not in the draft.\n"
            "- Do NOT write sentences like “no direct evidence is available”, "
            "  “the snippets do not contain data”, or similar.\n"
            "- If evidence does not clearly support a point, simply avoid citing it; do not comment on that.\n\n"
            "Your main tasks:\n"
            "- Remove repetition and merge overlapping points.\n"
            "- Improve clarity, flow, and structure.\n"
            "- Format the answer nicely in Markdown (sections, bullets, etc.).\n"
            "- Where appropriate, you may attach citations like [1], [2] after statements that are clearly "
            "  supported by a snippet.\n"
            "- Use the critique only to polish wording and structure, not to argue that evidence is missing.\n\n"
            "Output ONLY the final, organized answer in Markdown."
        )

        user_prompt = (
            f"QUESTION:\n{question}\n\n"
            f"DRAFT ANSWER (from RAG model):\n{raw_answer}\n\n"
            f"EVIDENCE SNIPPETS (optional, use only when clearly helpful):\n{evidence_text}\n\n"
            f"CRITIQUE (for polishing, not for rejection):\n{critique}\n\n"
            "Now rewrite the answer according to the instructions above."
        )

        resp = self.client.chat.completions.create(
            model=self.gpt_model_refine,
            temperature=0.3,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        refined = resp.choices[0].message.content.strip()
        return refined

    # ------------------------------------------------------------------
    # 3. System chat (non-agri)
    # ------------------------------------------------------------------
    def _system_chat_answer(self, question: str, cls: GPTClassification) -> str:
        """
        For non-agri queries: explain the system and capabilities.
        """
        intro = (
            "Hi! 👋 I’m **AgriScholarQA**, an agricultural scholarly assistant.\n\n"
            "I’m designed specifically to answer **research-oriented questions about agriculture** "
            "using a retrieval-augmented pipeline over scientific papers."
        )

        capabilities = (
            "\n\n**Here’s what I can do:**\n"
            "- 📚 Answer questions about **crop production, soil, climate impacts, pests, diseases**, etc.\n"
            "- 🔍 Retrieve and show **evidence from agricultural research papers**.\n"
            "- 🧪 Help you reason about **field experiments, treatments, and agronomic practices**.\n"
            "- 🚨 Detect potential **hallucinations or weakly supported claims**.\n"
        )

        meta = (
            f"\nYour current query looks like **{cls.intent_type.replace('_', ' ')}** "
            "rather than a detailed agricultural scholarly question, so I did not trigger "
            "the heavy retrieval pipeline for this turn.\n"
        )

        nudge = (
            "\nIf you’d like to use my full capabilities, you can ask questions like:\n"
            "- *“How does nitrogen fertilizer rate affect rice yield under water stress?”*\n"
            "- *“What are sustainable pest management strategies for maize in the tropics?”*\n"
            "- *“How does climate change influence wheat phenology and grain quality?”*\n"
        )

        return intro + capabilities + meta + nudge

    # ------------------------------------------------------------------
    # 4. Main entry point: handle single query
    # ------------------------------------------------------------------
    def handle_query(self, question: str) -> OrchestratorResult:
        """
        Handle a single user question through the full pipeline.

        Returns:
            OrchestratorResult with:
            - mode: "rag" | "system_chat" | "error"
            - answer: final answer string
            - evidence: list[dict] (if rag)
            - meta: classification info, raw_rag_result, critique, etc.
        """
        q = (question or "").strip()
        if not q:
            return OrchestratorResult(
                mode="system_chat",
                answer="Please enter a question. I specialize in **agricultural research** questions.",
                evidence=[],
                meta={"classification": None},
            )

        # 1. Classify (GPT or fallback)
        try:
            cls = self._classify_with_gpt(q)
        except Exception as e:
            return OrchestratorResult(
                mode="error",
                answer=f"⚠️ Error while classifying your question: `{e}`",
                evidence=[],
                meta={"classification": None},
            )

        # 2. If NOT agricultural scholarly (or confidence low): system chat
        if (not cls.is_agri_scholarly) or cls.confidence < 0.5:
            answer = self._system_chat_answer(q, cls)
            return OrchestratorResult(
                mode="system_chat",
                answer=answer,
                evidence=[],
                meta={"classification": asdict(cls)},
            )

        # 3. Agricultural scholarly → run RAG
        try:
            rag_result = self.rag.ask(q)
        except Exception as e:
            return OrchestratorResult(
                mode="error",
                answer=(
                    "Your question looks like an **agricultural scholarly query**, "
                    "but I hit an error while running the retrieval pipeline:\n\n"
                    f"`{e}`"
                ),
                evidence=[],
                meta={"classification": asdict(cls)},
            )

        raw_answer = rag_result.get("answer", "") if isinstance(rag_result, dict) else str(rag_result)
        evidence = rag_result.get("evidence", []) if isinstance(rag_result, dict) else []

        # 4. Self-validation using RAG's own validate_answer method
        try:
            critique = self.rag.validate_answer(q, raw_answer, evidence)
        except Exception as e:
            critique = f"(Validation step failed: {e})"

        # 5. GPT refinement SKIPPED (User request)
        # We directly use the raw_answer from the RAG model.
        # The critique is still calculated (step 4) and available in 'meta', but we don't use GPT to merge it.
        refined_answer = raw_answer
        
        # Previous GPT logic disabled:
        # try:
        #     refined_answer = self._refine_answer_with_gpt(q, raw_answer, evidence, critique)
        # except Exception as e:
        #     refined_answer = (
        #         f"⚠️ I had trouble refining the answer with GPT (`{e}`). "
        #         "Showing the original RAG answer plus critique:\n\n"
        #         f"{raw_answer}\n\n---\n\n**Internal critique:**\n{critique}"
        #     )

        return OrchestratorResult(
            mode="rag",
            answer=refined_answer,
            evidence=evidence,
            meta={
                "classification": asdict(cls),
                "raw_rag_result": rag_result,
                "critique": critique,
            },
        )


# ----------------------------------------------------------------------
# Global instances (lazy init)
# ----------------------------------------------------------------------
rag_system: Optional[AgriCritiqueRAG] = None
orchestrator: Optional[GPTAgriRAGOrchestrator] = None


def initialize_orchestrator() -> GPTAgriRAGOrchestrator:
    global rag_system, orchestrator
    if rag_system is None:
        rag_system = AgriCritiqueRAG()
    if orchestrator is None:
        orchestrator = GPTAgriRAGOrchestrator(rag_system=rag_system)
    return orchestrator


# ----------------------------------------------------------------------
# Helper: Format evidence for display
# ----------------------------------------------------------------------
def format_evidence_for_display(evidence: List[Dict[str, Any]]) -> str:
    """
    Format evidence chunks into a readable Markdown reference section.
    """
    if not evidence:
        return ""
    
    out = ["\n\n---\n### 📚 Evidence Sources"]
    for i, ev in enumerate(evidence, 1):
        title = ev.get("paper_id", "Unknown Paper")
        # limit snippet length for display
        snippet = ev.get("text", "")[:300].replace("\n", " ") + "..."
        score = f"{ev.get('score', 0.0):.4f}"
        
        out.append(f"**[{i}] {title}** (Score: {score})\n> {snippet}")
    
    return "\n".join(out)


# ----------------------------------------------------------------------
# Gradio Chat function
# ----------------------------------------------------------------------
def chat_response(message: str, history: List[List[str]]) -> str:
    """
    Main chat function used by Gradio.

    Args:
        message: current user input
        history: list of [user, bot] pairs (not used directly since RAG keeps its own session)

    Returns:
        Final answer string to display in the chat.
    """
    if not message:
        return "Please enter a question. I specialize in **agricultural research** questions."

    try:
        orch = initialize_orchestrator()
        result = orch.handle_query(message)
        
        # Append evidence if available (for RAG mode)
        final_output = result.answer
        if result.mode == "rag" and result.evidence:
             evidence_section = format_evidence_for_display(result.evidence)
             final_output += evidence_section
             
        return final_output
    except Exception as e:
        return f"❌ Unexpected error in chat pipeline: `{e}`"


# ----------------------------------------------------------------------
# Build Gradio UI (single-tab ChatInterface with larger chat area)
# ----------------------------------------------------------------------
with gr.Blocks(title="🌾 AgriScholarQA", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🌾 AgriScholarQA Research Assistant
        **Evidence-Based Agricultural QA with Self-Correction & GPT-Orchestrated Answering**

        - 🧠 Uses GPT to detect if your question is an *agricultural scholarly* query.
        - 📚 For scholarly queries: runs a RAG pipeline over research papers.
        - 🔍 The internal model self-checks its answer before responding.
        - ✨ GPT then refines the answer for clarity, formatting, and evidence grounding.
        """
    )

    gr.ChatInterface(
        fn=chat_response,
        title=None,
        description=None,
        examples=[
            "How does drought stress during flowering affect rice yield?",
            "What are sustainable pest management strategies for maize?",
            "How does increased temperature impact wheat phenology and grain quality?",
        ],
        retry_btn=None,
        undo_btn=None,
        clear_btn="🗑️ Clear",
        chatbot=gr.Chatbot(
            height=600,  # Increased height for better visibility
            show_label=False,
            container=True,
            scale=1,
            elem_id="chatbot"
        ),
        textbox=gr.Textbox(
            placeholder="Ask your agricultural research question here...",
            container=False,
            scale=7,
            lines=2
        ),
    )

if __name__ == "__main__":
    print("🚀 Starting AgriScholarQA (single-tab GPT-orchestrated RAG)...")
    demo.launch(share=True)