# app.py - Single-tab Gradio App with GPT-Orchestrated RAG Pipeline
"""
AgriScholarQA: Agricultural Research Assistant
Pipeline per user query:
1. GPT classifier checks if the question is an agricultural scholarly question.
2. If NOT agricultural scholarly:
- No RAG retrieval.
- The system just explains what AgriScholarQA is and what it can do.
3. If agricultural scholarly:
- RAG pipeline (AgriCritiqueRAG) retrieves evidence + generates a raw answer.
- The same RAG model self-validates (validate_answer) using the evidence.
    - GPT refines the answer (currently disabled in handle_query, which
      returns the raw RAG answer directly; the critique is kept in meta):
- Thinks about the question + raw answer + evidence + critique.
- Removes repetition and noise.
- Produces a clean, well-structured Markdown answer with:
- main answer
- evidence citations [1], [2], ...
- a short, high-level reasoning section (no step-by-step chain-of-thought).
"""
import os
import json
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional
import gradio as gr
from rag_pipeline import AgriCritiqueRAG
# ---- OpenAI client (GPT) ----
try:
from openai import OpenAI
except ImportError:
OpenAI = None
# ----------------------------------------------------------------------
# Data structures for orchestration
# ----------------------------------------------------------------------
@dataclass
class GPTClassification:
"""Structured view of GPT's classification result."""
is_agri_scholarly: bool
intent_type: str # "agri_scholarly" | "chit_chat" | "generic_qa" | "other"
confidence: float
brief_reason: str
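# Illustrative instance (hypothetical values, matching the JSON schema that
# _classify_with_gpt requests from GPT):
#
#     GPTClassification(
#         is_agri_scholarly=True,
#         intent_type="agri_scholarly",
#         confidence=0.92,
#         brief_reason="Asks about fertilizer effects on crop yield.",
#     )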
@dataclass
class OrchestratorResult:
"""
Unified result returned by the orchestrator to the UI.
mode:
- "rag" : RAG pipeline was used
- "system_chat" : only system explanation / light chat
- "error" : some error (GPT / RAG / OpenAI issue)
answer:
- final answer string to show to user in chat
evidence:
- list of evidence chunks (from RAG) if mode == "rag"
- empty otherwise
meta:
- extra diagnostic info (classification, raw RAG output, critique, etc.)
"""
mode: str
answer: str
evidence: List[Dict[str, Any]]
meta: Dict[str, Any]
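# Illustrative result for a scholarly query (hypothetical values):
#
#     OrchestratorResult(
#         mode="rag",
#         answer="Drought stress during flowering reduces rice yield by ... [1]",
#         evidence=[{"paper_id": "...", "text": "...", "score": 0.83}],
#         meta={"classification": {...}, "critique": "..."},
#     )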
# ----------------------------------------------------------------------
# GPT + RAG Orchestrator
# ----------------------------------------------------------------------
class GPTAgriRAGOrchestrator:
"""
Orchestrator that:
1) Uses GPT to decide if a query is an agricultural scholarly question.
2) For agri-scholarly queries:
- runs the RAG pipeline (AgriCritiqueRAG) for evidence + answer,
- validates that answer using the RAG model,
- sends everything to GPT for polishing and formatting.
3) For non-agri queries:
- no RAG, just a friendly system explanation.
"""
def __init__(
self,
rag_system: AgriCritiqueRAG,
gpt_model_classify: str = "gpt-4.1-mini",
gpt_model_refine: Optional[str] = None,
openai_api_key_env: str = "OPENAI_API_KEY",
):
"""
Args:
rag_system: instance of AgriCritiqueRAG.
gpt_model_classify: OpenAI model used for classification.
gpt_model_refine: OpenAI model used for answer refinement (defaults to same).
openai_api_key_env: env var for the OpenAI API key.
"""
self.rag = rag_system
self.gpt_model_classify = gpt_model_classify
self.gpt_model_refine = gpt_model_refine or gpt_model_classify
api_key = os.getenv(openai_api_key_env)
if OpenAI is None:
self.client = None
self.gpt_available = False
elif not api_key:
self.client = None
self.gpt_available = False
else:
self.client = OpenAI(api_key=api_key)
self.gpt_available = True
# ------------------------------------------------------------------
# 1. GPT classification
# ------------------------------------------------------------------
def _classify_with_gpt(self, question: str) -> GPTClassification:
"""
Ask GPT: is this an agricultural scholarly question?
GPT should return JSON:
{
"is_agri_scholarly": true/false,
"intent_type": "agri_scholarly" | "chit_chat" | "generic_qa" | "other",
"confidence": 0-1,
"brief_reason": "..."
}
"""
# If GPT not available, simple fallback: treat everything as agri_scholarly
if not self.gpt_available:
return GPTClassification(
is_agri_scholarly=True,
intent_type="agri_scholarly",
confidence=0.5,
brief_reason="GPT not available; falling back to always using RAG."
)
system_prompt = (
"You are a classifier for an agricultural research assistant called AgriScholarQA.\n\n"
"Your job: given a single user query, decide whether it is an "
"**agricultural scholarly question** that should trigger a retrieval-augmented "
"pipeline over agricultural research papers.\n\n"
"Definitions:\n"
"- Agricultural scholarly question: asks about crops, soils, climate impacts, "
" agronomy, plant physiology, agricultural experiments, yields, pests, diseases, "
" fertilizers, irrigation, crop models, etc., in a technically informed way.\n"
"- Chit-chat / meta: greetings, what is this system, who are you, etc.\n"
"- Generic QA: everyday knowledge or non-agricultural topics.\n"
"- Other: anything else not clearly fitting above.\n\n"
"Return a strict JSON object with fields:\n"
"- is_agri_scholarly: boolean\n"
"- intent_type: one of \"agri_scholarly\", \"chit_chat\", \"generic_qa\", \"other\"\n"
"- confidence: float between 0 and 1\n"
"- brief_reason: short natural language reason (1–2 sentences)\n\n"
"Do not add extra keys. Do not write explanations outside the JSON."
)
user_prompt = f"User query:\n\"\"\"{question}\"\"\""
resp = self.client.chat.completions.create(
model=self.gpt_model_classify,
temperature=0,
response_format={"type": "json_object"},
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
)
raw = resp.choices[0].message.content.strip()
try:
data = json.loads(raw)
except json.JSONDecodeError as e:
# Fallback: if parsing fails, treat as agri_scholarly with low confidence
return GPTClassification(
is_agri_scholarly=True,
intent_type="agri_scholarly",
confidence=0.5,
brief_reason=f"Failed to parse GPT JSON: {e} | raw={raw[:200]}",
)
return GPTClassification(
is_agri_scholarly=bool(data.get("is_agri_scholarly", False)),
intent_type=str(data.get("intent_type", "other")),
confidence=float(data.get("confidence", 0.0)),
brief_reason=str(data.get("brief_reason", "")),
)
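    # Illustrative classification outcomes (hypothetical examples):
    #   "How does potassium rate affect tomato yield?"  -> agri_scholarly, high confidence
    #   "hi, what can you do?"                          -> chit_chat, is_agri_scholarly=False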
# ------------------------------------------------------------------
# 2. GPT refinement of answer (UPDATED TO BE "EDITOR" ONLY)
# ------------------------------------------------------------------
def _refine_answer_with_gpt(
self,
question: str,
raw_answer: str,
evidence: List[Dict[str, Any]],
critique: str,
) -> str:
"""
Use GPT to clean up and structure the RAG answer.
IMPORTANT (as per design):
- Treat the RAG draft answer as the primary source of content.
- Do NOT delete or drop important points from the draft, except for duplicates.
- Main job is to:
* remove repetition,
* merge overlapping points,
* improve clarity and structure,
* add light formatting (Markdown),
* optionally reference evidence.
- Do NOT invent new facts or numbers that are not in the draft answer.
- If evidence is weak or not directly relevant, just ignore it
instead of commenting on "lack of evidence".
- Do NOT write sentences like "no evidence was available" or
"the snippets do not contain direct results".
        - List the extracted evidence as paper citations.
"""
if not self.gpt_available:
# If GPT not available, just return raw answer + short note
return (
"*(GPT refinement disabled – showing raw RAG answer.)*\n\n"
+ raw_answer
)
# Build compact evidence text (used only as soft support / for citations)
ev_blocks = []
for i, ev in enumerate(evidence[:5], 1):
title = ev.get("paper_title") or ev.get("paper_id") or f"Doc {ev.get('idx', i)}"
snippet = ev.get("text") or ev.get("text_preview") or ""
snippet = " ".join(snippet.split())
snippet = snippet[:800] # cap per evidence block
ev_blocks.append(f"[{i}] {title}\n{snippet}\n")
evidence_text = "\n\n".join(ev_blocks) if ev_blocks else "(no evidence text provided)"
system_prompt = (
"You are an expert agricultural research assistant.\n\n"
"You are given:\n"
"1) The user's question.\n"
"2) A draft answer produced by an internal RAG model (this is the MAIN content).\n"
"3) Evidence snippets from research papers, each labeled [1], [2], etc.\n"
"4) A critique from another checker model.\n\n"
"Your role here is primarily an **editor and organizer**, not a critic:\n"
"- Keep all important substantive points from the draft answer.\n"
"- Do NOT delete major claims or sections unless they are clearly duplicate.\n"
"- Do NOT introduce new claims, numbers, or experimental results that are not in the draft.\n"
"- Do NOT write sentences like β€œno direct evidence is available”, "
" β€œthe snippets do not contain data”, or similar.\n"
"- If evidence does not clearly support a point, simply avoid citing it; do not comment on that.\n\n"
"Your main tasks:\n"
"- Remove repetition and merge overlapping points.\n"
"- Improve clarity, flow, and structure.\n"
"- Format the answer nicely in Markdown (sections, bullets, etc.).\n"
"- Where appropriate, you may attach citations like [1], [2] after statements that are clearly "
" supported by a snippet.\n"
"- Use the critique only to polish wording and structure, not to argue that evidence is missing.\n\n"
"Output ONLY the final, organized answer in Markdown."
)
user_prompt = (
f"QUESTION:\n{question}\n\n"
f"DRAFT ANSWER (from RAG model):\n{raw_answer}\n\n"
f"EVIDENCE SNIPPETS (optional, use only when clearly helpful):\n{evidence_text}\n\n"
f"CRITIQUE (for polishing, not for rejection):\n{critique}\n\n"
"Now rewrite the answer according to the instructions above."
)
resp = self.client.chat.completions.create(
model=self.gpt_model_refine,
temperature=0.3,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
)
refined = resp.choices[0].message.content.strip()
return refined
# ------------------------------------------------------------------
# 3. System chat (non-agri)
# ------------------------------------------------------------------
def _system_chat_answer(self, question: str, cls: GPTClassification) -> str:
"""
For non-agri queries: explain the system and capabilities.
"""
        intro = (
            "Hi! 👋 I'm **AgriScholarQA**, an agricultural scholarly assistant.\n\n"
            "I'm designed specifically to answer **research-oriented questions about agriculture** "
            "using a retrieval-augmented pipeline over scientific papers."
        )
        capabilities = (
            "\n\n**Here's what I can do:**\n"
            "- 📚 Answer questions about **crop production, soil, climate impacts, pests, diseases**, etc.\n"
            "- 🔍 Retrieve and show **evidence from agricultural research papers**.\n"
            "- 🧪 Help you reason about **field experiments, treatments, and agronomic practices**.\n"
            "- 🚨 Detect potential **hallucinations or weakly supported claims**.\n"
        )
meta = (
f"\nYour current query looks like **{cls.intent_type.replace('_', ' ')}** "
"rather than a detailed agricultural scholarly question, so I did not trigger "
"the heavy retrieval pipeline for this turn.\n"
)
        nudge = (
            "\nIf you'd like to use my full capabilities, you can ask questions like:\n"
            "- *“How does nitrogen fertilizer rate affect rice yield under water stress?”*\n"
            "- *“What are sustainable pest management strategies for maize in the tropics?”*\n"
            "- *“How does climate change influence wheat phenology and grain quality?”*\n"
        )
return intro + capabilities + meta + nudge
# ------------------------------------------------------------------
# 4. Main entry point: handle single query
# ------------------------------------------------------------------
def handle_query(self, question: str) -> OrchestratorResult:
"""
Handle a single user question through the full pipeline.
Returns:
OrchestratorResult with:
- mode: "rag" | "system_chat" | "error"
- answer: final answer string
- evidence: list[dict] (if rag)
- meta: classification info, raw_rag_result, critique, etc.
"""
q = (question or "").strip()
if not q:
return OrchestratorResult(
mode="system_chat",
answer="Please enter a question. I specialize in **agricultural research** questions.",
evidence=[],
meta={"classification": None},
)
# 1. Classify (GPT or fallback)
try:
cls = self._classify_with_gpt(q)
except Exception as e:
return OrchestratorResult(
mode="error",
answer=f"⚠️ Error while classifying your question: `{e}`",
evidence=[],
meta={"classification": None},
)
# 2. If NOT agricultural scholarly (or confidence low): system chat
if (not cls.is_agri_scholarly) or cls.confidence < 0.5:
answer = self._system_chat_answer(q, cls)
return OrchestratorResult(
mode="system_chat",
answer=answer,
evidence=[],
meta={"classification": asdict(cls)},
)
# 3. Agricultural scholarly β†’ run RAG
try:
rag_result = self.rag.ask(q)
except Exception as e:
return OrchestratorResult(
mode="error",
answer=(
"Your question looks like an **agricultural scholarly query**, "
"but I hit an error while running the retrieval pipeline:\n\n"
f"`{e}`"
),
evidence=[],
meta={"classification": asdict(cls)},
)
raw_answer = rag_result.get("answer", "") if isinstance(rag_result, dict) else str(rag_result)
evidence = rag_result.get("evidence", []) if isinstance(rag_result, dict) else []
# 4. Self-validation using RAG's own validate_answer method
try:
critique = self.rag.validate_answer(q, raw_answer, evidence)
except Exception as e:
critique = f"(Validation step failed: {e})"
# 5. GPT refinement SKIPPED (User request)
# We directly use the raw_answer from the RAG model.
# The critique is still calculated (step 4) and available in 'meta', but we don't use GPT to merge it.
refined_answer = raw_answer
# Previous GPT logic disabled:
# try:
# refined_answer = self._refine_answer_with_gpt(q, raw_answer, evidence, critique)
# except Exception as e:
# refined_answer = (
# f"⚠️ I had trouble refining the answer with GPT (`{e}`). "
# "Showing the original RAG answer plus critique:\n\n"
# f"{raw_answer}\n\n---\n\n**Internal critique:**\n{critique}"
# )
return OrchestratorResult(
mode="rag",
answer=refined_answer,
evidence=evidence,
meta={
"classification": asdict(cls),
"raw_rag_result": rag_result,
"critique": critique,
},
)
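# Dispatch summary for handle_query (illustrative):
#     ""                      -> mode="system_chat" (empty-input prompt)
#     "hello"                 -> mode="system_chat" (capability explanation, no RAG)
#     "Effect of N on wheat?" -> mode="rag" (evidence + raw answer + critique in meta)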
# ----------------------------------------------------------------------
# Global instances (lazy init)
# ----------------------------------------------------------------------
rag_system: Optional[AgriCritiqueRAG] = None
orchestrator: Optional[GPTAgriRAGOrchestrator] = None
def initialize_orchestrator() -> GPTAgriRAGOrchestrator:
global rag_system, orchestrator
if rag_system is None:
rag_system = AgriCritiqueRAG()
if orchestrator is None:
orchestrator = GPTAgriRAGOrchestrator(rag_system=rag_system)
return orchestrator
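# Note: initialization is deliberately lazy so the heavy AgriCritiqueRAG model loads
# on the first request rather than at import time; later calls reuse the same
# instances (assumes a single-process deployment without concurrent first calls).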
# ----------------------------------------------------------------------
# Helper: Format evidence for display
# ----------------------------------------------------------------------
def format_evidence_for_display(evidence: List[Dict[str, Any]]) -> str:
"""
Format evidence chunks into a readable Markdown reference section.
"""
if not evidence:
return ""
out = ["\n\n---\n### πŸ“š Evidence Sources"]
for i, ev in enumerate(evidence, 1):
title = ev.get("paper_id", "Unknown Paper")
# limit snippet length for display
snippet = ev.get("text", "")[:300].replace("\n", " ") + "..."
score = f"{ev.get('score', 0.0):.4f}"
out.append(f"**[{i}] {title}** (Score: {score})\n> {snippet}")
return "\n".join(out)
# ----------------------------------------------------------------------
# Gradio Chat function
# ----------------------------------------------------------------------
def chat_response(message: str, history: List[List[str]]) -> str:
"""
Main chat function used by Gradio.
Args:
message: current user input
history: list of [user, bot] pairs (not used directly since RAG keeps its own session)
Returns:
Final answer string to display in the chat.
"""
if not message:
return "Please enter a question. I specialize in **agricultural research** questions."
try:
orch = initialize_orchestrator()
result = orch.handle_query(message)
# Append evidence if available (for RAG mode)
final_output = result.answer
if result.mode == "rag" and result.evidence:
evidence_section = format_evidence_for_display(result.evidence)
final_output += evidence_section
return final_output
except Exception as e:
return f"❌ Unexpected error in chat pipeline: `{e}`"
# ----------------------------------------------------------------------
# Build Gradio UI (single-tab ChatInterface with larger chat area)
# ----------------------------------------------------------------------
with gr.Blocks(title="🌾 AgriScholarQA", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🌾 AgriScholarQA Research Assistant
        **Evidence-Based Agricultural QA with Self-Correction & GPT-Orchestrated Answering**
        - 🧠 Uses GPT to detect if your question is an *agricultural scholarly* query.
        - 📚 For scholarly queries: runs a RAG pipeline over research papers.
        - 🔍 The internal model self-checks its answer before responding.
        - ✨ Evidence sources and relevance scores are listed beneath each answer.
        """
    )
gr.ChatInterface(
fn=chat_response,
title=None,
description=None,
examples=[
"How does drought stress during flowering affect rice yield?",
"What are sustainable pest management strategies for maize?",
"How does increased temperature impact wheat phenology and grain quality?",
],
retry_btn=None,
undo_btn=None,
        clear_btn="🗑️ Clear",
chatbot=gr.Chatbot(
height=600, # Increased height for better visibility
show_label=False,
container=True,
scale=1,
elem_id="chatbot"
),
textbox=gr.Textbox(
placeholder="Ask your agricultural research question here...",
container=False,
scale=7,
lines=2
),
)
if __name__ == "__main__":
print("πŸš€ Starting AgriScholarQA (single-tab GPT-orchestrated RAG)...")
demo.launch(share=True)