# pdf/main.py — uploaded by Dpshkh ("Upload 8 files", commit beba6d9, verified)
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from app.parser import extract_text_from_pdf
from app.chunker import chunk_text
from app.retriever import store_chunks_in_pinecone, query_chunks_from_pinecone
from app.groq_llm import query_groq_llm
import uuid
from dotenv import load_dotenv
import logging
load_dotenv()
app = FastAPI()
logging.basicConfig(level=logging.INFO)
@app.post("/run")
async def run_query(file: UploadFile = File(...), question: str = Form(...)):
    """Answer *question* from the uploaded PDF via a RAG pipeline.

    Pipeline: extract PDF text -> chunk it -> store chunks in Pinecone under
    a fresh ``file_id`` -> retrieve the top-matching chunks for the question
    -> ask the Groq LLM with the top two chunks as context.

    Returns a dict with the question, the context chunks used, and the answer.
    Responds 400 when the PDF yields no text, no chunks, or no retrieved
    context; 500 (with the error message) on any unexpected failure.
    """
    try:
        logging.info("📥 Received file and question: %s", question)
        file_bytes = await file.read()
        raw_text = extract_text_from_pdf(file_bytes)
        logging.info("📝 Extracted %d characters of text", len(raw_text))
        if not raw_text.strip():
            return JSONResponse(content={"error": "No extractable text found in PDF."}, status_code=400)
        chunks = chunk_text(raw_text)
        logging.info("✂️ Generated %d chunks", len(chunks))
        if not chunks:
            return JSONResponse(content={"error": "Failed to generate any chunks from text."}, status_code=400)
        # Fresh id per upload so this document's vectors are grouped together.
        file_id = str(uuid.uuid4())
        store_chunks_in_pinecone(chunks, file_id)
        logging.info("📦 Stored chunks in Pinecone with file_id: %s", file_id)
        # NOTE(review): retrieval is not filtered by file_id — results may
        # include chunks from previously uploaded documents; confirm intent.
        top_chunks = query_chunks_from_pinecone(question)
        logging.info("🔍 Retrieved %d top matching chunks", len(top_chunks))
        if not top_chunks:
            return JSONResponse(content={"error": "No relevant context found."}, status_code=400)
        # Hoist the slice once so the context string and the echoed
        # "context_used" list can never disagree.
        selected_chunks = top_chunks[:2]
        context = " ".join(selected_chunks)
        answer = query_groq_llm(context, question)
        return {
            "question": question,
            "context_used": selected_chunks,
            "answer": answer,
        }
    except Exception as e:
        logging.exception("❌ Error during /run endpoint:")
        return JSONResponse(content={"error": str(e)}, status_code=500)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up and pointing to /docs."""
    status_message = "✅ LLM PDF QA API is running. Visit /docs to test."
    return {"message": status_message}