# pdf/main.py — uploaded by Dpshkh ("Upload 8 files", commit beba6d9, verified)
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from app.parser import extract_text_from_pdf
from app.chunker import chunk_text
from app.retriever import store_chunks_in_pinecone, query_chunks_from_pinecone
from app.groq_llm import query_groq_llm
import uuid
from dotenv import load_dotenv
import logging
load_dotenv()
app = FastAPI()
logging.basicConfig(level=logging.INFO)
@app.post("/run")
async def run_query(file: UploadFile = File(...), question: str = Form(...)):
    """Answer *question* from the uploaded PDF via a RAG pipeline.

    Pipeline: extract PDF text -> chunk it -> store chunks in Pinecone under
    a fresh ``file_id`` -> retrieve the top-matching chunks for the question
    -> ask the Groq LLM with the top two chunks as context.

    Returns a dict with the question, the context chunks used, and the answer.
    Responds 400 when the PDF yields no text, no chunks, or no retrieved
    context; 500 (with the error message) on any unexpected failure.
    """
    try:
        logging.info("📥 Received file and question: %s", question)
        file_bytes = await file.read()
        raw_text = extract_text_from_pdf(file_bytes)
        logging.info("📝 Extracted %d characters of text", len(raw_text))
        if not raw_text.strip():
            return JSONResponse(content={"error": "No extractable text found in PDF."}, status_code=400)
        chunks = chunk_text(raw_text)
        logging.info("✂️ Generated %d chunks", len(chunks))
        if not chunks:
            return JSONResponse(content={"error": "Failed to generate any chunks from text."}, status_code=400)
        # Fresh id per upload so this document's vectors are grouped together.
        file_id = str(uuid.uuid4())
        store_chunks_in_pinecone(chunks, file_id)
        logging.info("📦 Stored chunks in Pinecone with file_id: %s", file_id)
        # NOTE(review): retrieval is not filtered by file_id — results may
        # include chunks from previously uploaded documents; confirm intent.
        top_chunks = query_chunks_from_pinecone(question)
        logging.info("🔍 Retrieved %d top matching chunks", len(top_chunks))
        if not top_chunks:
            return JSONResponse(content={"error": "No relevant context found."}, status_code=400)
        # Hoist the slice once so the context string and the echoed
        # "context_used" list can never disagree.
        selected_chunks = top_chunks[:2]
        context = " ".join(selected_chunks)
        answer = query_groq_llm(context, question)
        return {
            "question": question,
            "context_used": selected_chunks,
            "answer": answer,
        }
    except Exception as e:
        logging.exception("❌ Error during /run endpoint:")
        return JSONResponse(content={"error": str(e)}, status_code=500)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up and pointing to /docs."""
    status_message = "✅ LLM PDF QA API is running. Visit /docs to test."
    return {"message": status_message}