# File: main.py
"""FastAPI service exposing one endpoint that OCRs an invoice image and
converts the extracted text into structured JSON."""

import logging
import os

# Set environment variables for performance.
# These are assigned BEFORE the processor modules are imported: thread-count
# settings such as OMP_NUM_THREADS are typically read when the native library
# initialises, so assigning them after the import may have no effect.
# NOTE(review): presumably llm_processor / ocr_processor pull in OpenMP-backed
# and tokenizer libraries at import time -- confirm against those modules.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import uvicorn  # noqa: E402
from fastapi import FastAPI, File, HTTPException, UploadFile  # noqa: E402
from fastapi.responses import JSONResponse  # noqa: E402

from llm_processor import generate_json_from_text, load_llm_model  # noqa: E402
from ocr_processor import extract_text_from_image  # noqa: E402

logger = logging.getLogger(__name__)

# Create the FastAPI app
app = FastAPI(
    title="Invoice Processing API",
    description=(
        "A single endpoint to process an invoice image and return both raw "
        "text and structured JSON."
    ),
)


# NOTE(review): recent FastAPI releases deprecate ``on_event`` in favour of
# lifespan handlers; consider migrating once the minimum FastAPI version of
# this project is confirmed.
@app.on_event("startup")
def startup_event() -> None:
    """Load models once when the server starts."""
    load_llm_model()


@app.get("/", summary="Health Check")
def read_root():
    """A simple endpoint to check if the API is running."""
    return {"status": "API is running"}


# The endpoint docstring below is left untouched on purpose: FastAPI publishes
# it as the operation description in the generated OpenAPI schema.
#
# Parameters:
#   file -- the uploaded multipart file; must declare an ``image/*`` type.
# Returns:
#   JSONResponse with keys ``extracted_text`` and ``structured_json``.
# Raises:
#   HTTPException(400) when the upload is not declared as an image.
#   HTTPException(500) when reading, OCR or JSON generation fails.
@app.post("/process_invoice/", summary="Process Invoice to Text & JSON")
async def process_invoice_endpoint(file: UploadFile = File(...)):
    """
    Accepts an image file and returns both the extracted OCR text and the structured JSON data.
    """
    # Validate file type. ``content_type`` is None when the client sends no
    # Content-Type header for the part; treat that as "not an image" (400)
    # rather than crashing on ``None.startswith``.
    declared_type = file.content_type or ""
    if not declared_type.startswith("image/"):
        raise HTTPException(
            status_code=400,
            detail="Only image files are supported (e.g., PNG, JPEG).",
        )

    try:
        image_bytes = await file.read()

        # Step 1: Extract text from the image using the OCR processor.
        # NOTE(review): this and the LLM call below are synchronous calls made
        # from an async handler, so they hold the event loop while they run.
        # Offloading to a worker thread needs the helpers' thread-safety
        # confirmed first.
        raw_text = extract_text_from_image(image_bytes)

        if not raw_text or "No text detected" in raw_text:
            return JSONResponse(content={
                "extracted_text": raw_text,
                "structured_json": {
                    "error": "No text could be extracted from the image."
                },
            })

        # Step 2: Generate structured JSON from the extracted text
        json_data = generate_json_from_text(raw_text)

        # Step 3: Combine both results into a single response
        combined_response = {
            "extracted_text": raw_text,
            "structured_json": json_data,
        }
        return JSONResponse(content=combined_response)

    except HTTPException:
        # Preserve any deliberate HTTP error instead of rewrapping it as a 500.
        raise
    except Exception as e:
        # Boundary handler: record the traceback server-side, then report a
        # 500 to the client while keeping the original cause chained.
        logger.exception("Invoice processing failed for upload %r", file.filename)
        raise HTTPException(
            status_code=500, detail=f"An error occurred: {str(e)}"
        ) from e


if __name__ == "__main__":
    # Disable reload for production
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)