# File: ocr_processor.py
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
import io

# Build the PaddleOCR engine once, at import time, so that every call to
# extract_text_from_image() reuses the same instance instead of constructing
# a new one per request.
# lang='en' presumably selects the English models and use_angle_cls=True
# presumably enables the text-angle classifier; confirm both against the
# documentation of the installed PaddleOCR version.
ocr = PaddleOCR(lang='en', use_angle_cls=True)

def extract_text_from_image(image_bytes: bytes) -> str:
    """
    Run OCR on an encoded image and return the recognized text.

    The image is decoded with Pillow, converted to RGB, turned into a NumPy
    array and passed to the module-level ``ocr`` engine. Only the first
    entry of the engine's result (the first image/page) is used.

    Args:
        image_bytes: Raw bytes of an encoded image that Pillow can open.

    Returns:
        The recognized text fragments joined by single spaces, in the order
        the engine reports them. If nothing is recognized, the literal
        message ``"No text detected in the image."``. If any step fails, a
        message starting with ``"An error occurred during OCR: "`` followed
        by the exception text.

    Raises:
        Nothing derived from ``Exception``: failures are reported through
        the returned string, so callers must inspect the return value.
    """
    try:
        # 1. Decode the bytes. The context manager closes the source image
        #    as soon as the RGB copy has been materialised as an array.
        with Image.open(io.BytesIO(image_bytes)) as img:
            # NOTE(review): the array is in RGB channel order; PaddleOCR's
            # own file loading presumably goes through OpenCV (BGR) --
            # confirm that RGB input is what the models should receive.
            img_array = np.array(img.convert("RGB"))

        # 2. Run OCR
        result = ocr.ocr(img_array)

        # 3. Extract the recognized text from the first result entry
        page = result[0] if result else None
        if isinstance(page, dict):
            # Dict-style result (as returned by newer PaddleOCR releases):
            # the recognized strings live under "rec_texts". Iterating the
            # dict itself would walk its keys and yield garbage.
            text_lines = list(page.get("rec_texts") or [])
        elif page:
            # List-style result: each line is [box, (text, confidence)].
            text_lines = [line[1][0] for line in page]
        else:
            text_lines = []

        if text_lines:
            return " ".join(text_lines)
        return "No text detected in the image."

    except Exception as e:
        # Deliberate best-effort boundary: callers receive a message string
        # rather than an exception.
        return f"An error occurred during OCR: {str(e)}"