# File: ocr_processor.py
import io

import numpy as np
from paddleocr import PaddleOCR
from PIL import Image

# Initialize PaddleOCR with modern, compatible settings.
# Built once at import time so every call to extract_text_from_image
# reuses the same engine instance.
ocr = PaddleOCR(lang='en', use_angle_cls=True)


def extract_text_from_image(image_bytes: bytes) -> str:
    """Perform OCR on an encoded image using the module-level PaddleOCR engine.

    Args:
        image_bytes: Raw bytes of an image file in any format that
            ``PIL.Image.open`` can decode.

    Returns:
        The recognized text fragments joined with single spaces.
        If the engine reports no text, the literal string
        ``"No text detected in the image."`` is returned.
        If any exception is raised while decoding or recognizing, the string
        ``"An error occurred during OCR: <message>"`` is returned instead of
        propagating the exception.
    """
    try:
        # 1. Decode the bytes and hand the engine an RGB pixel array.
        #    The context manager releases the PIL image handle as soon as
        #    the pixel data has been copied into the array.
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_array = np.array(img.convert("RGB"))

        # 2. Run OCR.
        result = ocr.ocr(img_array)

        # 3. Extract and combine the recognized text.
        if not result or not result[0]:
            return "No text detected in the image."

        # NOTE(review): assumes each entry of result[0] is indexed as
        # entry[1][0] == recognized text (a [box, (text, score)] layout) --
        # confirm against the installed PaddleOCR version's result format.
        text_lines = [line[1][0] for line in result[0]]
        return " ".join(text_lines)
    except Exception as e:
        # Deliberate catch-all: callers receive a message string rather
        # than an exception, so the return type is always ``str``.
        return f"An error occurred during OCR: {str(e)}"