File size: 441 Bytes
beba6d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import io
from itertools import islice

from pypdf import PdfReader

def extract_text_from_pdf(file_bytes: bytes, max_pages: int = 20) -> str:
    """Extract text from the leading pages of an in-memory PDF.

    Args:
        file_bytes: Raw contents of the PDF document.
        max_pages: Upper bound on the number of pages read, counted from
            the first page. Zero or a negative value reads no pages.

    Returns:
        The text of each processed page joined with newlines. Pages for
        which no text is extracted are skipped, so the result is an empty
        string when nothing could be extracted.

    Any error raised by pypdf while opening or parsing the document is not
    caught here and propagates to the caller.
    """
    reader = PdfReader(io.BytesIO(file_bytes))

    # islice rejects negative stops; clamp so a non-positive limit simply
    # yields no pages instead of raising.
    page_limit = max(max_pages, 0)

    # islice stops before requesting any page past the limit, so pages
    # beyond it are never pulled from the reader.
    page_texts = (
        page.extract_text() for page in islice(reader.pages, page_limit)
    )

    # Skip pages whose extraction came back empty.
    return "\n".join(text for text in page_texts if text)