Spaces:
Running
Running
File size: 441 Bytes
beba6d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
from pypdf import PdfReader
import io
def extract_text_from_pdf(file_bytes: bytes, max_pages: int = 20):
    """Return the text of the leading pages of an in-memory PDF.

    The raw bytes are wrapped in an ``io.BytesIO`` buffer and handed to
    ``PdfReader``. Pages are visited in order; each page's
    ``extract_text()`` result is kept only when it is truthy, and the
    kept pieces are joined with a single newline.

    Args:
        file_bytes: Raw contents of the PDF file.
        max_pages: Upper bound on how many pages are examined. A value
            of zero or less means no page is examined and an empty
            string is returned.

    Returns:
        The newline-joined text of the examined pages, or ``""`` when
        none of them produced any text.
    """
    document = PdfReader(io.BytesIO(file_bytes))

    pieces = []
    pages_left = max_pages
    for page in document.pages:
        # Stop early to limit memory use once the page budget is spent.
        if pages_left <= 0:
            break
        pages_left -= 1

        content = page.extract_text()
        if not content:
            # Nothing extractable on this page; it contributes no line.
            continue
        pieces.append(content)

    return "\n".join(pieces)
|