csebuetnlp/xlsum
Updated • 4.91k • 152
How to use rudalson/kobart-summarization-xlsum with Transformers:
# Use a pipeline as a high-level helper
# Warning: Pipeline type "summarization" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
#   pip install "transformers<5.0.0"
from transformers import pipeline
pipe = pipeline("summarization", model="rudalson/kobart-summarization-xlsum")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("rudalson/kobart-summarization-xlsum")
model = AutoModelForSeq2SeqLM.from_pretrained("rudalson/kobart-summarization-xlsum")์ด ๋ชจ๋ธ์ gogamza/kobart-summarization์ ๊ธฐ๋ฐ์ผ๋ก XL-Sum ํ๊ตญ์ด ๋ฐ์ดํฐ์
์ ํ์ฉํด LoRA(Low-Rank Adaptation) ๊ธฐ๋ฒ์ผ๋ก ํ์ธํ๋ํ ๋ชจ๋ธ์
๋๋ค. ๋ด์ค ๊ธฐ์ฌ์ ๊ฐ์ ๊ธด ํ
์คํธ๋ฅผ ํต์ฌ ๋ฌธ์ฅ์ผ๋ก ์์ถํ๋ ๋ฐ ํนํ๋์ด ์์ต๋๋ค.
# Example: summarize a Korean news excerpt with this model.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Use the same repository id as the other snippets on this model card.
MODEL_ID = "rudalson/kobart-summarization-xlsum"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

# Sample input: two sentences of Korean news text (stored as UTF-8).
text = """5월 1일 노동절부터 5일 어린이날까지 이어지는 최대 5일간의 황금연휴를 앞두고 국내 주요 관광지의 숙박 요금이 천정부지로 치솟고 있습니다.
중동전쟁 여파로 항공 유류비가 오르자 해외 대신 국내 여행으로 수요가 몰린 데다, 일본과 중국의 연휴까지 겹치며 숙소 구하기가 그야말로 '하늘의 별따기'가 된 상황입니다."""

# Tokenize to PyTorch tensors, truncating inputs longer than 1024 tokens.
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)

# Only input_ids are forwarded to generate(); the other tokenizer outputs
# are intentionally not passed.
summary_ids = model.generate(
    inputs["input_ids"],
    num_beams=4,
    max_length=128,
    min_length=10,
    no_repeat_ngram_size=3,
    repetition_penalty=1.2,
    early_stopping=True,
)

# Decode the first (and only) generated sequence, dropping special tokens.
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
ํ๊ฐ ์ ํํ์ ๋ถ์๊ธฐ Kiwi๋ฅผ ์ฌ์ฉํ์ฌ ํ ํฐํ ํ ์ธก์ ๋์์ต๋๋ค.
| Metric | Score |
|---|---|
| ROUGE-1 | 11.63% |
| ROUGE-L | 11.57% |
Base model
gogamza/kobart-summarization