Spaces:

transZ
/

test_parascore

Paused

App Files Files Community

test_parascore / test_parascore.py

transZ

Fix bug

4fdef25 almost 3 years ago

raw

history blame contribute delete

5.07 kB

	# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""TODO: Add a description here."""

	import evaluate
	import datasets
	import nltk


	# BibTeX entry for the ParaScore paper; handed to `evaluate.MetricInfo` as
	# the `citation` field in `test_parascore._info`.
	_CITATION = """\
	@article{Shen2022,
	archivePrefix = {arXiv},
	arxivId = {2202.08479},
	author = {Shen, Lingfeng and Liu, Lemao and Jiang, Haiyun and Shi, Shuming},
	journal = {EMNLP 2022 - 2022 Conference on Empirical Methods in Natural Language Processing, Proceedings},
	eprint = {2202.08479},
	month = {feb},
	number = {1},
	pages = {3178--3190},
	title = {{On the Evaluation Metrics for Paraphrase Generation}},
	url = {http://arxiv.org/abs/2202.08479},
	year = {2022}
	}
	"""

	# One-line summary of the metric; used as the `description` field of
	# `evaluate.MetricInfo` and passed to `add_start_docstrings` on the class.
	_DESCRIPTION = """\
	ParaScore is a new metric to scoring the performance of paraphrase generation tasks
	"""


	# TODO: Add description of the arguments of the module here
	_KWARGS_DESCRIPTION = """
	Calculates how good the paraphrase is
	Args:
	predictions: list of predictions to score. Each predictions
	should be a string with tokens separated by spaces.
	references: list of reference for each prediction. Each
	reference should be a string with tokens separated by spaces.
	Returns:
	score: description of the first score,
	Examples:
	Examples should be written in doctest format, and should illustrate how
	to use the function.

	>>> metrics = evaluate.load("transZ/test_parascore")
	>>> results = my_new_module.compute(references=["They work for 6 months"], predictions=["They have working for 6 months"])
	>>> print(results)
	{'score': 0.85}
	"""

	# TODO: Define external resources urls if needed
	BAD_WORDS_URL = "https://github.com/shadowkiller33/parascore_toolkit"


	@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
	class test_parascore(evaluate.Metric):
	    """ParaScore metric for scoring generated paraphrases.

	    Each score is the SBERT cosine similarity between a prediction and its
	    reference (rounded to two decimals) plus a small weighted term derived
	    from their length-normalised edit distance.
	    """

	    def _info(self):
	        """Return the module metadata: texts, accepted input schemas and links."""
	        return evaluate.MetricInfo(
	            # This is the description that will appear on the modules page.
	            module_type="metric",
	            description=_DESCRIPTION,
	            citation=_CITATION,
	            inputs_description=_KWARGS_DESCRIPTION,
	            # Two accepted input layouts: a prediction string paired with
	            # either a sequence of reference strings or one reference string.
	            features=[
	                datasets.Features(
	                    {
	                        "predictions": datasets.Value("string", id="sequence"),
	                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
	                    }
	                ),
	                datasets.Features(
	                    {
	                        "predictions": datasets.Value("string", id="sequence"),
	                        "references": datasets.Value("string", id="sequence"),
	                    }
	                ),
	            ],
	            # Homepage of the module for documentation
	            homepage="https://github.com/shadowkiller33/ParaScore",
	            # Additional links to the codebase or references
	            codebase_urls=["https://github.com/shadowkiller33/ParaScore"],
	            reference_urls=["https://github.com/shadowkiller33/ParaScore"]
	        )

	    def _download_and_prepare(self, dl_manager):
	        """Load the `transZ/sbert_cosine` module that `_compute` relies on.

	        `dl_manager` is accepted for interface compatibility and is not used.
	        """
	        self.sbert_cosine = evaluate.load('transZ/sbert_cosine')

	    def _edit(self, x, y, lang='en'):
	        """Return the edit distance between `x` and `y` divided by the longer length.

	        Args:
	            x: first string.
	            y: second string.
	            lang: when 'zh', spaces are removed from both strings first.

	        Returns:
	            A float in [0, 1]; 0.0 when both strings are empty.
	        """
	        if lang == 'zh':
	            x = x.replace(" ", "")
	            y = y.replace(" ", "")
	        longest = max(len(x), len(y))
	        if longest == 0:
	            # Two empty strings are identical; avoid dividing by zero.
	            return 0.0
	        return nltk.edit_distance(x, y) / longest

	    def _diverse(self, cands, sources, lang='en'):
	        """Compute one diversity term per (candidate, source) pair.

	        The normalised edit distance is capped at the threshold; below the
	        threshold it is mapped linearly from -1 (distance 0) up to the
	        threshold value (distance == threshold).

	        NOTE(review): `_info` also declares a schema where each reference is a
	        sequence of strings, but this method hands every source to `_edit` as
	        a single string -- confirm how multi-reference inputs should be handled.

	        Args:
	            cands: iterable of candidate strings.
	            sources: iterable of source strings, paired with `cands` by position.
	            lang: forwarded to `_edit`.

	        Returns:
	            List of floats, one per pair.
	        """
	        thresh = 0.35
	        diversity = []
	        for cand, source in zip(cands, sources):
	            div = self._edit(cand, source, lang)
	            if div >= thresh:
	                ss = thresh
	            else:
	                ss = -1 + ((thresh + 1) / thresh) * div
	            diversity.append(ss)
	        return diversity

	    def _compute(self, predictions, references, model_type='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', lang='en'):
	        """Return the ParaScore for each prediction/reference pair.

	        Args:
	            predictions: list of predicted paraphrases.
	            references: list of references, paired with `predictions` by position.
	            model_type: model identifier forwarded to the `sbert_cosine` module.
	            lang: language code forwarded to the diversity computation.

	        Returns:
	            Dict with key "score" holding a list with one float per pair.
	        """
	        similarity = self.sbert_cosine.compute(predictions=predictions, references=references, model_type=model_type)
	        # The semantic-similarity part is rounded to two decimals before the
	        # diversity term is added.
	        sbert_score = [round(v, 2) for v in similarity['score']]
	        diversity = self._diverse(predictions, references, lang)

	        # Diversity contributes with a fixed weight of 0.05.
	        combined = [s + 0.05 * d for s, d in zip(sbert_score, diversity)]
	        return {
	            "score": combined,
	        }