import evaluate
import datasets
import moses
from moses import metrics
import pandas as pd
from tdc import Evaluator
from tdc import Oracle
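# Dependency note: `moses` is provided by the `molsets` package and the TDC
# imports by `PyTDC`; these imports assume an environment along the lines of
#   pip install evaluate datasets molsets PyTDC pandas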
| _DESCRIPTION = """ | |
| Comprehensive suite of metrics designed to assess the performance of molecular generation models, for understanding how well a model can produce novel, chemically valid molecules that are relevant to specific research objectives. | |
| """ | |
| _KWARGS_DESCRIPTION = """ | |
| Args: | |
| generated_smiles (`list` of `string`): A collection of SMILES (Simplified Molecular Input Line Entry System) strings generated by the model, ideally encompassing more than 30,000 samples. | |
| train_smiles (`list` of `string`): The dataset of SMILES strings used to train the model, serving as a reference to evaluate the novelty and diversity of the generated molecules. | |
| Returns: | |
| Dectionary item containing various metrics to evaluate model performance | |
| """ | |
| _CITATION = """ | |
| @article{DBLP:journals/corr/abs-1811-12823, | |
| author = {Daniil Polykovskiy and | |
| Alexander Zhebrak and | |
| Benjam{\'{\i}}n S{\'{a}}nchez{-}Lengeling and | |
| Sergey Golovanov and | |
| Oktai Tatanov and | |
| Stanislav Belyaev and | |
| Rauf Kurbanov and | |
| Aleksey Artamonov and | |
| Vladimir Aladinskiy and | |
| Mark Veselov and | |
| Artur Kadurin and | |
| Sergey I. Nikolenko and | |
| Al{\'{a}}n Aspuru{-}Guzik and | |
| Alex Zhavoronkov}, | |
| title = {Molecular Sets {(MOSES):} {A} Benchmarking Platform for Molecular | |
| Generation Models}, | |
| journal = {CoRR}, | |
| volume = {abs/1811.12823}, | |
| year = {2018}, | |
| url = {http://arxiv.org/abs/1811.12823}, | |
| eprinttype = {arXiv}, | |
| eprint = {1811.12823}, | |
| timestamp = {Fri, 26 Nov 2021 15:34:30 +0100}, | |
| biburl = {https://dblp.org/rec/journals/corr/abs-1811-12823.bib}, | |
| bibsource = {dblp computer science bibliography, https://dblp.org} | |
| } | |
| """ | |
class my_metric(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "generated_smiles": datasets.Sequence(datasets.Value("string")),
                    "train_smiles": datasets.Sequence(datasets.Value("string")),
                }
                if self.config_name == "multilabel"
                else {
                    "generated_smiles": datasets.Value("string"),
                    "train_smiles": datasets.Value("string"),
                }
            ),
            reference_urls=[
                "https://github.com/molecularsets/moses",
                "https://tdcommons.ai/functions/oracles/",
            ],
        )
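    # Note: with the "multilabel" config each example is a list of SMILES
    # strings (a Sequence feature); with the default config each example is a
    # single SMILES string. Either way, `compute` gathers the per-example
    # values into lists before they reach `_compute` below.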
    def _compute(self, generated_smiles, train_smiles=None):
        # MOSES metrics (validity, uniqueness, novelty, FCD, scaffold
        # similarity, internal diversity, filters, ...).
        results = metrics.get_all_metrics(gen=generated_smiles, train=train_smiles)
        # evaluator = Evaluator(name='Diversity')
        # diversity = evaluator(generated_smiles)
        evaluator = Evaluator(name='KL_Divergence')
        kl_divergence = evaluator(generated_smiles, train_smiles)
        # evaluator = Evaluator(name='FCD_Distance')
        # fcd_distance = evaluator(generated_smiles, train_smiles)
        # evaluator = Evaluator(name='Novelty')
        # novelty = evaluator(generated_smiles, train_smiles)
        # evaluator = Evaluator(name='Validity')
        # validity = evaluator(generated_smiles)
        results.update({
            # "PyTDC_Diversity": diversity,
            "KL_Divergence": kl_divergence,
            # "PyTDC_FCD_Distance": fcd_distance,
            # "PyTDC_Novelty": novelty,
            # "PyTDC_Validity": validity,
        })
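        # PyTDC oracles score each generated molecule against property- or
        # goal-directed objectives (e.g. drug-likeness, synthetic
        # accessibility, predicted target activity).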
        oracle_list = [
            'QED', 'SA', 'MPO', 'GSK3B', 'JNK3',
            'DRD2', 'LogP', 'Rediscovery', 'Similarity',
            'Median', 'Isomers', 'Valsartan_SMARTS', 'Hop',
        ]
        # Iterate through each oracle and compute its score
        for oracle_name in oracle_list:
            oracle = Oracle(name=oracle_name)
            score = oracle(generated_smiles)
            if oracle_name in ['Rediscovery', 'MPO', 'Similarity', 'Median', 'Isomers', 'Hop']:
                # These oracles are assumed to return a dictionary whose
                # values are lists of per-molecule scores; average each list.
                if isinstance(score, dict):
                    score = {key: sum(values) / len(values) for key, values in score.items()}
            else:
                # The remaining oracles are assumed to return a list of
                # per-molecule scores; average it into a single value.
                if isinstance(score, list):
                    score = sum(score) / len(score)
            results.update({f"PyTDC_{oracle_name}": score})
        return {"results": results}