# Copyright 2020 The HuggingFace Datasets Authors
# and 2025 Daniel Duckworth (metric integration)
#
# Licensed under the Apache License, Version 2.0
#
# Metric: ISCO-08 Hierarchical Precision/Recall/Fβ with micro/macro aggregation.
# The metric treats each code as belonging to all of its ancestor prefixes
# (excluding the root).

import datasets
import evaluate

# Import the implementation from a sibling module file.
# If packaging as an evaluate module, place both files in the same module directory.
from ham import hierarchical_scores

_CITATION = """\
@article{kosmopoulos2015evaluation,
  title={Evaluation Measures for Hierarchical Classification: A Unified View and Novel Approaches},
  author={Kosmopoulos, Aris and Partalas, Ioannis and Gaussier, Eric and Paliouras, George and Androutsopoulos, Ion},
  journal={Data Mining and Knowledge Discovery},
  year={2015}
}
@misc{isco08,
  title={International Standard Classification of Occupations (ISCO-08)},
  howpublished={International Labour Organization},
  year={2008}
}
"""

_DESCRIPTION = """\
Hierarchical precision (hP), recall (hR), and Fβ (hFβ) for ISCO-08 codes.
Each code is expanded to its ancestor closure (the set of all of its non-empty
prefixes), and the overlap between the predicted and reference closures
determines hP/hR. This rewards predictions at the correct depth and penalizes
them in proportion to their distance from the reference in the hierarchy.
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions (List[str] | List[int]): Predicted ISCO-08 codes (length 1..4).
        Strings are recommended to preserve leading zeros.
    references (List[str] | List[int]): Reference ISCO-08 codes (length 1..4).
    beta (float, optional): F-measure beta parameter. Default 1.0.
    average (str, optional): "micro", "macro", or "both". Default "both".
    return_per_instance (bool, optional): If True, also return a list of
        per-instance dicts with hP/hR/hFβ. Default False.

Returns (dict):
    If average includes "macro":
        - macro_hP
        - macro_hR
        - macro_hF_beta_mean          # mean of per-instance hFβ
        - macro_hF_beta_from_macroPR  # Fβ computed from macro hP/hR
    If average includes "micro":
        - micro_hP
        - micro_hR
        - micro_hF_beta
    If return_per_instance:
        - per_instance: List[{"hP": float, "hR": float, "hF_beta": float}, ...]

Examples:
    >>> import evaluate
    >>> metric = evaluate.load("path/to/isco_hierachical_accuracy_v2.py")
    >>> refs = ["2211", "22", "3112"]
    >>> preds = ["22", "2213", "2211"]
    >>> metric.compute(references=refs, predictions=preds, beta=1.0, average="both")
    {'macro_hP': 0.5833333333333334, 'macro_hR': 0.5, 'macro_hF_beta_mean': 0.5, 'macro_hF_beta_from_macroPR': 0.5384615384615384, 'micro_hP': 0.6, 'micro_hR': 0.5, 'micro_hF_beta': 0.5454545454545454}
"""

# Optional external resources
ISCO_CODES_URL = (
    "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv"
)
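
# ---------------------------------------------------------------------------
# Illustration only. The authoritative scoring lives in `ham.hierarchical_scores`,
# whose internals are not shown in this file; the two helpers below are a
# minimal, hypothetical sketch of the ancestor-closure arithmetic described in
# _DESCRIPTION: with P and T the prefix closures of the predicted and reference
# codes, hP = |P ∩ T| / |P|, hR = |P ∩ T| / |T|, and
# hFβ = (1 + β²)·hP·hR / (β²·hP + hR). They document the intended semantics and
# are not used by the metric itself.
def _ancestor_closure(code) -> set:
    """All non-empty prefixes of a code, e.g. "2211" -> {"2", "22", "221", "2211"}."""
    s = str(code)
    return {s[: i + 1] for i in range(len(s))}


def _illustrative_pair_scores(pred, ref, beta: float = 1.0) -> dict:
    """Per-instance hP/hR/hFβ for one (prediction, reference) pair (sketch only)."""
    p, t = _ancestor_closure(pred), _ancestor_closure(ref)
    overlap = len(p & t)
    h_p = overlap / len(p) if p else 0.0
    h_r = overlap / len(t) if t else 0.0
    denom = beta**2 * h_p + h_r
    h_f = (1 + beta**2) * h_p * h_r / denom if denom else 0.0
    return {"hP": h_p, "hR": h_r, "hF_beta": h_f}
# ---------------------------------------------------------------------------
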
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class isco_hierachical_accuracy_v2(evaluate.Metric):  # keep class name as requested
    """Hierarchical ISCO-08 evaluation metric for hP/hR/hFβ."""

    def _info(self):
        # The features describe how inputs are structured when used with a Dataset;
        # compute(...) can still accept raw Python lists. Each example carries a
        # single code string, so plain string values are declared (not sequences).
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            homepage="https://isco.ilo.org/en/isco-08/",
            codebase_urls=["https://github.com/huggingface/evaluate"],
            reference_urls=[
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN%20Vol%201.pdf",
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN%20Structure%20and%20definitions.xlsx",
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20-88%20EN%20Index.xlsx",
            ],
        )

    def _download_and_prepare(self, dl_manager):
        # No external assets are required.
        pass

    def _compute(
        self,
        predictions,
        references,
        beta: float = 1.0,
        average: str = "both",
        return_per_instance: bool = False,
    ):
        """Return hierarchical precision/recall/Fβ (micro and/or macro)."""
        return hierarchical_scores(
            y_true=references,
            y_pred=predictions,
            beta=beta,
            average=average,
            return_per_instance=return_per_instance,
        )
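

# A minimal smoke test, assuming ham.py sits next to this file and is
# importable. The printed values are produced entirely by
# ham.hierarchical_scores and are not asserted here.
if __name__ == "__main__":
    _metric = isco_hierachical_accuracy_v2()
    print(
        _metric.compute(
            references=["2211", "22", "3112"],
            predictions=["22", "2213", "2211"],
            beta=1.0,
            average="both",
        )
    )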