# Copyright 2020 The HuggingFace Datasets Authors
# and 2025 Daniel Duckworth (metric integration)
#
# Licensed under the Apache License, Version 2.0
#
# Metric: ISCO-08 Hierarchical Precision/Recall/Fβ with micro/macro aggregation.
# The metric treats each code as belonging to all of its ancestor prefixes
# (excluding the root).

import datasets
import evaluate

# Import the implementation from a sibling module file.
# If packaging as an evaluate module, place both files in the same module directory.
from ham import hierarchical_scores

_CITATION = """\
@article{kosmopoulos2015evaluation,
  title={Evaluation Measures for Hierarchical Classification: A Unified View and Novel Approaches},
  author={Kosmopoulos, Aris and Partalas, Ioannis and Gaussier, Eric and Paliouras, George and Androutsopoulos, Ion},
  journal={Data Mining and Knowledge Discovery},
  year={2015}
}
@misc{isco08,
  title={International Standard Classification of Occupations (ISCO-08)},
  howpublished={International Labour Organization},
  year={2008}
}
"""

_DESCRIPTION = """\
Hierarchical precision (hP), recall (hR), and Fβ (hFβ) for ISCO-08 codes.
Each code is expanded to its ancestor closure (the set of all of its non-empty
prefixes), and the overlap between the predicted and reference closures
determines hP/hR. This rewards predictions at the correct depth and penalizes
them in proportion to their distance from the reference in the hierarchy.
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions (List[str] | List[int]): Predicted ISCO-08 codes (length 1..4).
        Strings are recommended to preserve leading zeros.
    references (List[str] | List[int]): Reference ISCO-08 codes (length 1..4).
    beta (float, optional): F-measure beta parameter. Default 1.0.
    average (str, optional): "micro", "macro", or "both". Default "both".
    return_per_instance (bool, optional): If True, also return a list of
        per-instance dicts with hP/hR/hFβ. Default False.

Returns (dict):
    If average includes "macro":
        - macro_hP
        - macro_hR
        - macro_hF_beta_mean          # mean of per-instance hFβ
        - macro_hF_beta_from_macroPR  # Fβ computed from macro hP/hR
    If average includes "micro":
        - micro_hP
        - micro_hR
        - micro_hF_beta
    If return_per_instance:
        - per_instance: List[{"hP": float, "hR": float, "hF_beta": float}, ...]

Examples:
    >>> import evaluate
    >>> metric = evaluate.load("path/to/isco_hierachical_accuracy_v2.py")
    >>> refs = ["2211", "22", "3112"]
    >>> preds = ["22", "2213", "2211"]
    >>> metric.compute(references=refs, predictions=preds, beta=1.0, average="both")
    {'macro_hP': 0.5833333333333334, 'macro_hR': 0.5, 'macro_hF_beta_mean': 0.5, 'macro_hF_beta_from_macroPR': 0.5384615384615384, 'micro_hP': 0.6, 'micro_hR': 0.5, 'micro_hF_beta': 0.5454545454545454}
"""

# Optional external resources
ISCO_CODES_URL = (
    "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv"
)
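
# ---------------------------------------------------------------------------
# Illustration only. The authoritative scoring lives in `ham.hierarchical_scores`,
# whose internals are not shown in this file; the two helpers below are a
# minimal, hypothetical sketch of the ancestor-closure arithmetic described in
# _DESCRIPTION: with P and T the prefix closures of the predicted and reference
# codes, hP = |P ∩ T| / |P|, hR = |P ∩ T| / |T|, and
# hFβ = (1 + β²)·hP·hR / (β²·hP + hR). They document the intended semantics and
# are not used by the metric itself.
def _ancestor_closure(code) -> set:
    """All non-empty prefixes of a code, e.g. "2211" -> {"2", "22", "221", "2211"}."""
    s = str(code)
    return {s[: i + 1] for i in range(len(s))}


def _illustrative_pair_scores(pred, ref, beta: float = 1.0) -> dict:
    """Per-instance hP/hR/hFβ for one (prediction, reference) pair (sketch only)."""
    p, t = _ancestor_closure(pred), _ancestor_closure(ref)
    overlap = len(p & t)
    h_p = overlap / len(p) if p else 0.0
    h_r = overlap / len(t) if t else 0.0
    denom = beta**2 * h_p + h_r
    h_f = (1 + beta**2) * h_p * h_r / denom if denom else 0.0
    return {"hP": h_p, "hR": h_r, "hF_beta": h_f}
# ---------------------------------------------------------------------------
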
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class isco_hierachical_accuracy_v2(evaluate.Metric):  # keep class name as requested
    """Hierarchical ISCO-08 evaluation metric for hP/hR/hFβ."""

    def _info(self):
        # The features describe how inputs are structured when used with a Dataset;
        # compute(...) can still accept raw Python lists. Each example carries a
        # single code string, so plain string values are declared (not sequences).
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            homepage="https://isco.ilo.org/en/isco-08/",
            codebase_urls=["https://github.com/huggingface/evaluate"],
            reference_urls=[
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN%20Vol%201.pdf",
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN%20Structure%20and%20definitions.xlsx",
                "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20-88%20EN%20Index.xlsx",
            ],
        )

    def _download_and_prepare(self, dl_manager):
        # No external assets are required.
        pass

    def _compute(
        self,
        predictions,
        references,
        beta: float = 1.0,
        average: str = "both",
        return_per_instance: bool = False,
    ):
        """Return hierarchical precision/recall/Fβ (micro and/or macro)."""
        return hierarchical_scores(
            y_true=references,
            y_pred=predictions,
            beta=beta,
            average=average,
            return_per_instance=return_per_instance,
        )
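

# A minimal smoke test, assuming ham.py sits next to this file and is
# importable. The printed values are produced entirely by
# ham.hierarchical_scores and are not asserted here.
if __name__ == "__main__":
    _metric = isco_hierachical_accuracy_v2()
    print(
        _metric.compute(
            references=["2211", "22", "3112"],
            predictions=["22", "2213", "2211"],
            beta=1.0,
            average="both",
        )
    )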