Spaces:

sunhill
/

spice

Runtime error

App Files Files Community

sunhill committed on Sep 25

Commit

03c869a

1 Parent(s): 8ef39b1

compute batch result

Browse files

Files changed (1) hide show

spice.py +55 -2

spice.py CHANGED Viewed

@@ -5,6 +5,7 @@ import shutil
 import subprocess
 import json
 import tempfile
 import evaluate
 import datasets
@@ -179,7 +180,56 @@ class SPICE(evaluate.Metric):
         except (ValueError, TypeError):
             return float("nan")
-    def _compute(self, predictions, references):
         """Returns the scores"""
         assert len(predictions) == len(references), (
             "The number of predictions and references should be the same. "
@@ -241,4 +291,7 @@ class SPICE(evaluate.Metric):
                     k: self.float_convert(v) for k, v in score_tuple.items()
                 }
             scores.append(score_set)
-        return scores

 import subprocess
 import json
 import tempfile
+from typing import List, Dict
 import evaluate
 import datasets
         except (ValueError, TypeError):
             return float("nan")
def _compute_batch(self, scores: List[Dict]) -> Dict[str, Dict[str, float]]:
    """Aggregate per-image score tuples into batch-level scores.

    For every category, the ``tp``, ``fp`` and ``fn`` counts of all entries
    are summed, and precision, recall and F-score are then recomputed from
    those totals (rather than averaging the per-image ratios).

    Args:
        scores: One mapping per image, ``{category: {"tp": ..., "fp": ...,
            "fn": ..., ...}}``. Keys other than ``tp``/``fp``/``fn`` in the
            inner mapping are ignored; a missing count contributes nothing.

    Returns:
        ``{category: {"pr", "re", "f", "fn", "numImages", "fp", "tp"}}``.
        ``numImages`` is the number of entries that contained the category.
        ``pr``/``re``/``f`` are ``nan`` whenever their denominator is not
        positive. An empty ``scores`` list yields an empty dict.
    """
    count_keys = ("fn", "fp", "tp")
    nan = float("nan")
    aggregate_scores: Dict[str, Dict[str, float]] = {}

    # Sum the raw counts. Categories are registered on first sight so that a
    # category missing from the first entry no longer raises KeyError.
    for score in scores:
        for category, score_dict in score.items():
            totals = aggregate_scores.setdefault(
                category,
                {
                    "pr": 0.0,
                    "re": 0.0,
                    "f": 0.0,
                    "fn": 0.0,
                    "numImages": 0.0,
                    "fp": 0.0,
                    "tp": 0.0,
                },
            )
            for key in count_keys:
                if key in score_dict:
                    totals[key] += score_dict[key]
            totals["numImages"] += 1

    # Derive the ratios from the summed counts.
    for totals in aggregate_scores.values():
        tp = totals["tp"]
        fp = totals["fp"]
        fn = totals["fn"]
        precision = tp / (tp + fp) if (tp + fp) > 0 else nan
        recall = tp / (tp + fn) if (tp + fn) > 0 else nan
        # A nan operand makes the comparison False, so nan propagates to f.
        ratio_sum = precision + recall
        f_score = 2 * precision * recall / ratio_sum if ratio_sum > 0 else nan
        totals["pr"] = precision
        totals["re"] = recall
        totals["f"] = f_score
    return aggregate_scores
+    def _compute(self, predictions, references, spice_name="All"):
         """Returns the scores"""
         assert len(predictions) == len(references), (
             "The number of predictions and references should be the same. "
                     k: self.float_convert(v) for k, v in score_tuple.items()
                 }
             scores.append(score_set)
+        result_score = {}
+        for k, v in self._compute_batch(scores)[spice_name].items():
+            result_score["spice_" + spice_name.lower() + "_" + k] = v
+        return result_score