KITAB-Bench-Leaderboard

Running

App Files Files Community

kitab-bench committed on Apr 2, 2025

Commit

5f453f6

verified ·

1 Parent(s): 5f88bed

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -50

app.py CHANGED Viewed

@@ -2,67 +2,84 @@ import gradio as gr
 import pandas as pd
 import numpy as np
-# Sample data - in a real application, you would load this from a database or API
 data = {
     "model": [
-        "GPT-4o", "Gemini-2.0-Flash", "Qwen2.5-VL-7B", "AIN-7B", "PaliGemma-3B",
-        "TrOCR-large", "nougat-base", "KITAB-OCR", "Llama-3-70B-Vision", "claude-3-opus"
     ],
     "organization": [
-        "OpenAI", "Google", "Alibaba", "MBZUAI", "Google",
-        "Microsoft", "Meta", "MBZUAI", "Meta", "Anthropic"
     ],
     "type": [
-        "Closed-source", "Closed-source", "Open-source", "Open-source", "Open-source",
-        "Open-source", "Open-source", "Open-source", "Open-source", "Closed-source"
     ],
     "task": [
         "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision",
-        "OCR", "OCR/Document", "OCR/Arabic", "Vision", "Vision"
     ],
-    "accuracy": [
-        92.5, 94.2, 83.4, 87.2, 81.5,
-        76.8, 79.3, 75.2, 89.1, 93.7
-    ],
-    "f1_score": [
-        90.1, 91.3, 79.8, 86.5, 78.3,
-        72.1, 74.5, 70.8, 87.4, 90.8
     ],
     "cer": [
-        0.31, 0.13, 1.20, 0.20, 0.67,
-        0.54, 0.58, 0.95, 0.24, 0.15
     ],
     "downloads": [
-        "24.5K", "18.2K", "152K", "89K", "112K",
-        "320K", "235K", "45K", "580K", "12.8K"
     ],
     "last_updated": [
-        "2025-03-15", "2025-03-10", "2025-03-05", "2025-02-28", "2025-02-20",
-        "2025-02-15", "2025-02-10", "2025-02-05", "2025-01-28", "2025-01-15"
     ],
     "model_url": [
         "https://huggingface.co/openai/gpt-4o",
         "https://huggingface.co/google/gemini-2-flash",
         "https://huggingface.co/Qwen/Qwen2.5-VL-7B",
         "https://huggingface.co/MBZUAI/AIN-7B",
-        "https://huggingface.co/google/paligemma-3b",
         "https://huggingface.co/microsoft/trocr-large-printed",
-        "https://huggingface.co/facebook/nougat-base",
-        "https://huggingface.co/MBZUAI/KITAB-OCR",
-        "https://huggingface.co/meta-llama/Llama-3-70B-Vision",
-        "https://huggingface.co/anthropic/claude-3-opus"
     ],
     "paper_url": [
         "https://arxiv.org/abs/2412.xxxxx",
         "https://arxiv.org/abs/2403.xxxxx",
         "https://arxiv.org/abs/2410.xxxxx",
         "https://arxiv.org/abs/2502.xxxxx",
-        "https://arxiv.org/abs/2305.xxxxx",
-        "https://arxiv.org/abs/2109.10282",
-        "https://arxiv.org/abs/2308.13418",
         "https://arxiv.org/abs/2502.14949",
-        "https://arxiv.org/abs/2405.xxxxx",
-        "https://arxiv.org/abs/2404.xxxxx"
     ]
 }
@@ -74,13 +91,9 @@ def format_dataframe(df):
     # Create a copy to avoid modifying the original
     formatted_df = df.copy()
-    # Format accuracy and F1 Score (higher is better)
-    formatted_df['accuracy'] = formatted_df['accuracy'].apply(
-        lambda x: f"<span style='color: {'#10B981' if x > 85 else '#F59E0B' if x > 75 else '#EF4444'}'>{x:.1f}</span>"
-    )
-    formatted_df['f1_score'] = formatted_df['f1_score'].apply(
-        lambda x: f"<span style='color: {'#10B981' if x > 85 else '#F59E0B' if x > 75 else '#EF4444'}'>{x:.1f}</span>"
     )
     # Format CER (lower is better)
@@ -88,6 +101,11 @@ def format_dataframe(df):
         lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
     )
     # Add hyperlinks for model and paper
     formatted_df['model'] = formatted_df.apply(
         lambda row: f"<a href='{row['model_url']}' target='_blank'>{row['model']}</a>", axis=1
@@ -261,8 +279,8 @@ def create_leaderboard_interface():
     # Create DataFrame
     df_orig = pd.DataFrame(data)
-    # Sort by accuracy descending by default
-    df_orig = df_orig.sort_values(by="accuracy", ascending=False)
     with gr.Blocks(css=css) as demo:
         gr.HTML(f"""
@@ -306,16 +324,16 @@ def create_leaderboard_interface():
             )
             sort_by = gr.Dropdown(
-                ["accuracy", "f1_score", "cer", "downloads"],
                 label="Sort by",
-                value="accuracy",
                 interactive=True
             )
             sort_order = gr.Radio(
                 ["Descending", "Ascending"],
                 label="Sort Order",
-                value="Descending",
                 interactive=True
             )
@@ -333,8 +351,8 @@ def create_leaderboard_interface():
             # Sort the dataframe
             is_ascending = sort_order == "Ascending"
-            # For CER, we might want to reverse the default sorting (since lower is better)
-            if sort_by == "cer":
                 is_ascending = not is_ascending
             filtered_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending)
@@ -352,9 +370,9 @@ def create_leaderboard_interface():
                             <th>Organization</th>
                             <th>Type</th>
                             <th>Task</th>
-                            <th>Accuracy</th>
-                            <th>F1 Score</th>
-                            <th>CER</th>
                             <th>Downloads</th>
                             <th>Last Updated</th>
                             <th>Paper</th>
@@ -370,9 +388,9 @@ def create_leaderboard_interface():
                     <td>{row['organization']}</td>
                     <td>{row['type']}</td>
                     <td>{row['task']}</td>
-                    <td>{row['accuracy']}</td>
-                    <td>{row['f1_score']}</td>
                     <td>{row['cer']}</td>
                     <td>{row['downloads']}</td>
                     <td>{row['last_updated']}</td>
                     <td>{row['paper']}</td>

 import pandas as pd
 import numpy as np
+# Data from the provided CSV
 data = {
     "model": [
+        "GPT-4o", "GPT-4o-mini", "Gemini-2.0-Flash", "Qwen2-VL", "Qwen2.5-VL",
+        "AIN", "Tesseract", "EasyOCR", "Paddle", "Surya",
+        "Microsoft", "Qari", "Gemma3", "ArabicNougat"
     ],
     "organization": [
+        "OpenAI", "OpenAI", "Google", "Alibaba", "Alibaba",
+        "MBZUAI", "Google", "JaidedAI", "Baidu", "MBZUAI",
+        "Microsoft", "MBZUAI", "Google", "Meta"
     ],
     "type": [
+        "Closed-source", "Closed-source", "Closed-source", "Open-source", "Open-source",
+        "Open-source", "Open-source", "Open-source", "Open-source", "Open-source",
+        "Closed-source", "Open-source", "Open-source", "Open-source"
     ],
     "task": [
         "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision",
+        "OCR/Vision", "OCR", "OCR", "OCR", "OCR/Arabic",
+        "OCR/Vision", "OCR/Arabic", "OCR/Vision", "OCR/Document"
     ],
+    "chrf": [
+        61.01, 47.21, 77.95, 33.94, 49.23,
+        78.33, 39.62, 45.47, 16.73, 20.61,
+        50.97, 39.77, 30.02, 30.52
     ],
     "cer": [
+        0.31, 0.43, 0.13, 1.48, 1.20,
+        0.20, 0.54, 0.58, 0.79, 4.95,
+        0.52, 1.80, 1.05, 4.37
+    ],
+    "wer": [
+        0.55, 0.71, 0.32, 1.55, 1.41,
+        0.28, 0.84, 0.89, 1.02, 5.61,
+        0.69, 1.93, 1.45, 4.67
     ],
     "downloads": [
+        "24.5K", "18.2K", "19.6K", "89K", "152K",
+        "89K", "320K", "235K", "112K", "45K",
+        "250K", "67K", "95K", "78K"
     ],
     "last_updated": [
+        "2025-03-15", "2025-03-01", "2025-03-10", "2024-12-10", "2025-03-05",
+        "2025-02-28", "2025-01-20", "2025-02-10", "2024-11-15", "2025-02-05",
+        "2025-02-15", "2025-01-05", "2025-02-20", "2025-01-18"
     ],
     "model_url": [
         "https://huggingface.co/openai/gpt-4o",
+        "https://huggingface.co/openai/gpt-4o-mini",
         "https://huggingface.co/google/gemini-2-flash",
+        "https://huggingface.co/Qwen/Qwen2-VL",
         "https://huggingface.co/Qwen/Qwen2.5-VL-7B",
         "https://huggingface.co/MBZUAI/AIN-7B",
+        "https://github.com/tesseract-ocr/tesseract",
+        "https://github.com/JaidedAI/EasyOCR",
+        "https://github.com/PaddlePaddle/PaddleOCR",
+        "https://huggingface.co/MBZUAI/Surya",
         "https://huggingface.co/microsoft/trocr-large-printed",
+        "https://huggingface.co/MBZUAI/Qari",
+        "https://huggingface.co/google/gemma3",
+        "https://huggingface.co/meta/ArabicNougat"
     ],
     "paper_url": [
+        "https://arxiv.org/abs/2412.xxxxx",
         "https://arxiv.org/abs/2412.xxxxx",
         "https://arxiv.org/abs/2403.xxxxx",
+        "https://arxiv.org/abs/2404.xxxxx",
         "https://arxiv.org/abs/2410.xxxxx",
         "https://arxiv.org/abs/2502.xxxxx",
+        "https://github.com/tesseract-ocr/tesseract",
+        "https://arxiv.org/abs/2304.xxxxx",
+        "https://arxiv.org/abs/2209.xxxxx",
         "https://arxiv.org/abs/2502.14949",
+        "https://arxiv.org/abs/2109.10282",
+        "https://arxiv.org/abs/2307.xxxxx",
+        "https://arxiv.org/abs/2305.xxxxx",
+        "https://arxiv.org/abs/2308.13418"
     ]
 }
     # Create a copy to avoid modifying the original
     formatted_df = df.copy()
+    # Format CHrF (higher is better)
+    formatted_df['chrf'] = formatted_df['chrf'].apply(
+        lambda x: f"<span style='color: {'#10B981' if x > 60 else '#F59E0B' if x > 40 else '#EF4444'}'>{x:.1f}</span>"
     )
     # Format CER (lower is better)
         lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
     )
+    # Format WER (lower is better)
+    formatted_df['wer'] = formatted_df['wer'].apply(
+        lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
+    )
     # Add hyperlinks for model and paper
     formatted_df['model'] = formatted_df.apply(
         lambda row: f"<a href='{row['model_url']}' target='_blank'>{row['model']}</a>", axis=1
     # Create DataFrame
     df_orig = pd.DataFrame(data)
+    # Sort by CER ascending by default (lower is better for CER)
+    df_orig = df_orig.sort_values(by="cer", ascending=True)
     with gr.Blocks(css=css) as demo:
         gr.HTML(f"""
             )
             sort_by = gr.Dropdown(
+                ["chrf", "cer", "wer", "downloads"],
                 label="Sort by",
+                value="cer",
                 interactive=True
             )
             sort_order = gr.Radio(
                 ["Descending", "Ascending"],
                 label="Sort Order",
+                value="Ascending",
                 interactive=True
             )
             # Sort the dataframe
             is_ascending = sort_order == "Ascending"
+            # CHrF is higher-is-better (unlike CER/WER), so flip the selected direction:
+            # "Ascending" then lists the highest CHrF first, i.e. best-first like CER/WER
+            if sort_by == "chrf":
                 is_ascending = not is_ascending
             filtered_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending)
                             <th>Organization</th>
                             <th>Type</th>
                             <th>Task</th>
+                            <th>CHrF ↑</th>
+                            <th>CER ↓</th>
+                            <th>WER ↓</th>
                             <th>Downloads</th>
                             <th>Last Updated</th>
                             <th>Paper</th>
                     <td>{row['organization']}</td>
                     <td>{row['type']}</td>
                     <td>{row['task']}</td>
+                    <td>{row['chrf']}</td>
                     <td>{row['cer']}</td>
+                    <td>{row['wer']}</td>
                     <td>{row['downloads']}</td>
                     <td>{row['last_updated']}</td>
                     <td>{row['paper']}</td>