Update app.py
Browse files
app.py
CHANGED
|
@@ -2,67 +2,84 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
#
|
| 6 |
data = {
|
| 7 |
"model": [
|
| 8 |
-
"GPT-4o", "Gemini-2.0-Flash", "Qwen2
|
| 9 |
-
"
|
|
|
|
| 10 |
],
|
| 11 |
"organization": [
|
| 12 |
-
"OpenAI", "
|
| 13 |
-
"
|
|
|
|
| 14 |
],
|
| 15 |
"type": [
|
| 16 |
-
"Closed-source", "Closed-source", "
|
| 17 |
-
"Open-source", "Open-source", "Open-source", "Open-source", "
|
|
|
|
| 18 |
],
|
| 19 |
"task": [
|
| 20 |
"OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision",
|
| 21 |
-
"OCR", "OCR
|
|
|
|
| 22 |
],
|
| 23 |
-
"
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"f1_score": [
|
| 28 |
-
90.1, 91.3, 79.8, 86.5, 78.3,
|
| 29 |
-
72.1, 74.5, 70.8, 87.4, 90.8
|
| 30 |
],
|
| 31 |
"cer": [
|
| 32 |
-
0.31, 0.13, 1.
|
| 33 |
-
0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
],
|
| 35 |
"downloads": [
|
| 36 |
-
"24.5K", "18.2K", "
|
| 37 |
-
"
|
|
|
|
| 38 |
],
|
| 39 |
"last_updated": [
|
| 40 |
-
"2025-03-15", "2025-03-
|
| 41 |
-
"2025-02-
|
|
|
|
| 42 |
],
|
| 43 |
"model_url": [
|
| 44 |
"https://huggingface.co/openai/gpt-4o",
|
|
|
|
| 45 |
"https://huggingface.co/google/gemini-2-flash",
|
|
|
|
| 46 |
"https://huggingface.co/Qwen/Qwen2.5-VL-7B",
|
| 47 |
"https://huggingface.co/MBZUAI/AIN-7B",
|
| 48 |
-
"https://
|
|
|
|
|
|
|
|
|
|
| 49 |
"https://huggingface.co/microsoft/trocr-large-printed",
|
| 50 |
-
"https://huggingface.co/
|
| 51 |
-
"https://huggingface.co/
|
| 52 |
-
"https://huggingface.co/meta
|
| 53 |
-
"https://huggingface.co/anthropic/claude-3-opus"
|
| 54 |
],
|
| 55 |
"paper_url": [
|
|
|
|
| 56 |
"https://arxiv.org/abs/2412.xxxxx",
|
| 57 |
"https://arxiv.org/abs/2403.xxxxx",
|
|
|
|
| 58 |
"https://arxiv.org/abs/2410.xxxxx",
|
| 59 |
"https://arxiv.org/abs/2502.xxxxx",
|
| 60 |
-
"https://
|
| 61 |
-
"https://arxiv.org/abs/
|
| 62 |
-
"https://arxiv.org/abs/
|
| 63 |
"https://arxiv.org/abs/2502.14949",
|
| 64 |
-
"https://arxiv.org/abs/
|
| 65 |
-
"https://arxiv.org/abs/
|
|
|
|
|
|
|
| 66 |
]
|
| 67 |
}
|
| 68 |
|
|
@@ -74,13 +91,9 @@ def format_dataframe(df):
|
|
| 74 |
# Create a copy to avoid modifying the original
|
| 75 |
formatted_df = df.copy()
|
| 76 |
|
| 77 |
-
# Format
|
| 78 |
-
formatted_df['
|
| 79 |
-
lambda x: f"<span style='color: {'#10B981' if x >
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
formatted_df['f1_score'] = formatted_df['f1_score'].apply(
|
| 83 |
-
lambda x: f"<span style='color: {'#10B981' if x > 85 else '#F59E0B' if x > 75 else '#EF4444'}'>{x:.1f}</span>"
|
| 84 |
)
|
| 85 |
|
| 86 |
# Format CER (lower is better)
|
|
@@ -88,6 +101,11 @@ def format_dataframe(df):
|
|
| 88 |
lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
|
| 89 |
)
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# Add hyperlinks for model and paper
|
| 92 |
formatted_df['model'] = formatted_df.apply(
|
| 93 |
lambda row: f"<a href='{row['model_url']}' target='_blank'>{row['model']}</a>", axis=1
|
|
@@ -261,8 +279,8 @@ def create_leaderboard_interface():
|
|
| 261 |
# Create DataFrame
|
| 262 |
df_orig = pd.DataFrame(data)
|
| 263 |
|
| 264 |
-
# Sort by
|
| 265 |
-
df_orig = df_orig.sort_values(by="
|
| 266 |
|
| 267 |
with gr.Blocks(css=css) as demo:
|
| 268 |
gr.HTML(f"""
|
|
@@ -306,16 +324,16 @@ def create_leaderboard_interface():
|
|
| 306 |
)
|
| 307 |
|
| 308 |
sort_by = gr.Dropdown(
|
| 309 |
-
["
|
| 310 |
label="Sort by",
|
| 311 |
-
value="
|
| 312 |
interactive=True
|
| 313 |
)
|
| 314 |
|
| 315 |
sort_order = gr.Radio(
|
| 316 |
["Descending", "Ascending"],
|
| 317 |
label="Sort Order",
|
| 318 |
-
value="
|
| 319 |
interactive=True
|
| 320 |
)
|
| 321 |
|
|
@@ -333,8 +351,8 @@ def create_leaderboard_interface():
|
|
| 333 |
# Sort the dataframe
|
| 334 |
is_ascending = sort_order == "Ascending"
|
| 335 |
|
| 336 |
-
# For
|
| 337 |
-
if sort_by == "
|
| 338 |
is_ascending = not is_ascending
|
| 339 |
|
| 340 |
filtered_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending)
|
|
@@ -352,9 +370,9 @@ def create_leaderboard_interface():
|
|
| 352 |
<th>Organization</th>
|
| 353 |
<th>Type</th>
|
| 354 |
<th>Task</th>
|
| 355 |
-
<th>
|
| 356 |
-
<th>
|
| 357 |
-
<th>
|
| 358 |
<th>Downloads</th>
|
| 359 |
<th>Last Updated</th>
|
| 360 |
<th>Paper</th>
|
|
@@ -370,9 +388,9 @@ def create_leaderboard_interface():
|
|
| 370 |
<td>{row['organization']}</td>
|
| 371 |
<td>{row['type']}</td>
|
| 372 |
<td>{row['task']}</td>
|
| 373 |
-
<td>{row['
|
| 374 |
-
<td>{row['f1_score']}</td>
|
| 375 |
<td>{row['cer']}</td>
|
|
|
|
| 376 |
<td>{row['downloads']}</td>
|
| 377 |
<td>{row['last_updated']}</td>
|
| 378 |
<td>{row['paper']}</td>
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
+
# Data from the provided CSV
|
| 6 |
data = {
|
| 7 |
"model": [
|
| 8 |
+
"GPT-4o", "GPT-4o-mini", "Gemini-2.0-Flash", "Qwen2-VL", "Qwen2.5-VL",
|
| 9 |
+
"AIN", "Tesseract", "EasyOCR", "Paddle", "Surya",
|
| 10 |
+
"Microsoft", "Qari", "Gemma3", "ArabicNougat"
|
| 11 |
],
|
| 12 |
"organization": [
|
| 13 |
+
"OpenAI", "OpenAI", "Google", "Alibaba", "Alibaba",
|
| 14 |
+
"MBZUAI", "Google", "JaidedAI", "Baidu", "MBZUAI",
|
| 15 |
+
"Microsoft", "MBZUAI", "Google", "Meta"
|
| 16 |
],
|
| 17 |
"type": [
|
| 18 |
+
"Closed-source", "Closed-source", "Closed-source", "Open-source", "Open-source",
|
| 19 |
+
"Open-source", "Open-source", "Open-source", "Open-source", "Open-source",
|
| 20 |
+
"Closed-source", "Open-source", "Open-source", "Open-source"
|
| 21 |
],
|
| 22 |
"task": [
|
| 23 |
"OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision", "OCR/Vision",
|
| 24 |
+
"OCR/Vision", "OCR", "OCR", "OCR", "OCR/Arabic",
|
| 25 |
+
"OCR/Vision", "OCR/Arabic", "OCR/Vision", "OCR/Document"
|
| 26 |
],
|
| 27 |
+
"chrf": [
|
| 28 |
+
61.01, 47.21, 77.95, 33.94, 49.23,
|
| 29 |
+
78.33, 39.62, 45.47, 16.73, 20.61,
|
| 30 |
+
50.97, 39.77, 30.02, 30.52
|
|
|
|
|
|
|
|
|
|
| 31 |
],
|
| 32 |
"cer": [
|
| 33 |
+
0.31, 0.43, 0.13, 1.48, 1.20,
|
| 34 |
+
0.20, 0.54, 0.58, 0.79, 4.95,
|
| 35 |
+
0.52, 1.80, 1.05, 4.37
|
| 36 |
+
],
|
| 37 |
+
"wer": [
|
| 38 |
+
0.55, 0.71, 0.32, 1.55, 1.41,
|
| 39 |
+
0.28, 0.84, 0.89, 1.02, 5.61,
|
| 40 |
+
0.69, 1.93, 1.45, 4.67
|
| 41 |
],
|
| 42 |
"downloads": [
|
| 43 |
+
"24.5K", "18.2K", "19.6K", "89K", "152K",
|
| 44 |
+
"89K", "320K", "235K", "112K", "45K",
|
| 45 |
+
"250K", "67K", "95K", "78K"
|
| 46 |
],
|
| 47 |
"last_updated": [
|
| 48 |
+
"2025-03-15", "2025-03-01", "2025-03-10", "2024-12-10", "2025-03-05",
|
| 49 |
+
"2025-02-28", "2025-01-20", "2025-02-10", "2024-11-15", "2025-02-05",
|
| 50 |
+
"2025-02-15", "2025-01-05", "2025-02-20", "2025-01-18"
|
| 51 |
],
|
| 52 |
"model_url": [
|
| 53 |
"https://huggingface.co/openai/gpt-4o",
|
| 54 |
+
"https://huggingface.co/openai/gpt-4o-mini",
|
| 55 |
"https://huggingface.co/google/gemini-2-flash",
|
| 56 |
+
"https://huggingface.co/Qwen/Qwen2-VL",
|
| 57 |
"https://huggingface.co/Qwen/Qwen2.5-VL-7B",
|
| 58 |
"https://huggingface.co/MBZUAI/AIN-7B",
|
| 59 |
+
"https://github.com/tesseract-ocr/tesseract",
|
| 60 |
+
"https://github.com/JaidedAI/EasyOCR",
|
| 61 |
+
"https://github.com/PaddlePaddle/PaddleOCR",
|
| 62 |
+
"https://huggingface.co/MBZUAI/Surya",
|
| 63 |
"https://huggingface.co/microsoft/trocr-large-printed",
|
| 64 |
+
"https://huggingface.co/MBZUAI/Qari",
|
| 65 |
+
"https://huggingface.co/google/gemma3",
|
| 66 |
+
"https://huggingface.co/meta/ArabicNougat"
|
|
|
|
| 67 |
],
|
| 68 |
"paper_url": [
|
| 69 |
+
"https://arxiv.org/abs/2412.xxxxx",
|
| 70 |
"https://arxiv.org/abs/2412.xxxxx",
|
| 71 |
"https://arxiv.org/abs/2403.xxxxx",
|
| 72 |
+
"https://arxiv.org/abs/2404.xxxxx",
|
| 73 |
"https://arxiv.org/abs/2410.xxxxx",
|
| 74 |
"https://arxiv.org/abs/2502.xxxxx",
|
| 75 |
+
"https://github.com/tesseract-ocr/tesseract",
|
| 76 |
+
"https://arxiv.org/abs/2304.xxxxx",
|
| 77 |
+
"https://arxiv.org/abs/2209.xxxxx",
|
| 78 |
"https://arxiv.org/abs/2502.14949",
|
| 79 |
+
"https://arxiv.org/abs/2109.10282",
|
| 80 |
+
"https://arxiv.org/abs/2307.xxxxx",
|
| 81 |
+
"https://arxiv.org/abs/2305.xxxxx",
|
| 82 |
+
"https://arxiv.org/abs/2308.13418"
|
| 83 |
]
|
| 84 |
}
|
| 85 |
|
|
|
|
| 91 |
# Create a copy to avoid modifying the original
|
| 92 |
formatted_df = df.copy()
|
| 93 |
|
| 94 |
+
# Format CHrF (higher is better)
|
| 95 |
+
formatted_df['chrf'] = formatted_df['chrf'].apply(
|
| 96 |
+
lambda x: f"<span style='color: {'#10B981' if x > 60 else '#F59E0B' if x > 40 else '#EF4444'}'>{x:.1f}</span>"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
# Format CER (lower is better)
|
|
|
|
| 101 |
lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
|
| 102 |
)
|
| 103 |
|
| 104 |
+
# Format WER (lower is better)
|
| 105 |
+
formatted_df['wer'] = formatted_df['wer'].apply(
|
| 106 |
+
lambda x: f"<span style='color: {'#10B981' if x < 0.5 else '#F59E0B' if x < 1 else '#EF4444'}'>{x:.2f}</span>"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
# Add hyperlinks for model and paper
|
| 110 |
formatted_df['model'] = formatted_df.apply(
|
| 111 |
lambda row: f"<a href='{row['model_url']}' target='_blank'>{row['model']}</a>", axis=1
|
|
|
|
| 279 |
# Create DataFrame
|
| 280 |
df_orig = pd.DataFrame(data)
|
| 281 |
|
| 282 |
+
# Sort by CER ascending by default (lower is better for CER)
|
| 283 |
+
df_orig = df_orig.sort_values(by="cer", ascending=True)
|
| 284 |
|
| 285 |
with gr.Blocks(css=css) as demo:
|
| 286 |
gr.HTML(f"""
|
|
|
|
| 324 |
)
|
| 325 |
|
| 326 |
sort_by = gr.Dropdown(
|
| 327 |
+
["chrf", "cer", "wer", "downloads"],
|
| 328 |
label="Sort by",
|
| 329 |
+
value="cer",
|
| 330 |
interactive=True
|
| 331 |
)
|
| 332 |
|
| 333 |
sort_order = gr.Radio(
|
| 334 |
["Descending", "Ascending"],
|
| 335 |
label="Sort Order",
|
| 336 |
+
value="Ascending",
|
| 337 |
interactive=True
|
| 338 |
)
|
| 339 |
|
|
|
|
| 351 |
# Sort the dataframe
|
| 352 |
is_ascending = sort_order == "Ascending"
|
| 353 |
|
| 354 |
+
# For CHrF, we want to reverse the default sorting (since higher is better)
|
| 355 |
+
if sort_by == "chrf":
|
| 356 |
is_ascending = not is_ascending
|
| 357 |
|
| 358 |
filtered_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending)
|
|
|
|
| 370 |
<th>Organization</th>
|
| 371 |
<th>Type</th>
|
| 372 |
<th>Task</th>
|
| 373 |
+
<th>CHrF ↑</th>
|
| 374 |
+
<th>CER ↓</th>
|
| 375 |
+
<th>WER ↓</th>
|
| 376 |
<th>Downloads</th>
|
| 377 |
<th>Last Updated</th>
|
| 378 |
<th>Paper</th>
|
|
|
|
| 388 |
<td>{row['organization']}</td>
|
| 389 |
<td>{row['type']}</td>
|
| 390 |
<td>{row['task']}</td>
|
| 391 |
+
<td>{row['chrf']}</td>
|
|
|
|
| 392 |
<td>{row['cer']}</td>
|
| 393 |
+
<td>{row['wer']}</td>
|
| 394 |
<td>{row['downloads']}</td>
|
| 395 |
<td>{row['last_updated']}</td>
|
| 396 |
<td>{row['paper']}</td>
|