Spaces:
Running
Running
| { | |||
| "benchmarks": { | |||
| "sweVerified": { | |||
| "name": "SWE-bench Verified", | |||
| "models": [ | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 77.8, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 76.4, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |||
| "short_name": "MiniMax-M2.5", | |||
| "provider": "MiniMaxAI", | |||
| "score": 75.8, | |||
| "date": "2026-02-12" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 74.4, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 74.0, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 73.8, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 72.4, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |||
| "short_name": "Qwen3.5-122B-A10B", | |||
| "provider": "Qwen", | |||
| "score": 72.0, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Thinking", | |||
| "short_name": "Kimi-K2-Thinking", | |||
| "provider": "moonshotai", | |||
| "score": 71.3, | |||
| "date": "2025-11-04" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 70.8, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-Next", | |||
| "short_name": "Qwen3-Coder-Next", | |||
| "provider": "Qwen", | |||
| "score": 70.6, | |||
| "date": "2026-01-30" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 70.0, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2", | |||
| "short_name": "MiniMax-M2", | |||
| "provider": "MiniMaxAI", | |||
| "score": 69.4, | |||
| "date": "2025-10-22" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 69.2, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "GAIR/OpenSWE-72B", | |||
| "short_name": "OpenSWE-72B", | |||
| "provider": "GAIR", | |||
| "score": 66.0, | |||
| "date": "2026-03-15" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-120b", | |||
| "short_name": "gpt-oss-120b", | |||
| "provider": "openai", | |||
| "score": 62.4, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "GAIR/OpenSWE-32B", | |||
| "short_name": "OpenSWE-32B", | |||
| "provider": "GAIR", | |||
| "score": 62.4, | |||
| "date": "2026-03-15" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-20b", | |||
| "short_name": "gpt-oss-20b", | |||
| "provider": "openai", | |||
| "score": 60.7, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 60.47, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7-Flash", | |||
| "short_name": "GLM-4.7-Flash", | |||
| "provider": "zai-org", | |||
| "score": 59.2, | |||
| "date": "2026-01-19" | |||
| }, | |||
| { | |||
| "model_id": "facebook/cwm", | |||
| "short_name": "cwm", | |||
| "provider": "facebook", | |||
| "score": 53.9, | |||
| "date": "2025-08-25" | |||
| }, | |||
| { | |||
| "model_id": "SWE-Lego/SWE-Lego-Qwen3-32B", | |||
| "short_name": "SWE-Lego-Qwen3-32B", | |||
| "provider": "SWE-Lego", | |||
| "score": 52.6, | |||
| "date": "2026-01-05" | |||
| }, | |||
| { | |||
| "model_id": "SWE-Lego/SWE-Lego-Qwen3-8B", | |||
| "short_name": "SWE-Lego-Qwen3-8B", | |||
| "provider": "SWE-Lego", | |||
| "score": 42.2, | |||
| "date": "2025-12-29" | |||
| } | |||
| ] | |||
| }, | |||
| "swePro": { | |||
| "name": "SWE-bench Pro", | |||
| "models": [ | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |||
| "short_name": "MiniMax-M2.5", | |||
| "provider": "MiniMaxAI", | |||
| "score": 55.4, | |||
| "date": "2026-02-12" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 50.7, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-Next", | |||
| "short_name": "Qwen3-Coder-Next", | |||
| "provider": "Qwen", | |||
| "score": 44.3, | |||
| "date": "2026-01-30" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |||
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |||
| "provider": "Qwen", | |||
| "score": 38.7, | |||
| "date": "2025-07-22" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 36.81, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Instruct", | |||
| "short_name": "Kimi-K2-Instruct", | |||
| "provider": "moonshotai", | |||
| "score": 27.67, | |||
| "date": "2025-07-11" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-235B-A22B", | |||
| "short_name": "Qwen3-235B-A22B", | |||
| "provider": "Qwen", | |||
| "score": 21.41, | |||
| "date": "2025-04-27" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-120b", | |||
| "short_name": "gpt-oss-120b", | |||
| "provider": "openai", | |||
| "score": 16.2, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 15.56, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "google/gemma-3-27b-it", | |||
| "short_name": "gemma-3-27b-it", | |||
| "provider": "google", | |||
| "score": 11.38, | |||
| "date": "2025-03-01" | |||
| }, | |||
| { | |||
| "model_id": "meta-llama/Llama-3.1-405B-Instruct", | |||
| "short_name": "Llama-3.1-405B-Instruct", | |||
| "provider": "meta-llama", | |||
| "score": 11.18, | |||
| "date": "2024-07-16" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.6", | |||
| "short_name": "GLM-4.6", | |||
| "provider": "zai-org", | |||
| "score": 9.67, | |||
| "date": "2025-09-29" | |||
| }, | |||
| { | |||
| "model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", | |||
| "short_name": "Llama-4-Maverick-17B-128E-Instruct", | |||
| "provider": "meta-llama", | |||
| "score": 5.24, | |||
| "date": "2025-04-01" | |||
| } | |||
| ] | |||
| }, | |||
| "mmluPro": { | |||
| "name": "MMLU-Pro", | |||
| "models": [ | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 88.0, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 87.8, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 87.1, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |||
| "short_name": "Qwen3.5-122B-A10B", | |||
| "provider": "Qwen", | |||
| "score": 86.7, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 86.1, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 85.3, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-R1-0528", | |||
| "short_name": "DeepSeek-R1-0528", | |||
| "provider": "deepseek-ai", | |||
| "score": 85.0, | |||
| "date": "2025-05-28" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 85.0, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Thinking", | |||
| "short_name": "Kimi-K2-Thinking", | |||
| "provider": "moonshotai", | |||
| "score": 84.6, | |||
| "date": "2025-11-04" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", | |||
| "short_name": "Qwen3-235B-A22B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 84.4, | |||
| "date": "2025-07-25" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 84.4, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 84.3, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-R1", | |||
| "short_name": "DeepSeek-R1", | |||
| "provider": "deepseek-ai", | |||
| "score": 84.0, | |||
| "date": "2025-01-20" | |||
| }, | |||
| { | |||
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |||
| "short_name": "K-EXAONE-236B-A23B", | |||
| "provider": "LGAI-EXAONE", | |||
| "score": 83.8, | |||
| "date": "2025-12-26" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 83.73, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-9B", | |||
| "short_name": "Qwen3.5-9B", | |||
| "provider": "Qwen", | |||
| "score": 82.5, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2", | |||
| "short_name": "MiniMax-M2", | |||
| "provider": "MiniMaxAI", | |||
| "score": 82.0, | |||
| "date": "2025-10-22" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3-0324", | |||
| "short_name": "DeepSeek-V3-0324", | |||
| "provider": "deepseek-ai", | |||
| "score": 81.2, | |||
| "date": "2025-03-24" | |||
| }, | |||
| { | |||
| "model_id": "jdopensource/JoyAI-LLM-Flash", | |||
| "short_name": "JoyAI-LLM-Flash", | |||
| "provider": "jdopensource", | |||
| "score": 81.02, | |||
| "date": "2026-02-14" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", | |||
| "short_name": "Qwen3-Next-80B-A3B-Instruct", | |||
| "provider": "Qwen", | |||
| "score": 80.6, | |||
| "date": "2025-09-09" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |||
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |||
| "provider": "nvidia", | |||
| "score": 79.8, | |||
| "date": "2026-03-18" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-4B", | |||
| "short_name": "Qwen3.5-4B", | |||
| "provider": "Qwen", | |||
| "score": 79.1, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 78.3, | |||
| "date": "2025-12-04" | |||
| }, | |||
| { | |||
| "model_id": "meituan-longcat/LongCat-Flash-Lite", | |||
| "short_name": "LongCat-Flash-Lite", | |||
| "provider": "meituan-longcat", | |||
| "score": 78.29, | |||
| "date": "2026-01-27" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |||
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |||
| "provider": "nvidia", | |||
| "score": 78.1, | |||
| "date": "2025-12-06" | |||
| }, | |||
| { | |||
| "model_id": "arcee-ai/Trinity-Large-Preview", | |||
| "short_name": "Trinity-Large-Preview", | |||
| "provider": "arcee-ai", | |||
| "score": 75.2, | |||
| "date": "2026-01-27" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |||
| "short_name": "Qwen3-4B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 74.0, | |||
| "date": "2025-08-05" | |||
| }, | |||
| { | |||
| "model_id": "tiiuae/Falcon-H1R-7B", | |||
| "short_name": "Falcon-H1R-7B", | |||
| "provider": "tiiuae", | |||
| "score": 72.1, | |||
| "date": "2025-10-29" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Instruct-2507", | |||
| "short_name": "Qwen3-4B-Instruct-2507", | |||
| "provider": "Qwen", | |||
| "score": 69.6, | |||
| "date": "2025-08-05" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3", | |||
| "short_name": "DeepSeek-V3", | |||
| "provider": "deepseek-ai", | |||
| "score": 64.4, | |||
| "date": "2024-12-25" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-2B", | |||
| "short_name": "Qwen3.5-2B", | |||
| "provider": "Qwen", | |||
| "score": 55.3, | |||
| "date": "2026-02-28" | |||
| }, | |||
| { | |||
| "model_id": "meta-llama/Llama-3.1-8B-Instruct", | |||
| "short_name": "Llama-3.1-8B-Instruct", | |||
| "provider": "meta-llama", | |||
| "score": 48.3, | |||
| "date": "2024-07-18" | |||
| }, | |||
| { | |||
| "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", | |||
| "short_name": "LFM2.5-1.2B-Instruct", | |||
| "provider": "LiquidAI", | |||
| "score": 44.35, | |||
| "date": "2026-01-06" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-0.8B", | |||
| "short_name": "Qwen3.5-0.8B", | |||
| "provider": "Qwen", | |||
| "score": 29.7, | |||
| "date": "2026-02-28" | |||
| } | |||
| ] | |||
| }, | |||
| "gpqa": { | |||
| "name": "GPQA Diamond", | |||
| "models": [ | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 88.4, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 87.6, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |||
| "short_name": "Qwen3.5-122B-A10B", | |||
| "provider": "Qwen", | |||
| "score": 86.6, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 86.0, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 85.7, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 85.5, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |||
| "short_name": "MiniMax-M2.5", | |||
| "provider": "MiniMaxAI", | |||
| "score": 85.2, | |||
| "date": "2026-02-12" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Thinking", | |||
| "short_name": "Kimi-K2-Thinking", | |||
| "provider": "moonshotai", | |||
| "score": 84.5, | |||
| "date": "2025-11-04" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 84.2, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Nanbeige/Nanbeige4.1-3B", | |||
| "short_name": "Nanbeige4.1-3B", | |||
| "provider": "Nanbeige", | |||
| "score": 83.8, | |||
| "date": "2026-02-10" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 83.5, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 82.7, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 82.4, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-9B", | |||
| "short_name": "Qwen3.5-9B", | |||
| "provider": "Qwen", | |||
| "score": 81.7, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-120b", | |||
| "short_name": "gpt-oss-120b", | |||
| "provider": "openai", | |||
| "score": 80.9, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", | |||
| "short_name": "LongCat-Flash-Thinking-2601", | |||
| "provider": "meituan-longcat", | |||
| "score": 80.5, | |||
| "date": "2026-01-14" | |||
| }, | |||
| { | |||
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |||
| "short_name": "K-EXAONE-236B-A23B", | |||
| "provider": "LGAI-EXAONE", | |||
| "score": 79.1, | |||
| "date": "2025-12-26" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-4B", | |||
| "short_name": "Qwen3.5-4B", | |||
| "provider": "Qwen", | |||
| "score": 76.2, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |||
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |||
| "provider": "nvidia", | |||
| "score": 76.1, | |||
| "date": "2026-03-18" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7-Flash", | |||
| "short_name": "GLM-4.7-Flash", | |||
| "provider": "zai-org", | |||
| "score": 75.2, | |||
| "date": "2026-01-19" | |||
| }, | |||
| { | |||
| "model_id": "jdopensource/JoyAI-LLM-Flash", | |||
| "short_name": "JoyAI-LLM-Flash", | |||
| "provider": "jdopensource", | |||
| "score": 74.43, | |||
| "date": "2026-02-14" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-20b", | |||
| "short_name": "gpt-oss-20b", | |||
| "provider": "openai", | |||
| "score": 74.2, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-R1", | |||
| "short_name": "DeepSeek-R1", | |||
| "provider": "deepseek-ai", | |||
| "score": 71.5, | |||
| "date": "2025-01-20" | |||
| }, | |||
| { | |||
| "model_id": "mistralai/Mistral-Small-4-119B-2603", | |||
| "short_name": "Mistral-Small-4-119B-2603", | |||
| "provider": "mistralai", | |||
| "score": 71.2, | |||
| "date": "2026-01-23" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |||
| "short_name": "Qwen3-4B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 65.8, | |||
| "date": "2025-08-05" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Instruct-2507", | |||
| "short_name": "Qwen3-4B-Instruct-2507", | |||
| "provider": "Qwen", | |||
| "score": 62.0, | |||
| "date": "2025-08-05" | |||
| }, | |||
| { | |||
| "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", | |||
| "short_name": "LFM2.5-1.2B-Instruct", | |||
| "provider": "LiquidAI", | |||
| "score": 38.89, | |||
| "date": "2026-01-06" | |||
| }, | |||
| { | |||
| "model_id": "meta-llama/Llama-3.1-8B-Instruct", | |||
| "short_name": "Llama-3.1-8B-Instruct", | |||
| "provider": "meta-llama", | |||
| "score": 30.4, | |||
| "date": "2024-07-18" | |||
| }, | |||
| { | |||
| "model_id": "TeichAI/Qwen3.5-4B-Claude-Opus-Reasoning", | |||
| "short_name": "Qwen3.5-4B-Claude-Opus-Reasoning", | |||
| "provider": "TeichAI", | |||
| "score": 28.28, | |||
| "date": "2026-03-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-0.8B", | |||
| "short_name": "Qwen3.5-0.8B", | |||
| "provider": "Qwen", | |||
| "score": 11.9, | |||
| "date": "2026-02-28" | |||
| } | |||
| ] | |||
| }, | |||
| "hle": { | |||
| "name": "HLE", | |||
| "models": [ | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 50.4, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 50.2, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 48.5, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 48.3, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |||
| "short_name": "Qwen3.5-122B-A10B", | |||
| "provider": "Qwen", | |||
| "score": 47.5, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Thinking", | |||
| "short_name": "Kimi-K2-Thinking", | |||
| "provider": "moonshotai", | |||
| "score": 44.9, | |||
| "date": "2025-11-04" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 42.8, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 40.8, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "miromind-ai/MiroThinker-v1.5-235B", | |||
| "short_name": "MiroThinker-v1.5-235B", | |||
| "provider": "miromind-ai", | |||
| "score": 39.2, | |||
| "date": "2026-01-04" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Orchestrator-8B", | |||
| "short_name": "Nemotron-Orchestrator-8B", | |||
| "provider": "nvidia", | |||
| "score": 37.1, | |||
| "date": "2025-11-25" | |||
| }, | |||
| { | |||
| "model_id": "miromind-ai/MiroThinker-v1.5-30B", | |||
| "short_name": "MiroThinker-v1.5-30B", | |||
| "provider": "miromind-ai", | |||
| "score": 31.0, | |||
| "date": "2026-01-04" | |||
| }, | |||
| { | |||
| "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", | |||
| "short_name": "LongCat-Flash-Thinking-2601", | |||
| "provider": "meituan-longcat", | |||
| "score": 25.2, | |||
| "date": "2026-01-14" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 23.1, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 22.82, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 22.4, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Nanbeige/Nanbeige4.1-3B", | |||
| "short_name": "Nanbeige4.1-3B", | |||
| "provider": "Nanbeige", | |||
| "score": 22.29, | |||
| "date": "2026-02-10" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 22.2, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "XiaomiMiMo/MiMo-V2-Flash", | |||
| "short_name": "MiMo-V2-Flash", | |||
| "provider": "XiaomiMiMo", | |||
| "score": 22.1, | |||
| "date": "2025-12-16" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |||
| "short_name": "MiniMax-M2.5", | |||
| "provider": "MiniMaxAI", | |||
| "score": 19.4, | |||
| "date": "2026-02-12" | |||
| }, | |||
| { | |||
| "model_id": "openbmb/AgentCPM-Explore", | |||
| "short_name": "AgentCPM-Explore", | |||
| "provider": "openbmb", | |||
| "score": 19.1, | |||
| "date": "2026-01-11" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-120b", | |||
| "short_name": "gpt-oss-120b", | |||
| "provider": "openai", | |||
| "score": 19.0, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |||
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |||
| "provider": "nvidia", | |||
| "score": 17.7, | |||
| "date": "2026-03-18" | |||
| }, | |||
| { | |||
| "model_id": "openai/gpt-oss-20b", | |||
| "short_name": "gpt-oss-20b", | |||
| "provider": "openai", | |||
| "score": 17.3, | |||
| "date": "2025-08-04" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 15.5, | |||
| "date": "2025-12-04" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |||
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |||
| "provider": "nvidia", | |||
| "score": 15.5, | |||
| "date": "2025-12-06" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7-Flash", | |||
| "short_name": "GLM-4.7-Flash", | |||
| "provider": "zai-org", | |||
| "score": 14.4, | |||
| "date": "2026-01-19" | |||
| }, | |||
| { | |||
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |||
| "short_name": "K-EXAONE-236B-A23B", | |||
| "provider": "LGAI-EXAONE", | |||
| "score": 13.6, | |||
| "date": "2025-12-26" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2", | |||
| "short_name": "MiniMax-M2", | |||
| "provider": "MiniMaxAI", | |||
| "score": 12.5, | |||
| "date": "2025-10-22" | |||
| }, | |||
| { | |||
| "model_id": "tiiuae/Falcon-H1R-7B", | |||
| "short_name": "Falcon-H1R-7B", | |||
| "provider": "tiiuae", | |||
| "score": 11.1, | |||
| "date": "2025-10-29" | |||
| }, | |||
| { | |||
| "model_id": "HelpingAI/Dhanishtha-2.0-0126", | |||
| "short_name": "Dhanishtha-2.0-0126", | |||
| "provider": "HelpingAI", | |||
| "score": 9.92, | |||
| "date": "2026-01-01" | |||
| } | |||
| ] | |||
| }, | |||
| "aime2026": { | |||
| "name": "AIME 2026", | |||
| "models": [ | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 96.67, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 95.83, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 95.83, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 94.17, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 93.33, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 93.33, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-9B", | |||
| "short_name": "Qwen3.5-9B", | |||
| "provider": "Qwen", | |||
| "score": 92.5, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 90.83, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 90.0, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", | |||
| "short_name": "Qwen3-30B-A3B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 87.5, | |||
| "date": "2025-07-29" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |||
| "short_name": "Qwen3-4B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 82.5, | |||
| "date": "2025-08-05" | |||
| }, | |||
| { | |||
| "model_id": "lm-provers/QED-Nano", | |||
| "short_name": "QED-Nano", | |||
| "provider": "lm-provers", | |||
| "score": 82.5, | |||
| "date": "2026-02-12" | |||
| } | |||
| ] | |||
| }, | |||
| "hmmt2026": { | |||
| "name": "HMMT Feb 2026", | |||
| "models": [ | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 87.88, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 87.12, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 86.36, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 86.36, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 84.85, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 84.09, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 81.82, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 81.06, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", | |||
| "short_name": "Qwen3-30B-A3B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 78.79, | |||
| "date": "2025-07-29" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-9B", | |||
| "short_name": "Qwen3.5-9B", | |||
| "provider": "Qwen", | |||
| "score": 71.21, | |||
| "date": "2026-02-27" | |||
| }, | |||
| { | |||
| "model_id": "lm-provers/QED-Nano", | |||
| "short_name": "QED-Nano", | |||
| "provider": "lm-provers", | |||
| "score": 62.88, | |||
| "date": "2026-02-12" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |||
| "short_name": "Qwen3-4B-Thinking-2507", | |||
| "provider": "Qwen", | |||
| "score": 53.03, | |||
| "date": "2025-08-05" | |||
| } | |||
| ] | |||
| }, | |||
| "olmOcr": { | |||
| "name": "olmOCR-bench", | |||
| "models": [ | |||
| { | |||
| "model_id": "datalab-to/chandra-ocr-2", | |||
| "short_name": "chandra-ocr-2", | |||
| "provider": "datalab-to", | |||
| "score": 85.9, | |||
| "date": "2026-03-16" | |||
| }, | |||
| { | |||
| "model_id": "rednote-hilab/dots.mocr", | |||
| "short_name": "dots.mocr", | |||
| "provider": "rednote-hilab", | |||
| "score": 83.9, | |||
| "date": "2026-03-19" | |||
| }, | |||
| { | |||
| "model_id": "lightonai/LightOnOCR-2-1B", | |||
| "short_name": "LightOnOCR-2-1B", | |||
| "provider": "lightonai", | |||
| "score": 83.2, | |||
| "date": "2026-01-16" | |||
| }, | |||
| { | |||
| "model_id": "datalab-to/chandra", | |||
| "short_name": "chandra", | |||
| "provider": "datalab-to", | |||
| "score": 83.1, | |||
| "date": "2025-10-21" | |||
| }, | |||
| { | |||
| "model_id": "infly/Infinity-Parser-7B", | |||
| "short_name": "Infinity-Parser-7B", | |||
| "provider": "infly", | |||
| "score": 82.5, | |||
| "date": "2025-10-17" | |||
| }, | |||
| { | |||
| "model_id": "allenai/olmOCR-2-7B-1025-FP8", | |||
| "short_name": "olmOCR-2-7B-1025-FP8", | |||
| "provider": "allenai", | |||
| "score": 82.4, | |||
| "date": "2025-10-06" | |||
| }, | |||
| { | |||
| "model_id": "PaddlePaddle/PaddleOCR-VL", | |||
| "short_name": "PaddleOCR-VL", | |||
| "provider": "PaddlePaddle", | |||
| "score": 80.0, | |||
| "date": "2025-10-16" | |||
| }, | |||
| { | |||
| "model_id": "baidu/Qianfan-OCR", | |||
| "short_name": "Qianfan-OCR", | |||
| "provider": "baidu", | |||
| "score": 79.8, | |||
| "date": "2026-03-18" | |||
| }, | |||
| { | |||
| "model_id": "rednote-hilab/dots.ocr", | |||
| "short_name": "dots.ocr", | |||
| "provider": "rednote-hilab", | |||
| "score": 79.1, | |||
| "date": "2025-07-30" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-OCR-2", | |||
| "short_name": "DeepSeek-OCR-2", | |||
| "provider": "deepseek-ai", | |||
| "score": 76.3, | |||
| "date": "2026-01-27" | |||
| }, | |||
| { | |||
| "model_id": "lightonai/LightOnOCR-1B-1025", | |||
| "short_name": "LightOnOCR-1B-1025", | |||
| "provider": "lightonai", | |||
| "score": 76.1, | |||
| "date": "2025-10-20" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-OCR", | |||
| "short_name": "DeepSeek-OCR", | |||
| "provider": "deepseek-ai", | |||
| "score": 75.7, | |||
| "date": "2025-10-17" | |||
| }, | |||
| { | |||
| "model_id": "opendatalab/MinerU2.5-2509-1.2B", | |||
| "short_name": "MinerU2.5-2509-1.2B", | |||
| "provider": "opendatalab", | |||
| "score": 75.2, | |||
| "date": "2025-09-17" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-OCR", | |||
| "short_name": "GLM-OCR", | |||
| "provider": "zai-org", | |||
| "score": 75.2, | |||
| "date": "2026-01-30" | |||
| }, | |||
| { | |||
| "model_id": "FireRedTeam/FireRed-OCR", | |||
| "short_name": "FireRed-OCR", | |||
| "provider": "FireRedTeam", | |||
| "score": 70.2, | |||
| "date": "2026-02-28" | |||
| }, | |||
| { | |||
| "model_id": "nanonets/Nanonets-OCR2-3B", | |||
| "short_name": "Nanonets-OCR2-3B", | |||
| "provider": "nanonets", | |||
| "score": 69.5, | |||
| "date": "2025-10-13" | |||
| } | |||
| ] | |||
| }, | |||
| "terminalBench": { | |||
| "name": "Terminal-Bench 2.0", | |||
| "models": [ | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |||
| "short_name": "Qwen3.5-397B-A17B", | |||
| "provider": "Qwen", | |||
| "score": 52.5, | |||
| "date": "2026-02-16" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-5", | |||
| "short_name": "GLM-5", | |||
| "provider": "zai-org", | |||
| "score": 52.4, | |||
| "date": "2026-02-11" | |||
| }, | |||
| { | |||
| "model_id": "stepfun-ai/Step-3.5-Flash", | |||
| "short_name": "Step-3.5-Flash", | |||
| "provider": "stepfun-ai", | |||
| "score": 51.0, | |||
| "date": "2026-02-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |||
| "short_name": "Qwen3.5-122B-A10B", | |||
| "provider": "Qwen", | |||
| "score": 49.4, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2.5", | |||
| "short_name": "Kimi-K2.5", | |||
| "provider": "moonshotai", | |||
| "score": 43.2, | |||
| "date": "2026-01-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-27B", | |||
| "short_name": "Qwen3.5-27B", | |||
| "provider": "Qwen", | |||
| "score": 41.6, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |||
| "short_name": "Qwen3.5-35B-A3B", | |||
| "provider": "Qwen", | |||
| "score": 40.5, | |||
| "date": "2026-02-24" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 39.6, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-Next", | |||
| "short_name": "Qwen3-Coder-Next", | |||
| "provider": "Qwen", | |||
| "score": 36.2, | |||
| "date": "2026-01-30" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Thinking", | |||
| "short_name": "Kimi-K2-Thinking", | |||
| "provider": "moonshotai", | |||
| "score": 35.7, | |||
| "date": "2025-11-04" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 33.4, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |||
| "provider": "nvidia", | |||
| "score": 31.0, | |||
| "date": "2026-03-10" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2", | |||
| "short_name": "MiniMax-M2", | |||
| "provider": "MiniMaxAI", | |||
| "score": 30.0, | |||
| "date": "2025-10-22" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 29.2, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Instruct", | |||
| "short_name": "Kimi-K2-Instruct", | |||
| "provider": "moonshotai", | |||
| "score": 27.8, | |||
| "date": "2025-07-11" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Terminal-32B", | |||
| "short_name": "Nemotron-Terminal-32B", | |||
| "provider": "nvidia", | |||
| "score": 27.4, | |||
| "date": "2026-02-17" | |||
| }, | |||
| { | |||
| "model_id": "zai-org/GLM-4.6", | |||
| "short_name": "GLM-4.6", | |||
| "provider": "zai-org", | |||
| "score": 24.5, | |||
| "date": "2025-09-29" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |||
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |||
| "provider": "Qwen", | |||
| "score": 23.9, | |||
| "date": "2025-07-22" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Terminal-14B", | |||
| "short_name": "Nemotron-Terminal-14B", | |||
| "provider": "nvidia", | |||
| "score": 20.2, | |||
| "date": "2026-02-17" | |||
| }, | |||
| { | |||
| "model_id": "nvidia/Nemotron-Terminal-8B", | |||
| "short_name": "Nemotron-Terminal-8B", | |||
| "provider": "nvidia", | |||
| "score": 13.0, | |||
| "date": "2026-02-17" | |||
| } | |||
| ] | |||
| }, | |||
| "evasionBench": { | |||
| "name": "EvasionBench", | |||
| "models": [ | |||
| { | |||
| "model_id": "zai-org/GLM-4.7", | |||
| "short_name": "GLM-4.7", | |||
| "provider": "zai-org", | |||
| "score": 82.91, | |||
| "date": "2025-12-22" | |||
| }, | |||
| { | |||
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |||
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |||
| "provider": "Qwen", | |||
| "score": 78.16, | |||
| "date": "2025-07-22" | |||
| }, | |||
| { | |||
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |||
| "short_name": "MiniMax-M2.1", | |||
| "provider": "MiniMaxAI", | |||
| "score": 71.31, | |||
| "date": "2025-12-20" | |||
| }, | |||
| { | |||
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |||
| "short_name": "DeepSeek-V3.2", | |||
| "provider": "deepseek-ai", | |||
| "score": 66.88, | |||
| "date": "2025-12-01" | |||
| }, | |||
| { | |||
| "model_id": "moonshotai/Kimi-K2-Instruct-0905", | |||
| "short_name": "Kimi-K2-Instruct-0905", | |||
| "provider": "moonshotai", | |||
| "score": 66.68, | |||
| "date": "2025-09-03" | |||
| } | |||
| ] | |||
| } | |||
| }, | |||
| "logos": { | |||
| "moonshotai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg", | |||
| "baidu": "https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png", | |||
| "meituan-longcat": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png", | |||
| "arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png", | |||
| "openbmb": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png", | |||
| "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png", | |||
| "LiquidAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/EsTgVtnM2IqVRKgPdfqcB.png", | |||
| "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png", | |||
| "opendatalab": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png", | |||
| "miromind-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png", | |||
| "tiiuae": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg", | |||
| "GAIR": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png", | |||
| "meta-llama": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png", | |||
| "FireRedTeam": "https://cdn-avatars.huggingface.co/v1/production/uploads/66ec07ef12bd743cfe91004e/PK3bgl6aF2RzW1QFKkq8R.png", | |||
| "nvidia": "https://cdn-avatars.huggingface.co/v1/production/uploads/65df9200dc3292a8983e5017/Vs5FPVCH-VZBipV3qKTuy.png", | |||
| "HelpingAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png", | |||
| "facebook": "https://cdn-avatars.huggingface.co/v1/production/uploads/1592839207516-noauth.png", | |||
| "openai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png", | |||
| "lm-provers": "https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/Td4sH4W-LIdR89AqHCuw3.jpeg", | |||
| "Nanbeige": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png", | |||
| "LGAI-EXAONE": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png", | |||
| "rednote-hilab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png", | |||
| "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png", | |||
| "SWE-Lego": "https://cdn-avatars.huggingface.co/v1/production/uploads/60fc2fcca6bdebbe52dfdaf4/AeuYwUH-CQCt893qnmAGa.png", | |||
| "PaddlePaddle": "https://cdn-avatars.huggingface.co/v1/production/uploads/1654942635336-5f3ff69679c1ba4c353d0c5a.png", | |||
| "zai-org": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png", | |||
| "TeichAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6837935ac3b7ffe0d2559ce9/-AxyvV4wfUY8uo87kNKkK.png", | |||
| "XiaomiMiMo": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg", | |||
| "nanonets": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg", | |||
| "google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png", | |||
| "stepfun-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png", | |||
| "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png", | |||
| "allenai": "https://cdn-avatars.huggingface.co/v1/production/uploads/652db071b62cf1f8463221e2/CxxwFiaomTa1MCX_B7-pT.png", | |||
| "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png", | |||
| "MiniMaxAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg", | |||
| "mistralai": "https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png", | |||
| "Qwen": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |||
| }, | |||
| "colors": { | |||
| "FireRedTeam": "#6366f1", | |||
| "GAIR": "#0d9488", | |||
| "HelpingAI": "#d97706", | |||
| "LGAI-EXAONE": "#e11d48", | |||
| "LiquidAI": "#7c3aed", | |||
| "MiniMaxAI": "#16a34a", | |||
| "Nanbeige": "#2563eb", | |||
| "PaddlePaddle": "#ea580c", | |||
| "Qwen": "#8b5cf6", | |||
| "SWE-Lego": "#0891b2", | |||
| "TeichAI": "#c026d3", | |||
| "XiaomiMiMo": "#65a30d", | |||
| "allenai": "#dc2626", | |||
| "arcee-ai": "#0284c7", | |||
| "baidu": "#a21caf", | |||
| "datalab-to": "#059669", | |||
| "deepseek-ai": "#9333ea", | |||
| "facebook": "#ca8a04", | |||
| "google": "#be185d", | |||
| "infly": "#0369a1", | |||
| "jdopensource": "#6366f1", | |||
| "lightonai": "#0d9488", | |||
| "lm-provers": "#d97706", | |||
| "meituan-longcat": "#e11d48", | |||
| "meta-llama": "#7c3aed", | |||
| "miromind-ai": "#16a34a", | |||
| "mistralai": "#2563eb", | |||
| "moonshotai": "#ea580c", | |||
| "nanonets": "#8b5cf6", | |||
| "nvidia": "#0891b2", | |||
| "openai": "#c026d3", | |||
| "openbmb": "#65a30d", | |||
| "opendatalab": "#dc2626", | |||
| "rednote-hilab": "#0284c7", | |||
| "stepfun-ai": "#a21caf", | |||
| "tiiuae": "#059669", | |||
| "zai-org": "#9333ea" | |||
| }, | |||
| "generated_at": "2026-03-26T08:00:29.372201+00:00" | |||
| } |