feat(config): add enable_thinking flag (default False) + fix HMMT bench gold answers

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
transcrilive
2026-05-10 13:08:41 +02:00
parent 1c4b9e8652
commit 81e8ac88cc
4 changed files with 17 additions and 6 deletions

View File

@@ -21,7 +21,7 @@ _HMMT_2025_SUBSET = [
{
"id": "hmmt-1",
"question": "Find the number of positive integers n <= 100 such that n^2 + n is divisible by 6.",
-"answer": "100",
+"answer": "66",
},
{
"id": "hmmt-2",
@@ -41,7 +41,7 @@ _HMMT_2025_SUBSET = [
{
"id": "hmmt-5",
"question": "What is the remainder when 2^100 is divided by 125?",
-"answer": "76",
+"answer": "1",
},
]