{"kind":"bench_version","version":{"version":"bench-2026.07","label":"CrawlDex Bench 2026.07","retired":false,"suiteTaskCount":120,"holdoutTaskCount":12,"publishedAt":"2026-06-10T00:00:00.000Z"},"indexable":true,"source":"fixture","canonical_path":"/bench/bench-2026.07","markdown_path":"/bench/bench-2026.07.md","latest_verified_at":"2026-06-12T02:15:00.000Z","leaderboard":[{"id":"fixture-codex-browser-bench-2026-07","version":"bench-2026.07","stack":"codex-browser","stackLabel":"Codex Browser","status":"verified","submittedAt":"2026-06-11T17:30:00.000Z","verifiedAt":"2026-06-12T02:15:00.000Z","overallScore":84,"categoryScores":[{"category":"subscriptions","title":"Subscriptions","score":88,"evaluable":30,"success":19,"handoff":7,"partial":3,"blocked":1},{"category":"commerce","title":"Commerce","score":82,"evaluable":28,"success":16,"handoff":7,"partial":3,"blocked":2},{"category":"travel","title":"Travel","score":79,"evaluable":22,"success":11,"handoff":6,"partial":3,"blocked":2},{"category":"finance","title":"Finance","score":86,"evaluable":20,"success":12,"handoff":5,"partial":2,"blocked":1},{"category":"dev_saas_api","title":"Developer SaaS/API","score":85,"evaluable":12,"success":9,"handoff":2,"partial":1,"blocked":0}],"taskFamilies":[{"task":"subscriptions.cancel","taskTitle":"Cancel a subscription","category":"subscriptions","categoryTitle":"Subscriptions","score":88,"evaluable":24,"success":15,"handoff":6,"partial":2,"blocked":1},{"task":"commerce.return_order","taskTitle":"Return an order","category":"commerce","categoryTitle":"Commerce","score":82,"evaluable":18,"success":10,"handoff":5,"partial":2,"blocked":1},{"task":"travel.cancel_booking","taskTitle":"Cancel a booking","category":"travel","categoryTitle":"Travel","score":78,"evaluable":16,"success":8,"handoff":5,"partial":2,"blocked":1},{"task":"finance.dispute_charge","taskTitle":"Dispute a charge","category":"finance","categoryTitle":"Finance","score":86,"evaluable":20,"success":12,"handoff":5,"partial":2,"blocked":1},{"task":"dev_saas_api.find_openapi_spec","taskTitle":"Find an OpenAPI spec","category":"dev_saas_api","categoryTitle":"Developer SaaS/API","score":85,"evaluable":12,"success":9,"handoff":2,"partial":1,"blocked":0}],"traceSummary":{"totalTasks":120,"evaluableTasks":112,"reachedOfficialSurface":106,"correctEvidence":98,"classificationMatches":101,"blockedByCaptcha":4,"heldOutTasks":12,"publicTraceCount":108}},{"id":"fixture-playwright-reference-bench-2026-07","version":"bench-2026.07","stack":"playwright-reference","stackLabel":"Playwright Reference","status":"verified","submittedAt":"2026-06-11T12:10:00.000Z","verifiedAt":"2026-06-11T20:45:00.000Z","overallScore":72,"categoryScores":[{"category":"subscriptions","title":"Subscriptions","score":76,"evaluable":30,"success":14,"handoff":7,"partial":6,"blocked":3},{"category":"commerce","title":"Commerce","score":70,"evaluable":28,"success":12,"handoff":7,"partial":5,"blocked":4},{"category":"travel","title":"Travel","score":68,"evaluable":22,"success":8,"handoff":6,"partial":4,"blocked":4},{"category":"finance","title":"Finance","score":73,"evaluable":20,"success":9,"handoff":5,"partial":4,"blocked":2},{"category":"dev_saas_api","title":"Developer SaaS/API","score":78,"evaluable":12,"success":8,"handoff":2,"partial":1,"blocked":1}],"taskFamilies":[{"task":"subscriptions.cancel","taskTitle":"Cancel a subscription","category":"subscriptions","categoryTitle":"Subscriptions","score":76,"evaluable":20,"success":11,"handoff":5,"partial":3,"blocked":1},{"task":"commerce.return_order","taskTitle":"Return an order","category":"commerce","categoryTitle":"Commerce","score":70,"evaluable":18,"success":9,"handoff":4,"partial":3,"blocked":2},{"task":"travel.cancel_booking","taskTitle":"Cancel a booking","category":"travel","categoryTitle":"Travel","score":68,"evaluable":16,"success":6,"handoff":5,"partial":3,"blocked":2},{"task":"finance.dispute_charge","taskTitle":"Dispute a charge","category":"finance","categoryTitle":"Finance","score":73,"evaluable":16,"success":8,"handoff":4,"partial":3,"blocked":1},{"task":"dev_saas_api.find_openapi_spec","taskTitle":"Find an OpenAPI spec","category":"dev_saas_api","categoryTitle":"Developer SaaS/API","score":78,"evaluable":12,"success":8,"handoff":2,"partial":1,"blocked":1}],"traceSummary":{"totalTasks":120,"evaluableTasks":106,"reachedOfficialSurface":94,"correctEvidence":82,"classificationMatches":86,"blockedByCaptcha":6,"heldOutTasks":12,"publicTraceCount":102}}],"categories":[{"category":"commerce","title":"Commerce","stacks":[{"stack":"codex-browser","stackLabel":"Codex Browser","score":82,"evaluable":28},{"stack":"playwright-reference","stackLabel":"Playwright Reference","score":70,"evaluable":28}]},{"category":"dev_saas_api","title":"Developer SaaS/API","stacks":[{"stack":"codex-browser","stackLabel":"Codex Browser","score":85,"evaluable":12},{"stack":"playwright-reference","stackLabel":"Playwright Reference","score":78,"evaluable":12}]},{"category":"finance","title":"Finance","stacks":[{"stack":"codex-browser","stackLabel":"Codex Browser","score":86,"evaluable":20},{"stack":"playwright-reference","stackLabel":"Playwright Reference","score":73,"evaluable":20}]},{"category":"subscriptions","title":"Subscriptions","stacks":[{"stack":"codex-browser","stackLabel":"Codex Browser","score":88,"evaluable":30},{"stack":"playwright-reference","stackLabel":"Playwright Reference","score":76,"evaluable":30}]},{"category":"travel","title":"Travel","stacks":[{"stack":"codex-browser","stackLabel":"Codex Browser","score":79,"evaluable":22},{"stack":"playwright-reference","stackLabel":"Playwright Reference","score":68,"evaluable":22}]}]}