codeflash-internal/tiles/codeflash-internal-rules/evals/summary.json
2026-02-14 22:25:30 -05:00

70 lines
3.7 KiB
JSON

{
"package_name": "codeflash-internal-rules",
"total_scenarios": 5,
"total_capabilities_tested": 13,
"scenarios": [
{
"id": 1,
"name": "Implement a Ruby Optimizer Handler",
"description": "Create a new language handler for Ruby following the registry pattern, protocol system, async conventions, and monorepo directory structure",
"capabilities_tested": [5, 6, 0, 3, 4],
"capability_names": ["registry-handler-pattern", "feature-dispatcher-protocol", "python-async-endpoints", "lazy-imports-in-routers", "monorepo-directory-placement"],
"rules_covered": ["multi-language-handlers", "code-style", "architecture", "optimization-patterns"]
},
{
"id": 2,
"name": "Add a Stripe Webhook Endpoint to cf-api",
"description": "Add a webhook endpoint, ensuring it is registered in the correct order relative to the body parser, and test it using the DI pattern",
"capabilities_tested": [11, 4, 9],
"capability_names": ["cfapi-webhook-and-di-patterns", "monorepo-directory-placement", "python-test-conventions"],
"rules_covered": ["code-style", "architecture", "testing-rules"]
},
{
"id": 3,
"name": "Fix a Bug in the Optimizer Router and Commit",
"description": "Fix a language alias bug, write tests, and commit following git conventions with proper branch naming, conventional commits, and pre-commit checks",
"capabilities_tested": [7, 8, 3],
"capability_names": ["conventional-commits-and-branching", "pre-commit-and-tooling", "lazy-imports-in-routers"],
"rules_covered": ["git-conventions", "code-style", "optimization-patterns"]
},
{
"id": 4,
"name": "Review a Pull Request",
"description": "Review a PR containing security issues, test typos, and breaking changes while following the guidelines to skip style and suggestion comments",
"capabilities_tested": [10, 0, 9],
"capability_names": ["pr-review-guidelines", "python-async-endpoints", "python-test-conventions"],
"rules_covered": ["testing-rules", "code-style"]
},
{
"id": 5,
"name": "Add a JS/TS Postprocessor for Optimization Deduplication",
"description": "Implement a postprocessor following optimization patterns (AST dedup, model distribution), code style (libcst for transforms, llm.py for calls, Jinja2 prompts), and test conventions",
"capabilities_tested": [12, 1, 2, 9],
"capability_names": ["optimization-postprocessing", "libcst-for-transforms", "llm-calls-via-llm-py", "python-test-conventions"],
"rules_covered": ["optimization-patterns", "code-style", "testing-rules"]
}
],
"capability_coverage": {
"0": {"name": "python-async-endpoints", "tested_in": [1, 4]},
"1": {"name": "libcst-for-transforms", "tested_in": [5]},
"2": {"name": "llm-calls-via-llm-py", "tested_in": [5]},
"3": {"name": "lazy-imports-in-routers", "tested_in": [1, 3]},
"4": {"name": "monorepo-directory-placement", "tested_in": [1, 2]},
"5": {"name": "registry-handler-pattern", "tested_in": [1]},
"6": {"name": "feature-dispatcher-protocol", "tested_in": [1]},
"7": {"name": "conventional-commits-and-branching", "tested_in": [3]},
"8": {"name": "pre-commit-and-tooling", "tested_in": [3]},
"9": {"name": "python-test-conventions", "tested_in": [2, 4, 5]},
"10": {"name": "pr-review-guidelines", "tested_in": [4]},
"11": {"name": "cfapi-webhook-and-di-patterns", "tested_in": [2]},
"12": {"name": "optimization-postprocessing", "tested_in": [5]}
},
"rules_coverage": {
"code-style": [1, 2, 3, 4, 5],
"architecture": [1, 2],
"optimization-patterns": [1, 3, 5],
"git-conventions": [3],
"testing-rules": [2, 4, 5],
"multi-language-handlers": [1]
}
}