# Commit message captured with this file:
# - Optimize codeflash-optimize SKILL.md (review score 17% → 98%, eval 87% → 100%)
# - Fix frontmatter (allowed-tools format, argument-hint under metadata)
# - Lead description with concrete actions, explicit agent launch parameters
# - Add multi-run variance detection to eval system (--runs N flag)
# - score.py aggregate command: min/max/avg/stddev per criterion, flaky detection
# - check-regression.sh defaults to 3 runs for reliable regression detection
# - Add per-criterion regression tracking to baseline-scores.json (v3)
# - Reports exactly which criteria regressed, not just total score drops
# - Rename evals/ → codeflash-evals/ to avoid tessl directory conflicts
# - Switch tessl to managed mode, gitignore vendored tiles and symlinks
# File viewer metadata: 20 lines, 338 B, TOML
# Core project metadata.
[project]
name = "data-aggregator"
version = "0.1.0"
requires-python = ">=3.11"
# No runtime dependencies are declared; development-only tools are listed
# separately from this array.
dependencies = []

# Build backend declaration: the project is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Wheel build target: the package directory is listed explicitly because it
# lives under src/ and is named "aggregator", not after the project name.
[tool.hatch.build.targets.wheel]
packages = ["src/aggregator"]

# pytest configuration: restrict test collection to the tests/ directory.
[tool.pytest.ini_options]
testpaths = ["tests"]

# Development-only dependency groups, kept out of the runtime
# `dependencies` array in [project].
[dependency-groups]
dev = [
    "pytest>=9.0.2",
]