mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
feat: add Java stop hook to enforce optimization effort (30-attempt cap)
Blocks session exit when the LLM hasn't proven its optimization with real JMH benchmarks, hasn't tried enough techniques, or has strategies remaining. Caps at 30 blocks to prevent infinite loops.
This commit is contained in:
parent
df5d529882
commit
45badaf3f0
3 changed files with 191 additions and 0 deletions
3
Makefile
3
Makefile
|
|
@ -16,6 +16,9 @@ build: clean
|
|||
rsync -a plugin/languages/$$lang/agents/ dist-$$lang/agents/; \
|
||||
rsync -a plugin/languages/$$lang/references/ dist-$$lang/references/; \
|
||||
rsync -a plugin/languages/$$lang/skills/ dist-$$lang/skills/; \
|
||||
if [ -d "plugin/languages/$$lang/hooks" ]; then \
|
||||
rsync -a plugin/languages/$$lang/hooks/ dist-$$lang/hooks/; \
|
||||
fi; \
|
||||
find dist-$$lang -type f -name '*.md' -exec \
|
||||
sed -i.bak "s|languages/$$lang/references/|references/|g" {} +; \
|
||||
find dist-$$lang -type f -name '*.md' -exec \
|
||||
|
|
|
|||
68
plugin/languages/java/hooks/hooks.json
Normal file
68
plugin/languages/java/hooks/hooks.json
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
{
|
||||
"hooks": {
|
||||
"SessionStart": [
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh\"",
|
||||
"timeout": 5
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/session-lifecycle-hook.mjs\" SessionStart",
|
||||
"timeout": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"SessionEnd": [
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/session-end.sh\"",
|
||||
"timeout": 5
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/session-lifecycle-hook.mjs\" SessionEnd",
|
||||
"timeout": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"PreCompact": [
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/pre-compact.sh\"",
|
||||
"timeout": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"Stop": [
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/stop-optimization-gate.sh\"",
|
||||
"timeout": 10,
|
||||
"statusMessage": "Checking Java optimization session — verifying effort and benchmarks..."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/stop-review-gate-hook.mjs\"",
|
||||
"timeout": 900
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
120
plugin/languages/java/hooks/stop-optimization-gate.sh
Executable file
120
plugin/languages/java/hooks/stop-optimization-gate.sh
Executable file
|
|
@ -0,0 +1,120 @@
|
|||
#!/bin/bash
# Java Stop Hook: blocks session exit when optimization is incomplete.
#
# Reads the hook's JSON payload on stdin, resolves the project directory, and
# inspects <project>/.codeflash to decide whether the stop may go ahead.
# The stop is allowed (exit 0) when:
#   - there is no .codeflash/ directory or no HANDOFF.md,
#   - the stop has already been blocked 30 times, or
#   - HANDOFF.md reports no status, or one of: completed, plateau, done, idle.
# Otherwise the remainder of the script builds a message and blocks (exit 2).
#
# Cap: blocks a maximum of 30 times, then allows stop.
# NOTE(review): the counter lives in .codeflash/.stop-attempts and nothing in
# this script resets it, so the cap looks per-project rather than per-session
# unless another hook clears the file — confirm.
# Exit 0 = allow stop, Exit 2 = block stop.

set -euo pipefail

INPUT=$(cat)

# jq may be missing, or the payload may not be valid JSON. Under
# `set -e -o pipefail` a failing pipeline inside $(...) would abort the hook
# before the fallback below is reached, so the failure is deliberately ignored.
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // empty' 2>/dev/null || true)
CWD="${CWD:-${CLAUDE_PROJECT_DIR:-$(pwd)}}"

CODEFLASH_DIR="$CWD/.codeflash"

# No .codeflash/ directory = no optimization session = allow stop
if [ ! -d "$CODEFLASH_DIR" ]; then
  exit 0
fi

HANDOFF="$CODEFLASH_DIR/HANDOFF.md"
RESULTS="$CODEFLASH_DIR/results.tsv"
STOP_COUNTER="$CODEFLASH_DIR/.stop-attempts"

# --- Counter logic: cap at 30 blocks ---
ATTEMPTS=0
if [ -f "$STOP_COUNTER" ]; then
  # An unreadable counter file is treated like a missing one.
  ATTEMPTS=$(tr -d '[:space:]' 2>/dev/null < "$STOP_COUNTER" || true)
fi
# The counter is only trusted when it is a plain decimal number; anything else
# (empty, corrupted, hand-edited) restarts the count instead of breaking the
# numeric comparison below and the later increment.
case "$ATTEMPTS" in
  '' | *[!0-9]*) ATTEMPTS=0 ;;
esac
# Force base 10 so a zero-padded value such as "08" is not parsed as octal.
ATTEMPTS=$((10#$ATTEMPTS))

# If we've already blocked 30 times, allow stop
if [ "$ATTEMPTS" -ge 30 ]; then
  exit 0
fi

# No HANDOFF.md = no active session
if [ ! -f "$HANDOFF" ]; then
  exit 0
fi

# Check session status — only block if active.
# The first "session status:" line (any letter case) is lower-cased, everything
# up to and including the label is dropped, and all whitespace is removed.
# `|| true` keeps a HANDOFF.md without such a line from aborting the script
# under `set -e -o pipefail`; it then yields an empty status, handled below.
SESSION_STATUS=$(
  grep -i 'session status:' "$HANDOFF" 2>/dev/null \
    | head -n 1 \
    | tr '[:upper:]' '[:lower:]' \
    | sed 's/.*session status://' \
    | tr -d '[:space:]' \
    || true
)

case "$SESSION_STATUS" in
  "" | completed | plateau | done | idle)
    exit 0
    ;;
esac
|
||||
|
||||
# Count experiments from results.tsv

# Print every line of file $1 except the first (the header row).
# Prints nothing, and still succeeds, when the file is missing or unreadable.
data_rows() {
  tail -n +2 "$1" 2>/dev/null || true
}

# Read lines on stdin and print how many match the extended regex $1,
# ignoring case. Always prints exactly one integer: `grep -c` already prints
# "0" when nothing matches but exits 1, so its exit status is discarded here
# instead of being answered with a second "0".
count_matches() {
  local n
  n=$(grep -ciE -e "$1" 2>/dev/null) || true
  printf '%s\n' "${n:-0}"
}

TOTAL=0
KEEPS=0
if [ -f "$RESULTS" ]; then
  # A row counts as an experiment when it has at least one non-blank character.
  TOTAL=$(data_rows "$RESULTS" | count_matches '[^[:space:]]')
  KEEPS=$(data_rows "$RESULTS" | count_matches 'keep')
fi

# Check strategy plan status: number of plan lines mentioning a phase or
# unfinished work.
STRATEGIES_REMAINING=0
if [ -f "$CODEFLASH_DIR/strategy-plan.md" ]; then
  STRATEGIES_REMAINING=$(count_matches 'Phase|TODO|PENDING|remaining' \
    < "$CODEFLASH_DIR/strategy-plan.md") || STRATEGIES_REMAINING=0
fi

# Check if JMH comparison was done (authoritative measurement)
JMH_DONE=false
if [ -f "$RESULTS" ]; then
  if [ "$(data_rows "$RESULTS" | count_matches 'jmh|benchmark.*compared|verified')" -gt 0 ]; then
    JMH_DONE=true
  fi
fi
|
||||
|
||||
# --- Increment counter ---
# Record this block attempt so the 30-attempt cap can eventually release the stop.
ATTEMPTS=$((ATTEMPTS + 1))
printf '%s\n' "$ATTEMPTS" > "$STOP_COUNTER"

# --- Build the blocking message ---
# The wording is deliberately forceful: it is written to stderr and the script
# exits 2 to block the stop.
# Paragraphs are appended one at a time, separated by a blank line.
para_break=$'\n\n'

REASON="STOP BLOCKED (attempt $ATTEMPTS/30). You are NOT done."
REASON+="${para_break}YOU DO NOT HAVE EVIDENCE THAT YOUR OPTIMIZATION IS GOOD. You have not scanned the codebase thoroughly enough. Your benchmarking is insufficient or missing entirely. DO NOT STOP."
REASON+="${para_break}Current state: $TOTAL experiments run, $KEEPS keeps."

# No JMH comparison was detected in the results file.
if [ "$JMH_DONE" = "false" ]; then
  REASON+="${para_break}CRITICAL: No JMH benchmark comparison found. You MUST run a proper JMH benchmark comparing original vs optimized code before you can claim any improvement. Without JMH numbers, you have ZERO evidence. Run the benchmark NOW."
fi

# The strategy plan still contains lines that look unfinished.
if [ "$STRATEGIES_REMAINING" -gt 0 ]; then
  REASON+="${para_break}You still have strategies remaining in your plan. You have NOT exhausted your optimization approaches. Keep trying different techniques — the exploration ladder has 10 categories. Use them."
fi

# Fewer than the minimum of 10 recorded experiments.
if [ "$TOTAL" -lt 10 ]; then
  REASON+="${para_break}You've only run $TOTAL experiments. The minimum per-target budget is 10 attempts. You are giving up far too early. A single failed attempt does NOT mean a target is unoptimizable. Try fundamentally different approaches: algorithm change, memory layout, concurrency redesign, JIT-aware coding, architecture change."
fi

# Closing checklist, always included.
REASON+="${para_break}What you MUST do before stopping:
1. Run JMH benchmarks comparing original (base-sha) vs current optimized code
2. Verify ALL keeps with proper micro-benchmarks (not just tests passing)
3. Try at least 3 more fundamentally different techniques on your current target
4. Re-profile after fixes — new bottlenecks become visible after optimizations
5. Check the exploration ladder — have you tried ALL 10 categories?"
REASON+="${para_break}DO NOT give up. DO NOT claim 'already optimal' without 10+ attempts with different techniques. Put in the effort. The breakthrough comes from attempt 5-15, not attempt 1-3."

printf '%s\n' "$REASON" >&2
exit 2
|
||||
Loading…
Reference in a new issue