feat: add Java stop hook to enforce optimization effort (30-attempt cap)

Blocks session exit when the LLM hasn't proven its optimization with
real JMH benchmarks, hasn't tried enough techniques, or has strategies
remaining. Caps at 30 blocks to prevent infinite loops.
This commit is contained in:
HeshamHM28 2026-04-30 16:59:06 +03:00
parent df5d529882
commit 45badaf3f0
3 changed files with 191 additions and 0 deletions

View file

@ -16,6 +16,9 @@ build: clean
rsync -a plugin/languages/$$lang/agents/ dist-$$lang/agents/; \ rsync -a plugin/languages/$$lang/agents/ dist-$$lang/agents/; \
rsync -a plugin/languages/$$lang/references/ dist-$$lang/references/; \ rsync -a plugin/languages/$$lang/references/ dist-$$lang/references/; \
rsync -a plugin/languages/$$lang/skills/ dist-$$lang/skills/; \ rsync -a plugin/languages/$$lang/skills/ dist-$$lang/skills/; \
if [ -d "plugin/languages/$$lang/hooks" ]; then \
rsync -a plugin/languages/$$lang/hooks/ dist-$$lang/hooks/; \
fi; \
find dist-$$lang -type f -name '*.md' -exec \ find dist-$$lang -type f -name '*.md' -exec \
sed -i.bak "s|languages/$$lang/references/|references/|g" {} +; \ sed -i.bak "s|languages/$$lang/references/|references/|g" {} +; \
find dist-$$lang -type f -name '*.md' -exec \ find dist-$$lang -type f -name '*.md' -exec \

View file

@ -0,0 +1,68 @@
{
"hooks": {
"SessionStart": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh\"",
"timeout": 5
},
{
"type": "command",
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/session-lifecycle-hook.mjs\" SessionStart",
"timeout": 5
}
]
}
],
"SessionEnd": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/session-end.sh\"",
"timeout": 5
},
{
"type": "command",
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/session-lifecycle-hook.mjs\" SessionEnd",
"timeout": 5
}
]
}
],
"PreCompact": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/pre-compact.sh\"",
"timeout": 5
}
]
}
],
"Stop": [
{
"hooks": [
{
"type": "command",
"command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/stop-optimization-gate.sh\"",
"timeout": 10,
"statusMessage": "Checking Java optimization session — verifying effort and benchmarks..."
}
]
},
{
"hooks": [
{
"type": "command",
"command": "node \"${CLAUDE_PLUGIN_ROOT}/vendor/codex/scripts/stop-review-gate-hook.mjs\"",
"timeout": 900
}
]
}
]
}
}

View file

@ -0,0 +1,120 @@
#!/bin/bash
# Java Stop hook: blocks session exit while an optimization session looks
# incomplete, so the agent keeps working until it has benchmark evidence.
#
# Input : hook payload (JSON) on stdin; only its `.cwd` field is read.
# Output: when blocking, the reasons are written to stderr.
# Exit  : 0 = allow stop, 2 = block stop.
#
# Files consulted under <cwd>/.codeflash/:
#   HANDOFF.md        "Session status:" line; a missing file, a missing line,
#                     or completed/plateau/done/idle means "allow stop".
#   results.tsv       experiment log; the first line is skipped as a header.
#   strategy-plan.md  scanned for markers of unfinished work.
#   .stop-attempts    number of times this hook has blocked so far.
#
# Cap: blocks a maximum of 30 times per session, then allows stop.
set -euo pipefail

readonly MAX_BLOCKS=30

# count_matches PATTERN FILE [FIRST_LINE]
# Prints how many lines of FILE, starting at line FIRST_LINE (default 1),
# match the case-insensitive extended regex PATTERN.
# Always prints exactly one integer: `grep -c` already prints "0" when nothing
# matches (while exiting 1), so its status is ignored rather than appending a
# second "0" via `|| echo 0`. Counting (instead of `grep -q`) also keeps grep
# reading to EOF, so `tail` cannot die of SIGPIPE and trip `pipefail`.
count_matches() {
  local pattern=$1 file=$2 first=${3:-1} n
  n=$(tail -n "+${first}" "$file" 2>/dev/null \
    | grep -Eci -- "$pattern" 2>/dev/null) || true
  case "$n" in
    '' | *[!0-9]*) n=0 ;;
  esac
  printf '%s\n' "$n"
}

INPUT=$(cat) || INPUT=""

# A missing jq or a malformed payload must not abort the hook under
# `set -e`/`pipefail`; fall back to the environment or the current directory.
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // empty' 2>/dev/null) || CWD=""
CWD="${CWD:-${CLAUDE_PROJECT_DIR:-$(pwd)}}"
CODEFLASH_DIR="$CWD/.codeflash"

# No .codeflash/ directory = no optimization session = allow stop
if [ ! -d "$CODEFLASH_DIR" ]; then
  exit 0
fi

HANDOFF="$CODEFLASH_DIR/HANDOFF.md"
RESULTS="$CODEFLASH_DIR/results.tsv"
STOP_COUNTER="$CODEFLASH_DIR/.stop-attempts"

# --- Counter logic: cap at MAX_BLOCKS blocks ---
# Anything that is not a plain decimal number is treated as 0 so that a
# corrupted counter file can neither break the comparison nor be evaluated as
# an arithmetic expression. `10#` keeps values such as "08" from being parsed
# as octal.
ATTEMPTS=0
if [ -f "$STOP_COUNTER" ]; then
  RAW_ATTEMPTS=$(tr -d '[:space:]' 2>/dev/null < "$STOP_COUNTER") || RAW_ATTEMPTS=""
  case "$RAW_ATTEMPTS" in
    '' | *[!0-9]*) ATTEMPTS=0 ;;
    *) ATTEMPTS=$((10#$RAW_ATTEMPTS)) ;;
  esac
fi

# If we've already blocked MAX_BLOCKS times, allow stop
if [ "$ATTEMPTS" -ge "$MAX_BLOCKS" ]; then
  exit 0
fi

# No HANDOFF.md = no active session
if [ ! -f "$HANDOFF" ]; then
  exit 0
fi

# Check session status — only block if active.
# `|| true`: grep exits 1 when the line is absent, which would otherwise kill
# the script before the empty-status case below is reached.
STATUS_LINE=$(grep -i 'session status:' "$HANDOFF" 2>/dev/null | head -n 1) || true
# Normalise first (lower-case; drop whitespace and Markdown emphasis such as
# "**Session status:** done"), then strip everything up to the label. Doing it
# in this order makes the label removal case-insensitive, matching `grep -i`.
SESSION_STATUS=$(printf '%s' "$STATUS_LINE" \
  | tr '[:upper:]' '[:lower:]' \
  | tr -d '*_`[:space:]')
SESSION_STATUS=${SESSION_STATUS##*sessionstatus:}
case "$SESSION_STATUS" in
  "" | completed | plateau | done | idle)
    exit 0
    ;;
esac

# Count experiments from results.tsv (header row skipped).
# TOTAL = rows containing at least one non-whitespace character.
# KEEPS = rows containing "keep" anywhere, case-insensitively.
TOTAL=0
KEEPS=0
if [ -f "$RESULTS" ]; then
  TOTAL=$(count_matches '[^[:space:]]' "$RESULTS" 2)
  KEEPS=$(count_matches 'keep' "$RESULTS" 2)
fi

# Check strategy plan status.
# NOTE(review): any line mentioning "Phase" counts, so a plan that merely has
# phase headings always registers as having strategies remaining — confirm
# this is intended.
STRATEGIES_REMAINING=0
if [ -f "$CODEFLASH_DIR/strategy-plan.md" ]; then
  STRATEGIES_REMAINING=$(count_matches 'Phase|TODO|PENDING|remaining' \
    "$CODEFLASH_DIR/strategy-plan.md")
fi

# Check if JMH comparison was done (authoritative measurement)
JMH_DONE=false
if [ -f "$RESULTS" ]; then
  if [ "$(count_matches 'jmh|benchmark.*compared|verified' "$RESULTS" 2)" -gt 0 ]; then
    JMH_DONE=true
  fi
fi

# --- Increment counter ---
ATTEMPTS=$((ATTEMPTS + 1))
if ! printf '%s\n' "$ATTEMPTS" 2>/dev/null > "$STOP_COUNTER"; then
  # Without a persisted counter the cap cannot be enforced, so blocking could
  # repeat forever; allow the stop instead.
  printf 'stop-optimization-gate: cannot write %s; allowing stop\n' "$STOP_COUNTER" >&2
  exit 0
fi

# --- Build the blocking message ---
# This message is intentionally aggressive to force the LLM to keep working.
REASON="STOP BLOCKED (attempt $ATTEMPTS/$MAX_BLOCKS). You are NOT done.
YOU DO NOT HAVE EVIDENCE THAT YOUR OPTIMIZATION IS GOOD. You have not scanned the codebase thoroughly enough. Your benchmarking is insufficient or missing entirely. DO NOT STOP.
Current state: $TOTAL experiments run, $KEEPS keeps."

if [ "$JMH_DONE" = "false" ]; then
  REASON="$REASON
CRITICAL: No JMH benchmark comparison found. You MUST run a proper JMH benchmark comparing original vs optimized code before you can claim any improvement. Without JMH numbers, you have ZERO evidence. Run the benchmark NOW."
fi

if [ "$STRATEGIES_REMAINING" -gt 0 ]; then
  REASON="$REASON
You still have strategies remaining in your plan. You have NOT exhausted your optimization approaches. Keep trying different techniques — the exploration ladder has 10 categories. Use them."
fi

if [ "$TOTAL" -lt 10 ]; then
  REASON="$REASON
You've only run $TOTAL experiments. The minimum per-target budget is 10 attempts. You are giving up far too early. A single failed attempt does NOT mean a target is unoptimizable. Try fundamentally different approaches: algorithm change, memory layout, concurrency redesign, JIT-aware coding, architecture change."
fi

REASON="$REASON
What you MUST do before stopping:
1. Run JMH benchmarks comparing original (base-sha) vs current optimized code
2. Verify ALL keeps with proper micro-benchmarks (not just tests passing)
3. Try at least 3 more fundamentally different techniques on your current target
4. Re-profile after fixes — new bottlenecks become visible after optimizations
5. Check the exploration ladder — have you tried ALL 10 categories?
DO NOT give up. DO NOT claim 'already optimal' without 10+ attempts with different techniques. Put in the effort. The breakthrough comes from attempt 5-15, not attempt 1-3."

printf '%s\n' "$REASON" >&2
exit 2