feat: enhance Java optimization flow with extended sessions, Pareto tracking, and MCP visibility

Add iterative optimization capabilities inspired by Kimi K2.6: thread topology & spin-wait
strategies, allocation profiling, cross-function scope, behavioral equivalence verification,
Pareto frontier tracking with chart generation, extended session protocol (10-15+ hours),
session interruption detection/recovery via hooks, and MCP endpoint visibility so users
can follow the profiling pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
HeshamHM28 2026-04-28 16:17:45 +03:00
parent f4101615c2
commit 6274dca2d5
19 changed files with 3085 additions and 15 deletions

View file

@ -1,11 +1,15 @@
[project]
name = "codeflash-mcp"
version = "0.1.0"
requires-python = ">=3.9"
requires-python = ">=3.10"
dependencies = [
"codeflash-core",
"mcp[cli]>=1.0.0",
]
[project.scripts]
codeflash-mcp = "codeflash_mcp.server:main"
[build-system]
requires = ["uv_build>=0.7.2,<0.8"]
build-backend = "uv_build"

View file

@ -1 +1 @@
"""MCP server for codeflash — stub package."""
"""MCP server for codeflash — exposes profiling and analysis tools."""

View file

@ -0,0 +1,303 @@
"""Discover optimization flows from JFR profiling data.
A flow is a ranked optimization target: a bottleneck method, its dimension
(CPU, lock contention, I/O, GC, allocation), its impact score, and the
code path leading to it. This is the Java equivalent of the Python CLI's
"discover functions → rank → iterate" pattern.
"""
from __future__ import annotations
import json
import logging
from typing import Any
log = logging.getLogger(__name__)
def discover_flows(
    jfr_path: str,
    source_package: str = ".",
    max_flows: int = 20,
    jfr_cmd: str = "jfr",
) -> list[dict[str, Any]]:
    """Discover and rank optimization flows from a JFR recording.

    Combines CPU, contention, I/O, GC, and allocation analysis into a
    single ranked list of flows. Each flow is an independent optimization
    target with a clear root cause and estimated impact.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix for "your code"; "." matches all.
        max_flows: Maximum number of flows to return.
        jfr_cmd: Path or name of the `jfr` CLI binary.

    Returns flows sorted by estimated wall-clock impact (highest first).
    """
    # Imported lazily so importing this module stays cheap and does not
    # pull in the parser's subprocess machinery at import time.
    from ._jfr_parser import (
        build_call_graph,
        find_bottlenecks,
        find_hotpath,
        parse_jfr,
        parse_jfr_contention,
        parse_jfr_gc,
        parse_jfr_io,
    )
    flows: list[dict[str, Any]] = []
    # Methods already claimed by an earlier dimension. Later dimensions
    # merge extra evidence into the existing flow instead of duplicating.
    seen_methods: set[str] = set()
    # --- CPU flows ---
    cpu_parsed = parse_jfr(
        jfr_path, source_package, jfr_cmd=jfr_cmd
    )
    cpu_graph = build_call_graph(cpu_parsed)
    cpu_bottlenecks = find_bottlenecks(
        cpu_graph, top_n=max_flows, source_only=(source_package != ".")
    )
    # Fall back to all packages if the source filter matched nothing.
    if not cpu_bottlenecks:
        cpu_bottlenecks = find_bottlenecks(
            cpu_graph, top_n=max_flows, source_only=False
        )
    hotpath = find_hotpath(cpu_graph)
    # Estimate total CPU ms from samples (1 sample ≈ 20ms)
    # NOTE(review): total_cpu_est_ms is never used below — candidate for
    # removal or for use in impact normalization; confirm intent.
    total_cpu_est_ms = cpu_parsed["total_samples"] * 20
    for b in cpu_bottlenecks:
        method = b["method"]
        if method in seen_methods:
            continue
        seen_methods.add(method)
        # Only the two heaviest callers/callees are surfaced per flow.
        callers_str = ", ".join(
            c["method"] for c in b.get("callers", [])[:2]
        )
        callees_str = ", ".join(
            c["method"] for c in b.get("callees", [])[:2]
        )
        # Same 20ms-per-sample heuristic as above.
        impact_ms = b["self_samples"] * 20
        flows.append({
            "rank": 0,  # real rank assigned after the final sort
            "method": method,
            "dimension": "CPU",
            "impact_ms": impact_ms,
            "impact_pct": b["self_pct"],
            "evidence": (
                f"{b['self_samples']} self-samples "
                f"({b['self_pct']}% of CPU)"
            ),
            "root_cause": _infer_cpu_cause(b, hotpath),
            "callers": callers_str,
            "callees": callees_str,
            "action": _suggest_cpu_action(b),
            "status": "pending",
        })
    # --- Lock contention flows ---
    contention = parse_jfr_contention(
        jfr_path, source_package, max_flows, jfr_cmd
    )
    for site in contention.get("contention_sites", []):
        method = site["method"]
        if method in seen_methods:
            # Merge: add contention dimension to existing CPU flow
            # (at this point seen_methods only contains CPU flows, so the
            # hard-coded "CPU + lock_contention" label is safe).
            for f in flows:
                if f["method"] == method:
                    f["dimension"] = f"CPU + lock_contention"
                    f["evidence"] += (
                        f"; also {site['total_ms']}ms "
                        f"lock contention ({site['count']}x)"
                    )
                    f["impact_ms"] += site["total_ms"]
                    break
            continue
        seen_methods.add(method)
        flows.append({
            "rank": 0,
            "method": method,
            "dimension": "lock_contention",
            "impact_ms": site["total_ms"],
            "impact_pct": site["pct"],
            "evidence": (
                f"{site['total_ms']}ms total wait, "
                f"{site['count']}x events, "
                f"max {site['max_ms']}ms, "
                f"monitors: {', '.join(site.get('monitor_classes', []))}"
            ),
            "root_cause": (
                f"Threads blocking on "
                f"{', '.join(site.get('monitor_classes', [])[:2])}"
            ),
            "callers": "",
            "callees": "",
            "action": (
                "Replace synchronized with ReentrantLock/StampedLock, "
                "reduce critical section scope, or eliminate shared state"
            ),
            "status": "pending",
        })
    # --- I/O flows ---
    io = parse_jfr_io(
        jfr_path, source_package, max_flows, jfr_cmd
    )
    for site in io.get("io_sites", []):
        method = site["method"]
        if method in seen_methods:
            # Merge I/O evidence into whichever flow already owns the method.
            for f in flows:
                if f["method"] == method:
                    f["dimension"] += " + IO"
                    f["evidence"] += (
                        f"; also {site['total_ms']}ms I/O "
                        f"({site['count']}x ops)"
                    )
                    f["impact_ms"] += site["total_ms"]
                    break
            continue
        seen_methods.add(method)
        io_types = ", ".join(site.get("io_types", []))
        paths = ", ".join(site.get("paths", [])[:2])
        flows.append({
            "rank": 0,
            "method": method,
            "dimension": "IO",
            "impact_ms": site["total_ms"],
            "impact_pct": site["pct"],
            "evidence": (
                f"{site['total_ms']}ms total, "
                f"{site['count']}x ops, "
                f"{site['total_bytes']} bytes, "
                f"types: {io_types}"
            ),
            "root_cause": (
                f"Slow {io_types} on {paths}" if paths
                else f"Slow {io_types} operations"
            ),
            "callers": "",
            "callees": "",
            "action": (
                "Buffer I/O, batch small reads/writes, use memory-mapped "
                "files, or move to async I/O"
            ),
            "status": "pending",
        })
    # --- GC flows (allocation sites driving pauses) ---
    gc = parse_jfr_gc(
        jfr_path, source_package, max_flows, jfr_cmd
    )
    gc_pause_ms = gc.get("total_gc_pause_ms", 0)
    for site in gc.get("allocation_pressure", []):
        method = site["method"]
        if method in seen_methods:
            # NOTE(review): unlike the contention/IO merges, this merge does
            # not add to impact_ms — presumably because allocation share is
            # already indirect (ms attribution is via GC pause); confirm.
            for f in flows:
                if f["method"] == method:
                    f["dimension"] += " + GC_pressure"
                    f["evidence"] += (
                        f"; also {site['bytes_mb']}MB alloc "
                        f"({site['pct']}% of total)"
                    )
                    break
            continue
        seen_methods.add(method)
        # Impact: proportion of GC pause attributable to this allocator
        alloc_impact_ms = gc_pause_ms * site["pct"] / 100
        flows.append({
            "rank": 0,
            "method": method,
            "dimension": "GC_pressure",
            "impact_ms": round(alloc_impact_ms, 1),
            "impact_pct": site["pct"],
            "evidence": (
                f"{site['bytes_mb']}MB allocated ({site['pct']}% "
                f"of total), {site['count']}x allocs, "
                f"types: {', '.join(site.get('top_types', [])[:3])}, "
                f"total GC pause: {gc_pause_ms}ms"
            ),
            "root_cause": (
                f"High allocation rate of "
                f"{', '.join(site.get('top_types', [])[:2])}"
            ),
            "callers": "",
            "callees": "",
            "action": (
                "Reduce allocations: reuse objects, use primitives, "
                "pre-size collections, avoid autoboxing in hot loops"
            ),
            "status": "pending",
        })
    # --- Sort by impact and assign ranks ---
    flows.sort(key=lambda f: f["impact_ms"], reverse=True)
    for i, flow in enumerate(flows):
        flow["rank"] = i + 1
    return flows[:max_flows]
def render_flow_list(flows: list[dict[str, Any]]) -> str:
    """Format the discovered flows as a human-readable checklist."""
    if not flows:
        return "No optimization flows discovered."
    icons = {
        "pending": "[ ]",
        "in_progress": "[>]",
        "done": "[x]",
        "skipped": "[-]",
    }
    out: list[str] = []
    for flow in flows:
        icon = icons.get(flow["status"], "[ ]")
        out.append(
            f"{icon} #{flow['rank']} {flow['method']} "
            f"[{flow['dimension']}] ~{flow['impact_ms']}ms"
        )
        out.append(f" Evidence: {flow['evidence']}")
        out.append(f" Root cause: {flow['root_cause']}")
        out.append(f" Action: {flow['action']}")
        out.append("")
    return "\n".join(out)
def _infer_cpu_cause(
bottleneck: dict[str, Any],
hotpath: list[dict[str, Any]],
) -> str:
"""Infer root cause for a CPU bottleneck."""
callees = bottleneck.get("callees", [])
self_pct = bottleneck.get("self_pct", 0)
total_pct = bottleneck.get("total_pct", 0)
if self_pct > total_pct * 0.7:
return "CPU-bound: most time in own code, not callees"
if callees:
top_callee = callees[0]["method"]
return f"Bottleneck in callee: {top_callee}"
return "High CPU self-time"
def _suggest_cpu_action(bottleneck: dict[str, Any]) -> str:
"""Suggest optimization action for a CPU bottleneck."""
method = bottleneck["method"].lower()
callees = bottleneck.get("callees", [])
if any("regex" in c["method"].lower() for c in callees):
return "Precompile regex, or replace with String methods"
if any("hash" in c["method"].lower() for c in callees):
return "Check hash function cost, consider caching hash codes"
if any("sort" in c["method"].lower() for c in callees):
return "Check sort necessity, use partial sort or pre-sorted data"
if any("stream" in c["method"].lower() for c in callees):
return "Replace stream pipeline with explicit loop"
if "iterator" in method or "foreach" in method:
return "Check iteration overhead, consider index-based loop"
if "tostring" in method:
return "Cache or avoid repeated toString() calls"
return "Profile deeper with line-level profiling, check algorithm complexity"

View file

@ -0,0 +1,725 @@
"""Parse JFR recordings into weighted call graphs and bottleneck analysis."""
from __future__ import annotations
import json
import logging
import subprocess
from collections import defaultdict
from typing import Any
log = logging.getLogger(__name__)
def _run_jfr(
    jfr_path: str,
    events: str,
    jfr_cmd: str = "jfr",
    stack_depth: int = 64,
) -> list[dict[str, Any]]:
    """Run ``jfr print --json`` and return the decoded events list.

    Raises RuntimeError (with the tool's stderr) on a non-zero exit code.
    """
    argv = [
        jfr_cmd,
        "print",
        "--json",
        "--events",
        events,
        "--stack-depth",
        str(stack_depth),
        jfr_path,
    ]
    # List-form argv (no shell) keeps the path from being shell-interpreted.
    proc = subprocess.run(
        argv,
        capture_output=True,
        text=True,
        timeout=120,
    )
    if proc.returncode != 0:
        raise RuntimeError(f"jfr print failed: {proc.stderr.strip()}")
    payload = json.loads(proc.stdout)
    return payload.get("recording", {}).get("events", [])
def _extract_stack(
event: dict[str, Any],
source_package: str = ".",
) -> list[dict[str, Any]]:
"""Extract stack frames from a JFR event."""
stack_trace = event.get("values", {}).get("stackTrace", {})
frames_raw = stack_trace.get("frames", [])
frames: list[dict[str, Any]] = []
for frame in frames_raw:
method = frame.get("method", {})
type_name = method.get("type", {}).get("name", "")
method_name = method.get("name", "")
if not type_name or not method_name:
continue
qualified = f"{type_name}.{method_name}"
is_source = source_package == "." or type_name.startswith(
source_package
)
frames.append({"method": qualified, "is_source": is_source})
return frames
def _top_source_frame(
frames: list[dict[str, Any]],
) -> str:
"""Find the first source frame in a stack, or the leaf."""
for frame in frames:
if frame["is_source"]:
return frame["method"]
return frames[0]["method"] if frames else "unknown"
# ---------------------------------------------------------------------------
# CPU execution samples
# ---------------------------------------------------------------------------
def parse_jfr(
    jfr_path: str,
    source_package: str = ".",
    stack_depth: int = 64,
    jfr_cmd: str = "jfr",
) -> dict[str, Any]:
    """Parse a JFR recording into execution-sample stacks.

    Returns {"stacks": [...], "total_samples": N} where each stack is a
    leaf-first list of frames from _extract_stack; empty stacks and
    non-ExecutionSample events are dropped.
    """
    raw_events = _run_jfr(
        jfr_path, "jdk.ExecutionSample", jfr_cmd, stack_depth
    )
    stacks = [
        frames
        for event in raw_events
        if event.get("type") == "jdk.ExecutionSample"
        and (frames := _extract_stack(event, source_package))
    ]
    return {"stacks": stacks, "total_samples": len(stacks)}
def build_call_graph(
    parsed: dict[str, Any],
) -> dict[str, Any]:
    """Build a weighted call graph from parsed JFR stacks.

    Stacks are leaf-first, so frame i+1 is the caller of frame i. Nodes
    carry self/total sample counts and percentages; edges are
    (caller -> callee) sample weights, heaviest first.
    """
    self_counts: dict[str, int] = defaultdict(int)
    reach_counts: dict[str, int] = defaultdict(int)
    edge_counts: dict[tuple[str, str], int] = defaultdict(int)
    source_flags: dict[str, bool] = {}

    for frames in parsed["stacks"]:
        if not frames:
            continue
        # The leaf frame owns the sample's self-time.
        self_counts[frames[0]["method"]] += 1
        counted: set[str] = set()
        for frame in frames:
            name = frame["method"]
            # Count each method at most once per stack for "total" time.
            if name not in counted:
                reach_counts[name] += 1
                counted.add(name)
            source_flags.setdefault(name, frame["is_source"])
        for child, parent in zip(frames, frames[1:]):
            edge_counts[(parent["method"], child["method"])] += 1

    sample_total = parsed["total_samples"]

    def pct(count: int) -> float:
        return round(count / sample_total * 100, 1) if sample_total else 0

    nodes = {
        name: {
            "self_samples": self_counts.get(name, 0),
            "total_samples": reach_counts.get(name, 0),
            "self_pct": pct(self_counts.get(name, 0)),
            "total_pct": pct(reach_counts.get(name, 0)),
            "is_source": source_flags.get(name, False),
        }
        for name in set(self_counts) | set(reach_counts)
    }
    edges = [
        {
            "caller": parent,
            "callee": child,
            "samples": weight,
            "pct": pct(weight),
        }
        for (parent, child), weight in sorted(
            edge_counts.items(), key=lambda kv: kv[1], reverse=True
        )
    ]
    return {"nodes": nodes, "edges": edges, "total_samples": sample_total}
def find_hotpath(
    graph: dict[str, Any], max_depth: int = 10
) -> list[dict[str, Any]]:
    """Follow the heaviest caller->callee edges to build the hot path.

    Starts at the heaviest root (a caller nobody calls); if no such root
    exists, falls back to the single method with the most total samples.
    Stops at max_depth, at a leaf, or on a cycle.
    """
    nodes = graph["nodes"]
    children: dict[str, list[tuple[str, int]]] = defaultdict(list)
    called: set[str] = set()
    for edge in graph["edges"]:
        children[edge["caller"]].append((edge["callee"], edge["samples"]))
        called.add(edge["callee"])

    candidates = [m for m in nodes if m not in called and m in children]
    if not candidates:
        # No true root — take the heaviest method overall as a start.
        candidates = sorted(
            nodes, key=lambda m: nodes[m]["total_samples"], reverse=True
        )[:1]
    if not candidates:
        return []

    cursor = max(candidates, key=lambda m: nodes[m]["total_samples"])
    trail: list[dict[str, Any]] = []
    seen: set[str] = set()
    for _ in range(max_depth):
        if cursor in seen:
            break
        seen.add(cursor)
        info = nodes.get(cursor, {})
        trail.append({
            "method": cursor,
            "self_pct": info.get("self_pct", 0),
            "total_pct": info.get("total_pct", 0),
            "is_source": info.get("is_source", False),
        })
        outgoing = children.get(cursor, [])
        if not outgoing:
            break
        # Descend along the heaviest outgoing edge.
        cursor = max(outgoing, key=lambda pair: pair[1])[0]
    return trail
def find_bottlenecks(
    graph: dict[str, Any],
    top_n: int = 15,
    source_only: bool = True,
) -> list[dict[str, Any]]:
    """Rank methods by self-sample count with their top callers/callees.

    Skips methods with zero self-samples; when source_only is True, also
    skips methods outside the configured source package.
    """
    nodes = graph["nodes"]
    in_edges: dict[str, list[dict[str, Any]]] = defaultdict(list)
    out_edges: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for edge in graph["edges"]:
        in_edges[edge["callee"]].append({
            "method": edge["caller"],
            "samples": edge["samples"],
        })
        out_edges[edge["caller"]].append({
            "method": edge["callee"],
            "samples": edge["samples"],
        })

    def heaviest(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
        # Top-3 neighbors by sample weight.
        return sorted(records, key=lambda r: r["samples"], reverse=True)[:3]

    result: list[dict[str, Any]] = []
    for name, info in sorted(
        nodes.items(), key=lambda kv: kv[1]["self_samples"], reverse=True
    ):
        if source_only and not info["is_source"]:
            continue
        if info["self_samples"] == 0:
            continue
        result.append({
            "method": name,
            "self_samples": info["self_samples"],
            "self_pct": info["self_pct"],
            "total_samples": info["total_samples"],
            "total_pct": info["total_pct"],
            "callers": heaviest(in_edges.get(name, [])),
            "callees": heaviest(out_edges.get(name, [])),
        })
        if len(result) >= top_n:
            break
    return result
# ---------------------------------------------------------------------------
# Lock contention
# ---------------------------------------------------------------------------
def parse_jfr_contention(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
    jfr_cmd: str = "jfr",
) -> dict[str, Any]:
    """Parse lock contention events from a JFR recording.

    Aggregates jdk.JavaMonitorWait / jdk.JavaMonitorEnter events per
    "site" — the first source frame of the event's stack (or the leaf) —
    and reports total/max wait, event counts, monitor classes, and up to
    three example stacks per site, ranked by cumulative wait time.
    """
    event_types = "jdk.JavaMonitorWait,jdk.JavaMonitorEnter"
    events = _run_jfr(jfr_path, event_types, jfr_cmd, stack_depth=32)
    # Per-site accumulator keyed by the attributed source frame.
    sites: dict[str, dict[str, Any]] = defaultdict(
        lambda: {
            "total_duration_ns": 0,
            "count": 0,
            "monitor_classes": set(),
            "max_duration_ns": 0,
            "stacks": [],
        }
    )
    total_wait_ns = 0
    total_events = 0
    for event in events:
        values = event.get("values", {})
        duration_ns = _parse_duration_ns(values.get("duration", ""))
        monitor_class = (
            values.get("monitorClass", {}).get("name", "unknown")
        )
        frames = _extract_stack(event, source_package)
        source_frame = _top_source_frame(frames)
        site = sites[source_frame]
        site["total_duration_ns"] += duration_ns
        site["count"] += 1
        site["monitor_classes"].add(monitor_class)
        if duration_ns > site["max_duration_ns"]:
            site["max_duration_ns"] = duration_ns
        # Keep up to 3 distinct example stacks, 5 frames each.
        if len(site["stacks"]) < 3:
            stack_str = " -> ".join(
                f["method"] for f in frames[:5]
            )
            if stack_str not in site["stacks"]:
                site["stacks"].append(stack_str)
        total_wait_ns += duration_ns
        total_events += 1
    # Heaviest sites by cumulative wait time.
    ranked = sorted(
        sites.items(),
        key=lambda x: x[1]["total_duration_ns"],
        reverse=True,
    )[:top_n]
    return {
        "total_contention_ms": round(total_wait_ns / 1_000_000, 1),
        "total_events": total_events,
        "contention_sites": [
            {
                "method": method,
                "total_ms": round(
                    info["total_duration_ns"] / 1_000_000, 1
                ),
                # Share of all contention wait time across the recording.
                "pct": (
                    round(
                        info["total_duration_ns"]
                        / total_wait_ns
                        * 100,
                        1,
                    )
                    if total_wait_ns
                    else 0
                ),
                "count": info["count"],
                "max_ms": round(
                    info["max_duration_ns"] / 1_000_000, 1
                ),
                "monitor_classes": list(info["monitor_classes"]),
                "example_stacks": info["stacks"],
            }
            for method, info in ranked
        ],
    }
# ---------------------------------------------------------------------------
# I/O analysis
# ---------------------------------------------------------------------------
def parse_jfr_io(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
    jfr_cmd: str = "jfr",
) -> dict[str, Any]:
    """Parse file and socket I/O events from a JFR recording.

    Aggregates jdk.FileRead/FileWrite/SocketRead/SocketWrite events per
    "site" (first source frame of the stack, or the leaf) and reports
    total/max duration, bytes moved, event kinds, and sample paths/hosts,
    ranked by cumulative I/O time.
    """
    event_types = (
        "jdk.FileRead,jdk.FileWrite,"
        "jdk.SocketRead,jdk.SocketWrite"
    )
    events = _run_jfr(jfr_path, event_types, jfr_cmd, stack_depth=32)
    # Per-site accumulator keyed by the attributed source frame.
    sites: dict[str, dict[str, Any]] = defaultdict(
        lambda: {
            "total_duration_ns": 0,
            "total_bytes": 0,
            "count": 0,
            "max_duration_ns": 0,
            "io_types": set(),
            "paths": set(),
        }
    )
    total_io_ns = 0
    total_events = 0
    for event in events:
        event_type = event.get("type", "")
        values = event.get("values", {})
        duration_ns = _parse_duration_ns(values.get("duration", ""))
        # Reads carry bytesRead, writes bytesWritten; either may be absent.
        bytes_rw = values.get("bytesRead", 0) or values.get(
            "bytesWritten", 0
        ) or 0
        # File events have "path"; socket events have "host".
        path = values.get("path", "") or values.get("host", "")
        frames = _extract_stack(event, source_package)
        source_frame = _top_source_frame(frames)
        site = sites[source_frame]
        site["total_duration_ns"] += duration_ns
        site["total_bytes"] += bytes_rw
        site["count"] += 1
        if duration_ns > site["max_duration_ns"]:
            site["max_duration_ns"] = duration_ns
        # Short event kind, e.g. "FileRead" from "jdk.FileRead".
        site["io_types"].add(event_type.split(".")[-1])
        # Keep at most 3 sample paths, truncated to 80 chars.
        if path and len(site["paths"]) < 3:
            site["paths"].add(str(path)[:80])
        total_io_ns += duration_ns
        total_events += 1
    # Heaviest sites by cumulative I/O time.
    ranked = sorted(
        sites.items(),
        key=lambda x: x[1]["total_duration_ns"],
        reverse=True,
    )[:top_n]
    return {
        "total_io_ms": round(total_io_ns / 1_000_000, 1),
        "total_events": total_events,
        "io_sites": [
            {
                "method": method,
                "total_ms": round(
                    info["total_duration_ns"] / 1_000_000, 1
                ),
                # Share of all I/O time across the recording.
                "pct": (
                    round(
                        info["total_duration_ns"]
                        / total_io_ns
                        * 100,
                        1,
                    )
                    if total_io_ns
                    else 0
                ),
                "total_bytes": info["total_bytes"],
                "count": info["count"],
                "max_ms": round(
                    info["max_duration_ns"] / 1_000_000, 1
                ),
                "io_types": list(info["io_types"]),
                "paths": list(info["paths"]),
            }
            for method, info in ranked
        ],
    }
# ---------------------------------------------------------------------------
# GC analysis
# ---------------------------------------------------------------------------
def parse_jfr_gc(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
    jfr_cmd: str = "jfr",
) -> dict[str, Any]:
    """Parse GC events and correlate with allocation sites.

    Collects GC pause events (grouped by collector type) plus TLAB and
    outside-TLAB allocation samples, attributing each allocation to the
    first source frame of its stack.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix used to attribute allocation sites.
        top_n: Number of allocation sites to return.
        jfr_cmd: Path or name of the `jfr` CLI binary.

    Returns:
        Dict with total pause time, longest pauses, per-type pause summary,
        and the top allocation-pressure sites with percent-of-total shares.
    """
    gc_events_str = (
        "jdk.GarbageCollection,jdk.G1GarbageCollection,"
        "jdk.GCPhasePause,jdk.YoungGarbageCollection,"
        "jdk.OldGarbageCollection"
    )
    gc_events = _run_jfr(jfr_path, gc_events_str, jfr_cmd)
    pauses: list[dict[str, Any]] = []
    total_pause_ns = 0
    for event in gc_events:
        event_type = event.get("type", "")
        values = event.get("values", {})
        duration_ns = _parse_duration_ns(values.get("duration", ""))
        cause = values.get("cause", "") or values.get("name", "")
        gc_name = values.get("name", event_type.split(".")[-1])
        # Zero-duration events carry no pause information; skip them.
        if duration_ns > 0:
            pauses.append({
                "type": gc_name,
                "cause": cause,
                "duration_ms": round(duration_ns / 1_000_000, 2),
            })
        total_pause_ns += duration_ns
    pauses.sort(key=lambda x: x["duration_ms"], reverse=True)
    # Allocation sites driving GC pressure
    alloc_events_str = (
        "jdk.ObjectAllocationInNewTLAB,"
        "jdk.ObjectAllocationOutsideTLAB"
    )
    alloc_events = _run_jfr(
        jfr_path, alloc_events_str, jfr_cmd, stack_depth=16
    )
    alloc_sites: dict[str, dict[str, Any]] = defaultdict(
        lambda: {"bytes": 0, "count": 0, "types": set()}
    )
    for event in alloc_events:
        values = event.get("values", {})
        alloc_size = values.get("allocationSize", 0)
        obj_class = (
            values.get("objectClass", {}).get("name", "unknown")
        )
        frames = _extract_stack(event, source_package)
        source_frame = _top_source_frame(frames)
        site = alloc_sites[source_frame]
        site["bytes"] += alloc_size
        site["count"] += 1
        # Keep at most 5 representative object types per site.
        if len(site["types"]) < 5:
            site["types"].add(obj_class)
    ranked_alloc = sorted(
        alloc_sites.items(),
        key=lambda x: x[1]["bytes"],
        reverse=True,
    )[:top_n]
    # Fix: percent-of-total must be computed over ALL allocation sites,
    # not just the top_n survivors — otherwise each site's share is
    # inflated and no longer matches the "% of total" evidence text.
    total_alloc = sum(s["bytes"] for s in alloc_sites.values())
    return {
        "total_gc_pause_ms": round(total_pause_ns / 1_000_000, 1),
        "gc_event_count": len(pauses),
        "longest_pauses": pauses[:10],
        "pause_by_type": _group_pauses(pauses),
        "allocation_pressure": [
            {
                "method": method,
                "bytes": info["bytes"],
                "bytes_mb": round(info["bytes"] / 1_048_576, 2),
                "pct": (
                    round(info["bytes"] / total_alloc * 100, 1)
                    if total_alloc
                    else 0
                ),
                "count": info["count"],
                "top_types": list(info["types"]),
            }
            for method, info in ranked_alloc
        ],
    }
def _group_pauses(
pauses: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Group GC pauses by type and sum durations."""
groups: dict[str, dict[str, Any]] = defaultdict(
lambda: {"count": 0, "total_ms": 0.0, "max_ms": 0.0}
)
for p in pauses:
g = groups[p["type"]]
g["count"] += 1
g["total_ms"] += p["duration_ms"]
g["max_ms"] = max(g["max_ms"], p["duration_ms"])
return [
{
"type": gc_type,
"count": info["count"],
"total_ms": round(info["total_ms"], 1),
"max_ms": round(info["max_ms"], 2),
}
for gc_type, info in sorted(
groups.items(),
key=lambda x: x[1]["total_ms"],
reverse=True,
)
]
# ---------------------------------------------------------------------------
# Wall-clock breakdown (combines all dimensions)
# ---------------------------------------------------------------------------
def parse_jfr_wall(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
    jfr_cmd: str = "jfr",
) -> dict[str, Any]:
    """Combine CPU, contention, I/O, and GC into a wall-clock breakdown.

    Runs all the dimension-specific parsers plus a thread park/sleep pass,
    then names the dominant dimension. CPU is reported in samples, not ms
    (see the "note" field in the result).
    """
    cpu_parsed = parse_jfr(jfr_path, source_package, jfr_cmd=jfr_cmd)
    cpu_graph = build_call_graph(cpu_parsed)
    cpu_bottlenecks = find_bottlenecks(
        cpu_graph, top_n=top_n, source_only=False
    )
    contention = parse_jfr_contention(
        jfr_path, source_package, top_n, jfr_cmd
    )
    io = parse_jfr_io(jfr_path, source_package, top_n, jfr_cmd)
    gc = parse_jfr_gc(jfr_path, source_package, top_n, jfr_cmd)
    # Thread park/sleep for waiting time
    park_events_str = "jdk.ThreadPark,jdk.ThreadSleep"
    park_events = _run_jfr(
        jfr_path, park_events_str, jfr_cmd, stack_depth=16
    )
    total_park_ns = 0
    # Cumulative park/sleep ns per attributed source frame.
    park_sites: dict[str, int] = defaultdict(int)
    for event in park_events:
        values = event.get("values", {})
        duration_ns = _parse_duration_ns(values.get("duration", ""))
        frames = _extract_stack(event, source_package)
        source_frame = _top_source_frame(frames)
        park_sites[source_frame] += duration_ns
        total_park_ns += duration_ns
    top_park = sorted(
        park_sites.items(), key=lambda x: x[1], reverse=True
    )[:top_n]
    # Estimate time breakdown
    contention_ms = contention["total_contention_ms"]
    io_ms = io["total_io_ms"]
    gc_ms = gc["total_gc_pause_ms"]
    park_ms = round(total_park_ns / 1_000_000, 1)
    # NOTE(review): total_wall_ms is computed but unused below — confirm
    # whether it was meant to be included in the returned breakdown.
    total_wall_ms = contention_ms + io_ms + gc_ms + park_ms
    # CPU samples don't have direct ms — estimate from sample count
    cpu_samples = cpu_parsed["total_samples"]
    return {
        "wall_clock_breakdown": {
            "contention_ms": contention_ms,
            "io_ms": io_ms,
            "gc_pause_ms": gc_ms,
            "parking_sleep_ms": park_ms,
            "cpu_samples": cpu_samples,
            "note": (
                "CPU time is in samples (not ms). "
                "Other dimensions are measured in wall-clock ms."
            ),
        },
        "dominant_dimension": _dominant_dimension(
            cpu_samples, contention_ms, io_ms, gc_ms, park_ms
        ),
        "cpu_bottlenecks": cpu_bottlenecks[:5],
        "contention_sites": contention["contention_sites"][:5],
        "io_sites": io["io_sites"][:5],
        "gc_summary": {
            "total_pause_ms": gc_ms,
            "event_count": gc["gc_event_count"],
            "top_allocators": gc["allocation_pressure"][:5],
        },
        "parking_sites": [
            {
                "method": method,
                "total_ms": round(ns / 1_000_000, 1),
            }
            for method, ns in top_park
        ],
    }
def _dominant_dimension(
cpu_samples: int,
contention_ms: float,
io_ms: float,
gc_ms: float,
park_ms: float,
) -> str:
"""Guess which dimension dominates wall-clock time."""
# Rough heuristic: 1 CPU sample ≈ 20ms (JFR default interval)
cpu_est_ms = cpu_samples * 20
dims = {
"CPU": cpu_est_ms,
"lock_contention": contention_ms,
"IO": io_ms,
"GC": gc_ms,
"parking_sleep": park_ms,
}
if not any(dims.values()):
return "unknown (insufficient data)"
dominant = max(dims, key=lambda k: dims[k])
dominant_val = dims[dominant]
total = sum(dims.values())
pct = round(dominant_val / total * 100) if total else 0
return f"{dominant} ({pct}% of estimated wall time)"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parse_duration_ns(duration_str: Any) -> int:
"""Parse a JFR duration value to nanoseconds."""
if isinstance(duration_str, (int, float)):
return int(duration_str)
if not isinstance(duration_str, str) or not duration_str:
return 0
s = duration_str.strip()
try:
if s.endswith(" ns"):
return int(float(s[:-3]))
if s.endswith(" us"):
return int(float(s[:-3]) * 1_000)
if s.endswith(" ms"):
return int(float(s[:-3]) * 1_000_000)
if s.endswith(" s"):
return int(float(s[:-2]) * 1_000_000_000)
return int(float(s))
except ValueError:
return 0

View file

@ -0,0 +1,87 @@
"""Render call graphs as structured output for LLM consumption."""
from __future__ import annotations
from typing import Any
def render_hotpath_text(path: list[dict[str, Any]]) -> str:
    """Render the hotpath as a '->'-joined chain of shortened method names."""
    if not path:
        return "No hotpath detected."
    rendered: list[str] = []
    for step in path:
        # Frames outside the source package are tagged as external.
        suffix = "" if step["is_source"] else " [external]"
        rendered.append(
            f"{_short_name(step['method'])} ({step['total_pct']}%){suffix}"
        )
    return " -> ".join(rendered)
def render_bottleneck_text(
    bottlenecks: list[dict[str, Any]],
) -> str:
    """Render bottlenecks as a numbered report with caller/callee lines."""
    if not bottlenecks:
        return "No bottlenecks detected."
    out: list[str] = []
    for rank, entry in enumerate(bottlenecks, 1):
        out.append(
            f"{rank}. {entry['method']} "
            f"[self: {entry['self_pct']}%, total: {entry['total_pct']}%, "
            f"{entry['self_samples']} samples]"
        )
        if entry["callers"]:
            joined = ", ".join(
                f"{c['method']} ({c['samples']}x)" for c in entry["callers"]
            )
            out.append(f" <- called by: {joined}")
        if entry["callees"]:
            joined = ", ".join(
                f"{c['method']} ({c['samples']}x)" for c in entry["callees"]
            )
            out.append(f" -> calls: {joined}")
        out.append("")
    return "\n".join(out)
def render_analysis(
    graph: dict[str, Any],
    hotpath: list[dict[str, Any]],
    bottlenecks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Package the full CPU analysis as a structured MCP tool result."""
    source_count = sum(
        1 for node in graph["nodes"].values() if node["is_source"]
    )
    summary = {
        "total_samples": graph["total_samples"],
        "total_methods": len(graph["nodes"]),
        "total_edges": len(graph["edges"]),
        "source_methods": source_count,
    }
    return {
        "summary": summary,
        "hotpath": render_hotpath_text(hotpath),
        "hotpath_detail": hotpath,
        "bottlenecks": render_bottleneck_text(bottlenecks),
        "bottleneck_detail": bottlenecks,
        "top_edges": graph["edges"][:20],
    }
def _short_name(qualified: str) -> str:
"""Shorten com.example.foo.Bar.method to Bar.method."""
parts = qualified.rsplit(".", 2)
if len(parts) >= 2:
return f"{parts[-2]}.{parts[-1]}"
return qualified

View file

@ -0,0 +1,303 @@
"""Codeflash MCP server — profiling and analysis tools for Claude."""
from __future__ import annotations
import shutil
from mcp.server.fastmcp import FastMCP
from ._jfr_parser import (
build_call_graph,
find_bottlenecks,
find_hotpath,
parse_jfr,
parse_jfr_contention,
parse_jfr_gc,
parse_jfr_io,
parse_jfr_wall,
)
from ._renderer import render_analysis
mcp = FastMCP("codeflash")
def _find_jfr_cmd() -> str:
"""Locate the jfr CLI tool."""
import os
jfr = shutil.which("jfr")
if jfr:
return jfr
for jdk_version in ("21", "17"):
path = (
f"/Library/Java/JavaVirtualMachines/openjdk-{jdk_version}.jdk"
f"/Contents/Home/bin/jfr"
)
if os.path.isfile(path):
return path
return "jfr"
@mcp.tool()
def analyze_jfr(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
    stack_depth: int = 64,
) -> dict:
    """Analyze a JFR file for CPU bottlenecks via execution sample call graph.

    Builds a weighted call graph from CPU samples, finds the hottest
    execution path, and ranks bottleneck methods by self-time.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix for "your code" (e.g. "org.apache.kafka"). Use "." for all.
        top_n: Number of bottleneck methods to return.
        stack_depth: Maximum stack depth to extract.
    """
    # NOTE: the docstring above is shipped to the model as the MCP tool
    # description — keep its wording stable.
    jfr_cmd = _find_jfr_cmd()
    parsed = parse_jfr(
        jfr_path,
        source_package=source_package,
        stack_depth=stack_depth,
        jfr_cmd=jfr_cmd,
    )
    graph = build_call_graph(parsed)
    hotpath = find_hotpath(graph)
    # Only restrict to source methods when a real package filter was given.
    source_only = source_package != "."
    bottlenecks = find_bottlenecks(
        graph, top_n=top_n, source_only=source_only
    )
    # Fall back to all packages if the source filter matched nothing.
    if not bottlenecks and source_only:
        bottlenecks = find_bottlenecks(
            graph, top_n=top_n, source_only=False
        )
    return render_analysis(graph, hotpath, bottlenecks)
@mcp.tool()
def analyze_jfr_contention(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
) -> dict:
    """Analyze lock contention from a JFR recording.

    Finds which threads are blocked waiting for monitors, on which locks,
    for how long, and which code paths trigger the contention.

    Use when: throughput is low despite low CPU usage, threads appear
    to stall, or you suspect synchronized blocks are bottlenecks.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix to filter by.
        top_n: Number of contention sites to return.
    """
    # Thin wrapper: locate the jfr binary, then delegate to the parser.
    return parse_jfr_contention(
        jfr_path, source_package, top_n, _find_jfr_cmd()
    )
@mcp.tool()
def analyze_jfr_io(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
) -> dict:
    """Analyze file and network I/O bottlenecks from a JFR recording.

    Finds which file reads/writes and socket operations are slowest,
    how much data they move, and which code paths trigger them.

    Use when: wall-clock time is high but CPU is low, disk or network
    latency is suspected, or you see threads spending time in I/O waits.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix to filter by.
        top_n: Number of I/O sites to return.
    """
    # Thin wrapper: locate the jfr binary, then delegate to the parser.
    return parse_jfr_io(
        jfr_path, source_package, top_n, _find_jfr_cmd()
    )
@mcp.tool()
def analyze_jfr_gc(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
) -> dict:
    """Analyze GC pressure and pause times from a JFR recording.

    Shows GC pause durations and frequency, groups by GC type,
    and identifies which allocation sites drive GC pressure.

    Use when: you see latency spikes, GC logs show long pauses,
    or memory allocation rate is high.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix to filter allocation sites.
        top_n: Number of allocation sites to return.
    """
    # Thin wrapper: locate the jfr binary, then delegate to the parser.
    return parse_jfr_gc(
        jfr_path, source_package, top_n, _find_jfr_cmd()
    )
@mcp.tool()
def analyze_jfr_wall(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 10,
) -> dict:
    """Full wall-clock breakdown combining CPU, locks, I/O, GC, and parking.

    This is the most comprehensive analysis tool. It answers: "of the total
    time the user waited, how much was CPU, how much was lock contention,
    how much was I/O, how much was GC, and how much was thread parking?"

    Use this FIRST to determine which dimension dominates, then drill
    into specific tools (analyze_jfr for CPU, analyze_jfr_contention
    for locks, etc.) for detailed analysis.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix for "your code".
        top_n: Number of sites per dimension to return.
    """
    # Thin wrapper: locate the jfr binary, then delegate to the parser.
    return parse_jfr_wall(
        jfr_path, source_package, top_n, _find_jfr_cmd()
    )
@mcp.tool()
def analyze_jfr_allocations(
    jfr_path: str,
    source_package: str = ".",
    top_n: int = 15,
) -> dict:
    """Analyze heap allocation hotspots from a JFR recording.

    Ranks methods by total bytes allocated, shows which object types
    are being created, and identifies the code driving memory pressure.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix to filter by.
        top_n: Number of allocation sites to return.

    Returns:
        Dict with the recording-wide allocation total (bytes and MiB)
        and the top-N allocation sites, each with byte count, share of
        the total, event count, and a sample of allocated types.
    """
    from collections import defaultdict

    from ._jfr_parser import _extract_stack, _run_jfr, _top_source_frame

    jfr_cmd = _find_jfr_cmd()
    # Both event types are needed for a complete picture: allocations too
    # large for the TLAB are reported separately as OutsideTLAB events.
    events_str = (
        "jdk.ObjectAllocationInNewTLAB,"
        "jdk.ObjectAllocationOutsideTLAB"
    )
    events = _run_jfr(jfr_path, events_str, jfr_cmd, stack_depth=16)

    # Aggregate per source frame: bytes, event count, and a small type sample.
    alloc_sites: dict[str, dict] = defaultdict(
        lambda: {"bytes": 0, "count": 0, "types": set()}
    )
    for event in events:
        values = event.get("values", {})
        alloc_size = values.get("allocationSize", 0)
        # "objectClass" may be present but null in some recordings; guard
        # against both a missing key and a null value.
        obj_class = (values.get("objectClass") or {}).get("name", "unknown")
        frames = _extract_stack(event, source_package)
        source_frame = _top_source_frame(frames)
        site = alloc_sites[source_frame]
        site["bytes"] += alloc_size
        site["count"] += 1
        # Cap the per-site type sample so the output stays small.
        if len(site["types"]) < 5:
            site["types"].add(obj_class)

    # Total over ALL sites, not just the displayed top-N slice. Summing only
    # the ranked slice would under-report "total_alloc_bytes" and inflate
    # every per-site percentage.
    total_bytes = sum(site["bytes"] for site in alloc_sites.values())
    ranked = sorted(
        alloc_sites.items(),
        key=lambda x: x[1]["bytes"],
        reverse=True,
    )[:top_n]
    return {
        "total_alloc_bytes": total_bytes,
        "total_alloc_mb": round(total_bytes / 1_048_576, 1),
        "allocation_sites": [
            {
                "method": method,
                "bytes": info["bytes"],
                "mb": round(info["bytes"] / 1_048_576, 2),
                "pct": (
                    round(info["bytes"] / total_bytes * 100, 1)
                    if total_bytes
                    else 0
                ),
                "count": info["count"],
                # Sorted for deterministic output — sets have no stable order.
                "top_types": sorted(info["types"]),
            }
            for method, info in ranked
        ],
    }
@mcp.tool()
def discover_optimization_flows(
    jfr_path: str,
    source_package: str = ".",
    max_flows: int = 20,
) -> dict:
    """Discover and rank ALL optimization flows from a JFR recording.

    This is the starting point for systematic optimization. It combines
    CPU, lock contention, I/O, GC, and allocation analysis into a single
    ranked worklist — like the Python CLI's function list, but for Java
    code paths.

    Each flow is an independent optimization target with:
    - method: the bottleneck method
    - dimension: what kind of bottleneck (CPU, lock, IO, GC)
    - impact_ms: estimated wall-clock impact
    - root_cause: why it's slow
    - action: suggested fix approach
    - status: pending/in_progress/done/skipped

    Call this ONCE after profiling, then iterate through flows one by one.
    After fixing a flow, re-profile and call this again to see updated ranks.

    Args:
        jfr_path: Path to the .jfr recording file.
        source_package: Java package prefix for "your code".
        max_flows: Maximum number of flows to return.
    """
    from ._flow_discovery import discover_flows, render_flow_list

    # Resolve the `jfr` binary once, then build and render the worklist.
    jfr_cmd = _find_jfr_cmd()
    flows = discover_flows(jfr_path, source_package, max_flows, jfr_cmd)
    return {
        "total_flows": len(flows),
        "flow_list_text": render_flow_list(flows),
        "flows": flows,
    }
def main() -> None:
    """Entry point for the `codeflash-mcp` console script: run the MCP server."""
    mcp.run()


if __name__ == "__main__":
    main()

View file

@ -9,6 +9,10 @@
"license": "BSL-1.1",
"keywords": ["optimization", "performance", "profiling", "python", "javascript", "typescript"],
"mcpServers": {
"codeflash": {
"command": "uv",
"args": ["run", "--directory", "${CLAUDE_PLUGIN_ROOT}/../packages/codeflash-mcp", "codeflash-mcp"]
},
"context7": {
"type": "http",
"url": "https://mcp.context7.com/mcp",

View file

@ -50,6 +50,24 @@ if [ -f ".codeflash/setup.md" ]; then
[ -n "$SETUP" ] && STATE="${STATE}\nSetup:\n${SETUP}\n"
fi
# Strategy plan (critical for long sessions — tells the agent what to do next)
if [ -f ".codeflash/strategy-plan.md" ]; then
PLAN=$(head -30 ".codeflash/strategy-plan.md" 2>/dev/null)
[ -n "$PLAN" ] && STATE="${STATE}\nStrategy plan:\n${PLAN}\n"
fi
# Pareto frontier (optimization trajectory — shows what's been achieved)
if [ -f ".codeflash/pareto-frontier.md" ]; then
PARETO=$(cat ".codeflash/pareto-frontier.md" 2>/dev/null)
[ -n "$PARETO" ] && STATE="${STATE}\nPareto frontier:\n${PARETO}\n"
fi
# Learnings from previous sessions
if [ -f ".codeflash/learnings.md" ]; then
LEARN=$(head -20 ".codeflash/learnings.md" 2>/dev/null)
[ -n "$LEARN" ] && STATE="${STATE}\nLearnings:\n${LEARN}\n"
fi
[ -z "$STATE" ] && exit 0
# Output as JSON with systemMessage for the compaction model

View file

@ -68,8 +68,27 @@ fi
# Build session context from existing state
MSG=""
# Detect interrupted session (active but not completed/plateau)
SESSION_STATUS=""
if [ -f "$DATA_DIR/HANDOFF.md" ]; then
MSG="Previous session state found at .codeflash/HANDOFF.md — read it before starting new work."
SESSION_STATUS=$(grep "Session status:" "$DATA_DIR/HANDOFF.md" 2>/dev/null | head -1 | sed 's/.*Session status: *//')
fi
if [ "$SESSION_STATUS" = "active" ]; then
# Session was interrupted — this is the highest priority signal
KEPT=$(grep -c "^.*keep" "$DATA_DIR/results.tsv" 2>/dev/null || echo "0")
TOTAL=$(wc -l < "$DATA_DIR/results.tsv" 2>/dev/null | tr -d ' ' || echo "0")
TOTAL=$((TOTAL - 1)) # subtract header
[ "$TOTAL" -lt 0 ] && TOTAL=0
MSG="INTERRUPTED SESSION DETECTED: A previous optimization session on branch $(git branch --show-current 2>/dev/null) was interrupted (Session status: active). $TOTAL experiments completed ($KEPT kept)."
MSG="$MSG Read .codeflash/HANDOFF.md, .codeflash/results.tsv, and .codeflash/strategy-plan.md to understand where it stopped."
MSG="$MSG Ask the user: 'I found an interrupted optimization session. Would you like me to continue where it left off, or start fresh?'"
if [ -f "$DATA_DIR/pareto-frontier.md" ]; then
MSG="$MSG Pareto frontier at .codeflash/pareto-frontier.md shows the optimization trajectory so far."
fi
elif [ -f "$DATA_DIR/HANDOFF.md" ]; then
MSG="Previous session state found at .codeflash/HANDOFF.md (status: ${SESSION_STATUS:-unknown}) — read it before starting new work."
fi
if [ -f "$DATA_DIR/results.tsv" ]; then
@ -81,6 +100,10 @@ if [ -f "$DATA_DIR/learnings.md" ]; then
MSG="${MSG:+$MSG }Learnings from previous sessions at .codeflash/learnings.md."
fi
if [ -f "$DATA_DIR/strategy-plan.md" ]; then
MSG="${MSG:+$MSG }Strategy plan at .codeflash/strategy-plan.md — check remaining strategies."
fi
[ -z "$MSG" ] && exit 0
cat <<EOF

View file

@ -44,6 +44,9 @@ You are an autonomous concurrency and async performance optimization agent for J
| **Unbounded fan-out** (thousands of async calls, no throttle) | YES | Prevents downstream overload |
| **1-by-1 I/O** (individual inserts/sends that could batch) | YES | 5-10x throughput with smart batching |
| **No retry backoff** (immediate retries on transient failure) | YES | Prevents retry storms |
| **Spin-wait on shared core** (busy-spin stealing CPU from co-located threads) | YES | 2-10x throughput |
| **Thread topology mismatch** (more engine threads than cores, wrong spin level) | YES | 50-200%+ throughput |
| **Queue between adjacent pipeline stages** (BlockingQueue where ring buffer fits) | YES | 2-5x throughput |
| **Already concurrent with good bounds** | **Skip** | -- |
### Top Antipatterns
@ -55,6 +58,9 @@ You are an autonomous concurrency and async performance optimization agent for J
- `ConcurrentHashMap.get()` then put() -> `computeIfAbsent()` (race condition between get and put)
- `Collections.synchronizedMap` -> `ConcurrentHashMap` (global lock -> lock striping)
- Blocking I/O in platform thread pool -> virtual threads JDK 21+ (200 threads at 1MB -> 10K at 1KB)
- Busy-spin on shared core (`while (!cond) {}`) -> spin-then-yield with `Thread.onSpinWait()` (2-10x throughput, frees core for other threads)
- N engine threads on M<N cores -> reduce to M engines with core pinning (eliminates context switches, 50-200%+ throughput)
- `BlockingQueue` between pipeline stages -> pre-allocated ring buffer / Disruptor (zero allocation, no lock contention)
**MEDIUM impact:**
- `ReentrantLock` for read-heavy -> `StampedLock` optimistic read (avoids lock acquisition entirely)
@ -68,6 +74,8 @@ You are an autonomous concurrency and async performance optimization agent for J
- Individual DB inserts in loop -> smart batching with size/time flush (amortizes round-trip + commit overhead, 5-10x gain)
- Immediate retry on failure -> exponential backoff with `delayedExecutor` (prevents retry storms against recovering services)
- Non-final fields on shared objects -> `final` fields / `record` / immutable DTOs (safe publication without synchronization)
- Spin count too high/low in adaptive spinners -> tune to 95th percentile of observed wait times (right-size CPU vs latency tradeoff)
- Thread.yield() in tight loop -> Thread.onSpinWait() (JDK 9+, x86 PAUSE hint reduces power and improves SMT throughput)
## Reasoning Checklist
@ -161,7 +169,7 @@ Update `.codeflash/results.tsv` AND `.codeflash/HANDOFF.md` immediately after ev
- Already uses optimal lock granularity
- Limited by Amdahl's law (serial fraction dominates)
Strategy rotation: lock elimination -> parallelization -> thread pool tuning -> virtual thread migration -> lock-free structures -> architectural restructuring
Strategy rotation: lock elimination -> parallelization -> thread pool tuning -> spin-wait tuning -> thread topology reconfiguration -> virtual thread migration -> lock-free structures -> ring buffer / Disruptor replacement -> architectural restructuring
## Results Schema
@ -185,6 +193,7 @@ For code examples, virtual thread migration guide, JMH concurrency templates, an
- **`../references/e2e-benchmarks.md`** -- JMH-based E2E comparison using git worktrees, min-score comparison
- **`../references/async/guide.md`** -- Lock hierarchies, virtual threads, CompletableFuture, structured concurrency, thread pool sizing
- **`../references/data-structures/guide.md`** -- Concurrent collection selection
- **`../references/topology/guide.md`** -- Thread topology optimization: spin-wait strategy ladder, topology reconfiguration patterns, LMAX/Disruptor, core pinning, engine consolidation
## Session End

View file

@ -22,11 +22,19 @@ description: >
color: purple
memory: project
tools: ["Read", "Edit", "Write", "Bash", "Grep", "Glob", "Agent", "WebFetch", "SendMessage", "TeamCreate", "TeamDelete", "TaskCreate", "TaskList", "TaskUpdate", "mcp__context7__resolve-library-id", "mcp__context7__query-docs"]
tools: ["Read", "Edit", "Write", "Bash", "Grep", "Glob", "Agent", "WebFetch", "SendMessage", "TeamCreate", "TeamDelete", "TaskCreate", "TaskList", "TaskUpdate", "mcp__context7__resolve-library-id", "mcp__context7__query-docs", "mcp__codeflash__analyze_jfr", "mcp__codeflash__analyze_jfr_allocations", "mcp__codeflash__analyze_jfr_contention", "mcp__codeflash__analyze_jfr_io", "mcp__codeflash__analyze_jfr_gc", "mcp__codeflash__analyze_jfr_wall", "mcp__codeflash__discover_optimization_flows"]
---
**Read `${CLAUDE_PLUGIN_ROOT}/references/shared/agent-base-protocol.md` at session start** for shared operational rules.
**CRITICAL — POST-COMPACTION RECOVERY**: If you just experienced context compaction (you don't remember recent experiments), IMMEDIATELY read these files before doing ANYTHING else:
1. `.codeflash/HANDOFF.md` — your session state (branch, experiments, what to do next)
2. `.codeflash/strategy-plan.md` — your remaining strategies and execution order
3. `.codeflash/results.tsv` — your experiment history (what worked, what didn't)
4. `.codeflash/pareto-frontier.md` — your optimization trajectory
5. `.codeflash/learnings.md` — insights from this and previous sessions
Then continue the experiment loop from where you left off. Do NOT restart from scratch. Do NOT re-profile unless >3 KEEPs have happened since your last profile.
You are the primary optimization agent for Java/Kotlin. You profile across ALL performance dimensions, identify how bottlenecks interact across domains, and autonomously revise your strategy based on profiling feedback.
**You are the default optimizer.** The router sends all requests to you unless the user explicitly asked for a single domain. You dispatch domain-specialist agents (codeflash-java-cpu, codeflash-java-memory, codeflash-java-async, codeflash-java-structure) for targeted single-domain work when profiling reveals it's appropriate.
@ -58,6 +66,9 @@ These are the interactions that single-domain agents miss. This is your core adv
| **Hibernate N+1 -> CPU + Async + Memory** | CPU in Hibernate engine; sequential JDBC | JOIN FETCH, @EntityGraph, batch fetch |
| **Large ResultSet -> GC-driven CPU spikes** | Large list in heap; GC during processing | Cursor pagination, streaming setFetchSize |
| **Library overhead -> CPU ceiling** | >15% cumtime in external library code; domain agents plateau citing "external library" | Audit actual usage surface, implement focused JDK stdlib replacement |
| **Spin-wait strategy mismatch -> CPU waste** | High CPU% in `Thread.yield()` or busy-wait loops; throughput plateaus | Right-size spin: busy-spin -> yield -> park -> queue based on contention level (Topology) |
| **Thread topology over-provisioning -> contention** | More matching/risk threads than cores; lock contention in Disruptor/LMAX | Reduce thread count, pin to cores, consolidate engine loops (Topology) |
| **Allocation rate -> throughput ceiling** | JFR `ObjectAllocationInNewTLAB` hotspots in matching loop; GC pauses proportional to throughput | Pre-allocate, object pooling, flyweight patterns (Memory+CPU) |
## Library Boundary Breaking
@ -77,6 +88,55 @@ All three conditions must hold: (1) >15% CPU in library internals, (2) domain ag
**Read `../references/library-replacement.md`** for the full assessment methodology, replacement tables, and verification requirements.
## Thread Topology & Spin-Wait Strategies
High-performance systems (matching engines, message brokers, event processors) often have a fixed thread topology — a set of dedicated threads with specific roles (matching engines, risk engines, sequencers, journalers). The topology is as important as the code running on each thread.
**When to reconfigure topology:**
- Profiling shows >20% CPU in spin-wait or parking across engine threads
- Thread count exceeds available physical cores (causes involuntary context switches)
- Multiple engine threads contend on the same lock or data structure
- JFR shows high `jdk.ThreadPark` or `jdk.JavaMonitorWait` between engine threads
### Spin-Wait Strategy Ladder
Each level trades latency for CPU efficiency. Profile to find the right level for each wait point:
| Strategy | Latency | CPU cost | When to use |
|----------|---------|----------|-------------|
| **Busy-spin** (`while (!condition) {}`) | <1μs | 100% core | Ultra-low-latency hot path, dedicated core |
| **Busy-spin + `Thread.onSpinWait()`** (JDK 9+) | <1μs | ~80% core | Same, with x86 PAUSE hint to save power |
| **Yield spin** (`Thread.yield()` in loop) | 1-10μs | Variable | Moderate contention, shared cores |
| **Timed park** (`LockSupport.parkNanos()`) | 10-50μs | ~0% idle | Infrequent events, batch processing |
| **Blocking queue** (`BlockingQueue.take()`) | 50-500μs | ~0% idle | Background processing, I/O-bound consumers |
### Topology Patterns
| Pattern | Description | When to apply |
|---------|-------------|---------------|
| **Reduce thread count** | N engines -> fewer engines on fewer cores | Threads > physical cores, context switch overhead |
| **Pin threads to cores** | `taskset` / `Thread.setAffinity()` via JNI | Cache thrashing between engine threads |
| **Consolidate engine loops** | Merge 2+ engines into 1 with batched processing | Engines share data, sequential dependency |
| **Separate read/write paths** | Dedicated threads for reads vs writes | Read-heavy with occasional writes (LMAX pattern) |
| **Event-loop + worker pool** | Single sequencer -> fan-out to workers | Ordering required on input, parallelism on processing |
### Profiling Thread Topology
```bash
# Count active engine threads and their CPU consumption
jfr print --events jdk.ExecutionSample /tmp/codeflash-profile.jfr 2>/dev/null | \
grep "thread:" | sort | uniq -c | sort -rn | head -20
# Detect spin-wait CPU waste
jfr print --events jdk.ExecutionSample /tmp/codeflash-profile.jfr 2>/dev/null | \
grep -E "Thread\.(yield|onSpinWait)|LockSupport\.park|\.spin" | head -20
# Check involuntary context switches (OS-level)
cat /proc/<pid>/status 2>/dev/null | grep -i "voluntary\|nonvoluntary"
```
**Read `../references/topology/guide.md`** for the full thread topology optimization methodology, LMAX/Disruptor patterns, and spin-wait tuning techniques.
## Self-Directed Profiling
You MUST profile before making any code changes. The unified profiling script below is your starting point -- run it first, then use deeper tools as needed. Do NOT skip profiling to "just read the code and fix obvious issues."
@ -115,6 +175,104 @@ The script reports: CPU execution hotspots (JFR ExecutionSample), memory allocat
**Choosing what to profile:** Use the test or benchmark that exercises the code path the user cares about. If the user said "make X faster", profile whatever runs X. If they gave a general request, use the project's test suite or a representative benchmark. Do NOT profile `mvn compile` unless the user specifically asked about build/startup time.
### Structured JFR analysis (MANDATORY after profiling)
After running the profiling script, the JFR recording is at `/tmp/codeflash-profile.jfr`. **Do NOT read raw profiler output.** Always use the MCP analysis tools — they parse the JFR file properly and return structured call graphs with bottleneck analysis.
**MCP Visibility Rule**: Before EVERY MCP tool call, print a status line so the user can follow the profiling pipeline:
```
[mcp] <tool_name> → <what it does and why you're calling it>
```
Examples:
```
[mcp] analyze_jfr_wall → determining dominant dimension (CPU vs lock vs I/O vs GC) to guide drill-down
[mcp] analyze_jfr → CPU is dominant (72%); extracting top-15 CPU hotspot methods
[mcp] analyze_jfr_contention → lock contention at 23%; identifying contested monitors
[mcp] analyze_jfr_allocations → GC pressure detected; finding allocation hotspots driving collection
[mcp] analyze_jfr_io → I/O wait at 15%; identifying slow file/network paths
[mcp] analyze_jfr_gc → GC pauses averaging 45ms; analyzing collector behavior and pause causes
[mcp] discover_optimization_flows → building ranked worklist of optimization targets from profiling data
```
This is NOT optional. Every MCP call must have a visible `[mcp]` line. The user should be able to read these lines and understand the full profiling strategy without looking at the raw tool output.
**Step 1: Wall-clock breakdown FIRST** — determines which dimension dominates:
```
[mcp] analyze_jfr_wall → determining dominant dimension (CPU vs lock vs I/O vs GC) to guide drill-down
analyze_jfr_wall(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka")
```
This returns: CPU vs lock contention vs I/O vs GC vs parking breakdown, the dominant dimension, and top sites per dimension. Use this to decide WHERE to drill in.
**Step 2: Drill into the dominant dimension:**
```
# If CPU-dominated:
[mcp] analyze_jfr → CPU is dominant; extracting hotspot methods and call graph
analyze_jfr(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka", top_n=15)
# If lock-contention-dominated:
[mcp] analyze_jfr_contention → lock contention is dominant; identifying contested monitors and wait sites
analyze_jfr_contention(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka")
# If I/O-dominated:
[mcp] analyze_jfr_io → I/O wait is dominant; identifying slow file/network operations
analyze_jfr_io(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka")
# If GC-dominated:
[mcp] analyze_jfr_gc → GC is dominant; analyzing collector behavior and pause causes
analyze_jfr_gc(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka")
# For allocation pressure (drives GC):
[mcp] analyze_jfr_allocations → allocation pressure detected; finding sites driving GC overhead
analyze_jfr_allocations(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka")
```
These tools return structured data: call graphs with edges, bottleneck methods with callers/callees, lock monitor classes, I/O paths and byte counts, GC pause durations, and allocation sites. Use this to decide what to optimize — the graph shows WHERE time is spent and WHY.
### Flow-Based Iteration (MANDATORY workflow)
This is the core optimization loop. It works like the Python CLI's function-by-function iteration, but for flows (code paths with bottlenecks).
**Step 1: Discover flows** — call ONCE after the initial profiling:
```
[mcp] discover_optimization_flows → building ranked worklist of optimization targets from profiling data
discover_optimization_flows(jfr_path="/tmp/codeflash-profile.jfr", source_package="org.apache.kafka", max_flows=20)
```
This returns a ranked worklist. Each flow has: method, dimension (CPU/lock/IO/GC), impact_ms, root_cause, and suggested action. Create a TaskCreate for each flow.
**Step 2: Iterate through flows, highest impact first:**
```
for each flow in the ranked list:
1. TaskUpdate → mark flow as in_progress
2. Read the source code of the bottleneck method
3. Understand the root cause (use the dimension-specific tool if needed)
4. Implement the fix
5. Run module tests to verify correctness
6. Run JMH benchmark to measure improvement
7. If improved: commit. If not: revert and mark as skipped
8. TaskUpdate → mark flow as done/skipped
9. Move to the next flow
```
**Step 3: Re-profile after every 3-5 fixes:**
```
Re-run the profiling script, then:
[mcp] discover_optimization_flows → re-ranking worklist after recent fixes; previously-hidden bottlenecks may now be visible
discover_optimization_flows(...)
```
The flow list will change: fixed flows disappear, new flows may appear as previously-hidden bottlenecks become visible. Update your task list.
**NEVER skip a flow without trying.** If a flow looks hard, still attempt it. Mark it as skipped only after you've investigated and determined it can't be improved (e.g., already optimal, external library, JIT behavior).
**NEVER stop the loop early.** Continue until all flows are done/skipped, or you hit max-turns. If you finish all flows, re-profile to discover the next tier.
### Manual JFR commands (when the script can't inject flags)
Some build configurations (e.g., Maven surefire with `reuseForks=false`) don't inherit MAVEN_OPTS. Fall back to explicit flag injection:
@ -171,6 +329,93 @@ After the unified profile, cross-reference CPU hotspots with allocation sites an
9. **Correctness?** Thread safety, null handling, exception contracts.
10. **Production context?** Server/CLI/batch/library changes what "improvement" means.
## Behavioral Equivalence Verification
**Correctness is non-negotiable. A 200% speedup that changes behavior is a bug, not an optimization.** Every optimization must pass multi-layer verification BEFORE being considered for KEEP.
### Layer 1: Output Snapshot (MANDATORY — every experiment)
Before ANY code change, capture the function's output on representative inputs:
```bash
# Create a simple test harness that prints outputs in deterministic order
# Run on ORIGINAL code and save output
mvn test -pl <module> -Dtest=<TestClass> 2>&1 | tee /tmp/original-output.txt
# After optimization, run same tests and compare
mvn test -pl <module> -Dtest=<TestClass> 2>&1 | tee /tmp/optimized-output.txt
diff /tmp/original-output.txt /tmp/optimized-output.txt
```
**If outputs differ: DISCARD immediately.** Do not proceed to benchmarking. Do not investigate "maybe it's just ordering" — if outputs changed, the optimization changed behavior.
### Layer 2: Full Test Suite (MANDATORY — every experiment)
```bash
# Run ALL tests, not just the target's tests
mvn test # or ./gradlew test
```
Cross-function optimizations (topology changes, thread consolidation) can break tests in unrelated modules. Always run the full suite.
### Layer 3: Edge Case Verification (MANDATORY for architectural changes)
For cross-function, topology, or concurrency changes, verify edge cases explicitly:
```bash
# Null/empty inputs
# Boundary values (0, 1, Integer.MAX_VALUE, empty collections)
# Concurrent access (if applicable)
# Error paths (exceptions, timeouts, connection failures)
```
Create targeted test cases if the existing suite doesn't cover these. The test cases themselves should be committed as part of the optimization.
### Layer 4: Serialization Safety (MANDATORY when changing types)
If you changed collection types, return types, or data structures:
```bash
# Check if the type is serialized anywhere
grep -rn "Serializable\|ObjectOutputStream\|Jackson\|@JsonProperty\|protobuf\|Kryo" \
--include="*.java" src/ | grep -i "<changed_class>"
# Check if the type crosses module/API boundaries
grep -rn "<changed_class>" --include="*.java" src/ | grep -v "test/"
```
**If the type is serialized or crosses API boundaries**: verify wire compatibility. An `ArrayList``List.of()` change breaks Java serialization. A `HashMap``EnumMap` change breaks Jackson if the map is a JSON field.
### Layer 5: Concurrency Safety (MANDATORY for async/topology changes)
```bash
# Run tests with stress (surfaces race conditions)
mvn test -Dsurefire.rerunFailingTestsCount=5
# If available, run JCStress tests
./gradlew jcstress
# Manual check: any shared mutable state without synchronization?
# Any compound operations on ConcurrentHashMap (get-then-put)?
# Any non-final fields on objects published to other threads?
```
**If any race condition is found: DISCARD immediately.** Race conditions are bugs, not tradeoffs.
### Verification Summary Line
After every experiment, print the verification status:
```
[experiment N] Verification: output=MATCH, tests=PASS(142/142), serialization=SAFE, concurrency=N/A
```
Or on failure:
```
[experiment N] Verification: output=MISMATCH — DISCARD (behavior changed)
```
## Team Orchestration
| Situation | Action |
@ -310,7 +555,7 @@ For each target in the unified target table, list every strategy that could fix
**Memory strategies:** autoboxing elimination, collection right-sizing, object pooling/reuse, cache bounding, leak fix, off-heap migration, escape analysis restructuring, string deduplication
**Async strategies:** lock elimination, parallelization, thread pool tuning, virtual thread migration, lock-free structures, batching/coalescing, executor isolation
**Async strategies:** lock elimination, parallelization, thread pool tuning, virtual thread migration, lock-free structures, batching/coalescing, executor isolation, spin-wait tuning, thread topology reconfiguration, core pinning, engine consolidation
**Structure strategies:** circular dep breaking, static init deferral, class loading optimization, ServiceLoader lazy loading, JPMS module optimization, dead code removal
@ -484,6 +729,76 @@ Also update the shared task list:
- After baseline: `TaskUpdate("Baseline profiling" -> completed)`
- At completion/plateau: `TaskUpdate("Experiment loop" -> completed)`
## Pareto Frontier Tracking
Track a multi-objective Pareto frontier across experiments. Each experiment measures multiple dimensions — a candidate is Pareto-optimal if no other candidate is better in ALL dimensions simultaneously.
### Tracking format
After each experiment (KEEP or DISCARD), update `.codeflash/pareto-frontier.md`:
```markdown
## Pareto Frontier — <date>
| Experiment | Perf throughput | Medium throughput | Latency p99 | Memory | GC pause | Status |
|-----------|-----------------|-------------------|-------------|--------|----------|--------|
| Baseline | 1.23 MT/s | 0.43 MT/s | 12ms | 450 MiB | 800ms | reference |
| Exp 3: Group spin 10K | 2.26 MT/s | 0.99 MT/s | 8ms | 440 MiB | 600ms | KEEP |
| Exp 7: CPU-aware tuning | 2.58 MT/s | 1.23 MT/s | 6ms | 420 MiB | 400ms | KEEP (frontier) |
| Exp 12: Empty-set short-circuit | 2.86 MT/s | 1.24 MT/s | 5ms | 415 MiB | 380ms | KEEP (frontier) |
```
### Decision rules
- **KEEP** if the experiment improves ANY frontier dimension without regressing others below the previous frontier point
- **KEEP with tradeoff note** if it improves one dimension but regresses another — document the tradeoff
- The frontier shows the optimization path — use it to identify which dimensions still have headroom
- After each KEEP, re-assess which dimension has the most remaining headroom and prioritize strategies targeting it
- Print `[pareto] Frontier updated: <dimensions improved>, headroom remaining: <dimensions with gap to theoretical>`
## Extended Session Protocol
This agent is designed for long-running sessions (10-15+ hours). Standard plateau detection is too aggressive for deep optimization — override with these extended rules.
### Session Checkpointing
**Every 2 hours** (or every 5 KEEPs, whichever comes first):
1. Update `.codeflash/HANDOFF.md` with full session state — this is your crash recovery mechanism
2. Update `.codeflash/pareto-frontier.md` with the current frontier
3. Update `.codeflash/strategy-plan.md` with remaining strategies
4. Commit all `.codeflash/` state files: `git add .codeflash/ && git commit -m "chore: session checkpoint"`
5. Print `[checkpoint] <N> experiments, <K> keeps, <elapsed time>, frontier: <best per dimension>`
### Extended Plateau Resistance
In extended sessions, DO NOT declare plateau after just 3 consecutive discards. Instead:
1. **3 consecutive discards**: Switch strategy within the current dimension (normal rotation)
2. **5 consecutive discards**: Re-profile from scratch, rebuild the unified target table, look for second-order effects that previous KEEPs may have revealed
3. **8 consecutive discards**: Try architectural/topology changes — these are the high-risk, high-reward moves that produce step-function improvements (like Kimi K2.6's thread topology change from 4ME+2RE to 2ME+1RE)
4. **12 consecutive discards across ALL dimensions and strategies**: NOW declare plateau — but first check if the Pareto frontier has any dimension with >10% theoretical headroom. If so, focus there
5. **Only declare FINAL plateau when**: All strategies exhausted AND re-profiling shows no targets above threshold AND Pareto frontier shows <5% headroom in all dimensions
### Compaction Recovery
When context is compacted mid-session:
1. Read `.codeflash/HANDOFF.md` — this has your full session state
2. Read `.codeflash/pareto-frontier.md` — this has your optimization trajectory
3. Read `.codeflash/strategy-plan.md` — this has remaining work
4. Read `.codeflash/results.tsv` — this has experiment history
5. Re-profile the current state (the code has changed since your last profile)
6. Continue from where you left off — do NOT restart from scratch
### Session Continuation
If the session was interrupted (Claude Code stopped, context limit, timeout):
1. The router agent checks `.codeflash/HANDOFF.md` for `Session status: active`
2. If active, the router re-launches this agent with the full HANDOFF context
3. This agent reads all `.codeflash/` state and continues the experiment loop
4. The Pareto frontier and strategy plan survive across interruptions
**ALWAYS set `Session status: active` in HANDOFF.md when entering the experiment loop, and `Session status: completed` or `Session status: plateau` when finishing.**
## Logging Format
Tab-separated `.codeflash/results.tsv`:
@ -514,6 +829,7 @@ commit target_test cpu_baseline_s cpu_optimized_s cpu_speedup mem_baseline_mb me
| Hibernate N+1, JDBC, connection pools | `../references/database/guide.md` |
| JNI, reflection caching, native memory | `../references/native/guide.md` |
| Stuck, teammates stalled, context lost, workflow broken | `${CLAUDE_PLUGIN_ROOT}/references/shared/failure-modes.md` |
| Thread topology, spin-wait, Disruptor patterns, engine thread tuning | `../references/topology/guide.md` |
## Workflow
@ -534,7 +850,12 @@ You are self-sufficient -- handle your own setup before any profiling.
- `.codeflash/learnings.md` -- insights from previous sessions. Pay special attention to cross-domain interaction hints.
- `.codeflash/conventions.md` -- maintainer preferences, guard command. Also check `../conventions.md` for org-level conventions (project-level overrides org-level).
5. **Validate tests.** Run the test command from setup.md (`mvn test` or `./gradlew test`). Note pre-existing failures so you don't waste time on them.
6. **Research dependencies** (optional, skip if context7 unavailable). Read `pom.xml` or `build.gradle` to identify performance-relevant libraries (Jackson, Guava, Apache Commons, Hibernate). For each, use `mcp__context7__resolve-library-id` then `mcp__context7__query-docs` (query: "performance optimization best practices"). Note findings for use during profiling.
6. **Research dependencies** (optional, skip if context7 unavailable). Read `pom.xml` or `build.gradle` to identify performance-relevant libraries (Jackson, Guava, Apache Commons, Hibernate). For each:
```
[mcp] resolve-library-id → resolving Context7 ID for <library> to look up optimization docs
[mcp] query-docs → fetching performance optimization best practices for <library>
```
Use `mcp__context7__resolve-library-id` then `mcp__context7__query-docs` (query: "performance optimization best practices"). Note findings for use during profiling.
### Starting fresh
@ -544,6 +865,11 @@ You are self-sufficient -- handle your own setup before any profiling.
git rev-parse HEAD > .codeflash/base-sha.txt
```
Every JMH comparison during the session uses this SHA as the "original" version.
Set session status for continuation tracking:
```bash
# Mark session as active for crash recovery (portable: BSD/macOS `sed -i ''` breaks GNU sed)
sed 's/Session status:.*/Session status: active/' .codeflash/HANDOFF.md > .codeflash/HANDOFF.md.tmp 2>/dev/null && mv .codeflash/HANDOFF.md.tmp .codeflash/HANDOFF.md || true
```
3. **Initialize `.codeflash/HANDOFF.md`** from `${CLAUDE_PLUGIN_ROOT}/references/shared/handoff-template.md`. Fill in: branch, project root, JDK version, build tool, test command, GC algorithm.
4. **Unified baseline.** Run the unified CPU+Memory+GC profiling.
5. **Identify workflow benchmarks.** Find or create JMH benchmarks that exercise entire workflows (request pipelines, data processing chains, batch jobs), not just individual functions. Check `.codeflash/setup.md` for existing JMH infrastructure. If only micro-benchmarks exist, create workflow-level benchmarks that chain the relevant hot-path functions together:
@ -590,6 +916,7 @@ CI mode is triggered when the prompt contains "CI" context (e.g., "This is a CI
1. **Update `.codeflash/HANDOFF.md`:**
- Set Session status to `plateau` or `completed`.
- Update `.codeflash/pareto-frontier.md` with the final frontier state.
- Fill in Stop Reason: why stopped, what was tried last, what remains actionable.
- Update Next Steps with concrete recommendations for a future session.
- Update Strategy & Decisions with any pivots made and why.
@ -606,7 +933,21 @@ CI mode is triggered when the prompt contains "CI" context (e.g., "This is a CI
### Codebase insights
- <observation relevant to future sessions>
```
3. Print `[complete] <total experiments, keeps, per-dimension improvements>`.
3. **Generate Pareto frontier chart.** Produce the multi-objective optimization chart:
```bash
python3 "${CLAUDE_PLUGIN_ROOT}/references/pareto-chart.py" \
--dir .codeflash \
--output .codeflash/pareto-chart.png \
--title "Multi-Objective Performance Optimization"
# Save a timestamped copy for historical comparison across sessions
cp .codeflash/pareto-chart.png ".codeflash/pareto-chart-$(date +%Y%m%d-%H%M%S).png" 2>/dev/null || true
```
If matplotlib is not available, skip the chart and note it in `[complete]`. The chart shows the optimization path from baseline through all experiments, with the Pareto frontier, kept/discarded points, and a theoretical ideal marker — similar to the Kimi K2.6 exchange-core chart.
**The chart and `pareto-frontier.md` are preserved across sessions** (not deleted during cleanup) so future sessions can compare their starting point against previous optimization trajectories. When resuming, read the previous `pareto-frontier.md` to see what was already achieved.
4. Print `[complete] <total experiments, keeps, per-dimension improvements>`. Include the chart path if generated: `Chart: .codeflash/pareto-chart.png`.
## Pre-Submit Review

View file

@ -238,6 +238,17 @@ mvn test
```
```
9. **`{{PARETO_CHART}}`** -- If `.codeflash/pareto-chart.png` exists, include it in the PR body:
```markdown
## Optimization Trajectory
![Pareto Frontier](pareto-chart.png)
```
If the chart doesn't exist, generate it:
```bash
python3 "${CLAUDE_PLUGIN_ROOT}/references/pareto-chart.py" \
--dir .codeflash --output .codeflash/pareto-chart.png
```
### Output
Write the filled template to `.codeflash/pr-body-<function_name>.md` so the user can review it before creating the PR.

View file

@ -59,3 +59,61 @@ You are the team lead for Java/Kotlin performance optimization. Your job is to d
| codeflash-java-structure | `../references/structure/` | Class loading, JPMS, static initializer chains, startup time, circular deps |
| codeflash-java-deep (DB targets) | `../references/database/` | JPA/Hibernate N+1, HikariCP connection pooling, query optimization |
| codeflash-java-deep (native targets) | `../references/native/` | JNI overhead, Panama FFI, Vector API, GraalVM native-image, Unsafe migration |
## Session Continuation Protocol
When Claude Code stops mid-session (context limit, timeout, crash), the optimization session state survives in `.codeflash/`. This protocol ensures the user is prompted to continue rather than starting from scratch.
### Detection (runs at session start)
Before routing, check for an interrupted session:
```bash
# Check if a previous session was interrupted (active but not completed)
if [ -f ".codeflash/HANDOFF.md" ]; then
STATUS=$(grep "Session status:" .codeflash/HANDOFF.md | head -1 | sed 's/.*Session status: *//')
fi
```
**If `Session status: active`**: The previous session was interrupted. This takes priority over any new request.
1. Read `.codeflash/HANDOFF.md` to understand where it stopped
2. Read `.codeflash/results.tsv` to see experiment history
3. Read `.codeflash/strategy-plan.md` if it exists
4. Read `.codeflash/pareto-frontier.md` if it exists
5. Tell the user:
```
I found an interrupted optimization session on branch `codeflash/optimize`:
- Experiments completed: <N> (<K> kept, <D> discarded)
- Last activity: <last experiment description from HANDOFF>
- Strategies remaining: <count from strategy-plan>
- Current improvement: <best metrics from pareto-frontier or results.tsv>
Would you like me to continue this session, or start fresh?
```
6. If the user says continue: proceed to **Resume** flow (step 1 of Resume in router-base.md)
7. If the user says start fresh: `git checkout main && git branch -D codeflash/optimize` then proceed to **Start** flow
**If `Session status: completed` or `Session status: plateau`**: The session finished normally. Mention it briefly ("Previous session completed with <N> optimizations") and proceed with whatever the user asked for.
**If no HANDOFF.md exists**: No previous session. Proceed normally.
### Router Coordination for Long Sessions
For sessions expected to run 10+ hours:
1. **Monitor optimizer health**: If the optimizer goes silent for >15 minutes during the experiment loop phase, proactively check:
- Read `.codeflash/results.tsv` — is it still being updated?
- Read `.codeflash/HANDOFF.md` — did it checkpoint recently?
- Ping the optimizer: `SendMessage(to: "optimizer", summary: "Health check", message: "Report status — are you still making progress?")`
2. **Re-launch on death**: If the optimizer dies (no response to health check, agent completed without `[complete]`):
- Read all `.codeflash/` state files
- Re-launch the optimizer with the resume prompt, including ALL state
- Tell the user: "The optimizer was interrupted. Re-launching from the last checkpoint."
3. **Pre-emptive checkpointing reminder**: After relaying every 5th `[experiment]` message, send:
```
SendMessage(to: "optimizer", summary: "Checkpoint reminder",
message: "Checkpoint your session state to .codeflash/ files now.")
```

View file

@ -4,7 +4,7 @@
## Reasoning Checklist
Before writing any code, answer these 12 questions. If you can't answer 3-8 concretely, research more before coding.
Before writing any code, answer these 14 questions. If you can't answer 3-8 concretely, research more before coding.
1. **Pattern**: What concurrency antipattern or missed parallelism? (see Antipattern Categories in the agent prompt)
2. **Hot path?** Confirm with JFR thread profiling or thread dumps -- don't optimize uncontended locks.
@ -22,6 +22,8 @@ Before writing any code, answer these 12 questions. If you can't answer 3-8 conc
10. **API lookup**: Use context7 for correct `StampedLock`, `CompletableFuture`, virtual thread signatures. Get exact API before implementing.
11. **Thread-safety?** Visibility (`volatile`, happens-before), atomicity, ordering. Shared objects need safe publication (`final` fields, `volatile`, `AtomicReference`).
12. **Verify cheaply**: Can you validate with a JMH micro-benchmark at `@Threads(N)` first? Follow the 6-step decision framework in `../micro-benchmark.md`.
13. **Spin-wait level?** If the target involves a spin loop, what level is it at (busy-spin/yield/park/queue)? Is the spin count tuned to the 95th percentile of observed wait times? See `../references/topology/guide.md` for the spin-wait strategy ladder.
14. **Thread topology?** How many dedicated threads? More than physical cores? Is there a topology diagram or thread naming convention? See `../references/topology/guide.md`.
## Domain-Specific Loop Steps
@ -154,7 +156,7 @@ Output matches original? AND no race conditions?
## Strategy Rotation
If 3+ consecutive discards on the same type, switch strategy:
- Lock elimination -> Parallelization -> Thread pool tuning -> Virtual thread migration -> Lock-free structures -> Batching/coalescing -> Architectural restructuring
- Lock elimination -> Parallelization -> Thread pool tuning -> Spin-wait tuning -> Thread topology reconfiguration -> Virtual thread migration -> Lock-free structures -> Ring buffer / Disruptor replacement -> Batching/coalescing -> Architectural restructuring
## Plateau Detection -- Domain-Specific
@ -163,6 +165,8 @@ If 3+ consecutive discards on the same type, switch strategy:
- Already at optimal lock granularity (confirmed via `-prof perfasm`)
- Limited by Amdahl's law (serial fraction dominates -- calculate serial %)
- Limited by hardware (memory bandwidth, cache contention on NUMA)
- Thread topology already optimal (thread count = core count, spin levels tuned per wait point)
- Spin-wait already at correct level for each wait point (confirmed via CPU% measurement)
If top 3 remaining issues are all non-optimizable, **stop and report to user** with what's left and why.
@ -174,6 +178,8 @@ If top 3 remaining issues are all non-optimizable, **stop and report to user** w
| All contention is external (DB, network) | N/A | Report -- infrastructure change needed |
| Serial fraction > 50% | Amdahl limit | Report -- parallelism gains capped |
| JIT already elides remaining locks | Confirmed via perfasm | Stop -- JIT handles it |
| Thread topology | Threads ≤ cores, spin levels tuned | Stop — topology is optimal |
| Spin-wait CPU | <5% total CPU in spin/yield | Not worth pursuing |
## Logging Format

View file

@ -0,0 +1,520 @@
#!/usr/bin/env python3
"""Generate a Pareto frontier scatter plot from optimization session data.
Reads .codeflash/pareto-frontier.md and .codeflash/results.tsv to produce
a multi-objective optimization chart showing the optimization path,
Pareto optimal zone, and per-experiment results.
Usage:
python3 pareto-chart.py [--output chart.png] [--dir .codeflash]
Requirements: matplotlib (pip install matplotlib)
"""
import argparse
import csv
import re
import sys
from pathlib import Path
try:
import matplotlib
matplotlib.use("Agg")
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
except ImportError:
print(
"ERROR: matplotlib is required. Install with: pip install matplotlib",
file=sys.stderr,
)
sys.exit(1)
def parse_pareto_frontier(path: Path) -> list[dict]:
    """Parse the markdown table in .codeflash/pareto-frontier.md into row dicts.

    The first markdown table row containing "Experiment" is treated as the
    header; the separator row immediately after it is skipped. Rows with
    fewer cells than headers are ignored. Returns an empty list when the
    file is missing or no header row is found.
    """
    if not path.exists():
        return []
    all_lines = path.read_text(encoding="utf-8").strip().split("\n")
    column_names: list[str] = []
    start = -1
    for idx, raw in enumerate(all_lines):
        if raw.startswith("|") and "Experiment" in raw:
            column_names = [cell.strip() for cell in raw.split("|")[1:-1]]
            start = idx
            break
    if start < 0:
        return []
    rows: list[dict] = []
    # start + 2 skips the |---|---| separator line under the header.
    for raw in all_lines[start + 2 :]:
        if not raw.startswith("|"):
            # Non-table lines are skipped, not treated as end-of-table,
            # so rows after a blank line or note are still collected.
            continue
        cells = [cell.strip() for cell in raw.split("|")[1:-1]]
        if len(cells) < len(column_names):
            continue
        rows.append(dict(zip(column_names, cells)))
    return rows
def parse_results_tsv(path: Path) -> list[dict]:
    """Load .codeflash/results.tsv as a list of row dicts.

    Keys come from the TSV header row. A missing file yields an empty list.
    """
    if not path.exists():
        return []
    with open(path, encoding="utf-8") as handle:
        return [dict(row) for row in csv.DictReader(handle, delimiter="\t")]
def extract_numeric(value: str) -> float | None:
"""Extract a numeric value from strings like '2.86 MT/s', '450 MiB', '12ms'."""
if not value:
return None
match = re.search(r"([\d.]+)", value)
if match:
return float(match.group(1))
return None
def find_metric_columns(records: list[dict]) -> tuple[str, str]:
    """Pick the X and Y metric columns for the chart axes.

    Prefers throughput-style columns (Perf throughput for X, Medium
    throughput / latency for Y) by case-insensitive substring match,
    honoring the priority order of the candidate lists. When either axis
    is still unresolved, falls back to the first two columns that contain
    at least one numeric value, skipping label-like columns.
    Returns ("", "") when no suitable columns exist.
    """
    if not records:
        return ("", "")
    keys = list(records[0].keys())

    def first_match(candidates: list[str]) -> str:
        # First key matching any candidate wins; candidates are tried in order.
        for wanted in candidates:
            for key in keys:
                if wanted.lower() in key.lower():
                    return key
        return ""

    x_col = first_match(
        [
            "Perf throughput",
            "perf_throughput",
            "cpu_optimized_s",
            "optimized_throughput",
        ]
    )
    y_col = first_match(
        [
            "Medium throughput",
            "medium_throughput",
            "latency_p99",
            "optimized_latency_p99_ms",
        ]
    )
    if not x_col or not y_col:
        label_like = ("experiment", "status", "description", "pattern", "commit")
        numeric_cols = [
            key
            for key in keys
            if key.lower() not in label_like
            and any(extract_numeric(r.get(key, "")) is not None for r in records)
        ]
        if len(numeric_cols) >= 2:
            x_col = x_col or numeric_cols[0]
            y_col = y_col or numeric_cols[1]
    return (x_col, y_col)
def generate_chart(
    records: list[dict],
    x_col: str,
    y_col: str,
    output_path: Path,
    title: str = "Multi-Objective Performance Optimization",
):
    """Generate the Pareto frontier scatter plot and save it as a PNG.

    Plots the baseline (first record), kept experiments, frontier points,
    and discarded experiments in the (x_col, y_col) metric plane, draws
    the optimization path through baseline + keeps, and annotates a
    Pareto-optimal zone plus a theoretical-ideal marker derived from the
    best kept values.

    Returns True on success, False when there are fewer than 2 plottable
    points (a chart needs at least baseline + one experiment).
    """
    if not records:
        print("No data to plot.", file=sys.stderr)
        return False
    # Convert raw records into plottable points; rows missing either metric are dropped.
    experiments = []
    for r in records:
        x = extract_numeric(r.get(x_col, ""))
        y = extract_numeric(r.get(y_col, ""))
        name = r.get(
            "Experiment", r.get("description", r.get("target_test", "?"))
        )
        status = r.get("Status", r.get("status", "")).lower()
        if x is not None and y is not None:
            experiments.append(
                {"name": name, "x": x, "y": y, "status": status}
            )
    if len(experiments) < 2:
        print(
            f"Need at least 2 data points, got {len(experiments)}.",
            file=sys.stderr,
        )
        return False
    fig, ax = plt.subplots(figsize=(12, 8))
    fig.patch.set_facecolor("#fafafa")
    ax.set_facecolor("#fafafa")
    # The first record is always treated as the baseline measurement.
    baseline = experiments[0]
    keeps = [
        e
        for e in experiments[1:]
        if "keep" in e["status"] or "frontier" in e["status"]
    ]
    discards = [e for e in experiments[1:] if "discard" in e["status"]]
    # Pareto optimal zone (upper-right quadrant from best kept point)
    if keeps:
        max_x = max(e["x"] for e in keeps) * 1.15
        max_y = max(e["y"] for e in keeps) * 1.15
        rect = mpatches.FancyBboxPatch(
            (max_x * 0.85, max_y * 0.85),
            max_x * 0.20,
            max_y * 0.20,
            boxstyle="round,pad=0.02",
            facecolor="#e8f5e9",
            edgecolor="#4caf50",
            alpha=0.4,
            linewidth=1.5,
        )
        ax.add_patch(rect)
        ax.annotate(
            "Pareto Optimal Zone",
            xy=(max_x * 0.92, max_y * 1.02),
            fontsize=9,
            color="#2e7d32",
            fontstyle="italic",
        )
    # Theoretical ideal star: 8% beyond the best kept value on each axis
    if keeps:
        ideal_x = max(e["x"] for e in keeps) * 1.08
        ideal_y = max(e["y"] for e in keeps) * 1.08
        ax.plot(
            ideal_x,
            ideal_y,
            marker="*",
            markersize=18,
            color="#ffc107",
            markeredgecolor="#f57f17",
            markeredgewidth=1.2,
            zorder=10,
        )
        ax.annotate(
            "Theoretical\nIdeal",
            xy=(ideal_x, ideal_y),
            xytext=(ideal_x + ideal_x * 0.03, ideal_y + ideal_y * 0.02),
            fontsize=8,
            color="#f57f17",
            fontweight="bold",
            bbox=dict(
                boxstyle="round,pad=0.3",
                facecolor="#fff8e1",
                edgecolor="#ffc107",
                alpha=0.8,
            ),
        )
    # Plot baseline
    ax.scatter(
        [baseline["x"]],
        [baseline["y"]],
        c="#1565c0",
        s=120,
        zorder=5,
        edgecolors="white",
        linewidths=1.5,
        marker="o",
    )
    ax.annotate(
        f"Baseline\n({baseline['x']:.2f}, {baseline['y']:.2f})",
        xy=(baseline["x"], baseline["y"]),
        xytext=(
            baseline["x"] - baseline["x"] * 0.08,
            baseline["y"] - baseline["y"] * 0.12,
        ),
        fontsize=8,
        fontweight="bold",
        color="#1565c0",
        arrowprops=dict(arrowstyle="->", color="#1565c0", lw=1.2),
        bbox=dict(
            boxstyle="round,pad=0.3",
            facecolor="white",
            edgecolor="#1565c0",
            alpha=0.9,
        ),
    )
    # Plot discards as gray crosses
    if discards:
        ax.scatter(
            [e["x"] for e in discards],
            [e["y"] for e in discards],
            c="#bdbdbd",
            s=60,
            zorder=3,
            alpha=0.5,
            marker="x",
            linewidths=1.5,
        )
    # Plot keeps that are not on the frontier
    non_frontier = [e for e in keeps if "frontier" not in e["status"]]
    if non_frontier:
        ax.scatter(
            [e["x"] for e in non_frontier],
            [e["y"] for e in non_frontier],
            c="#42a5f5",
            s=90,
            zorder=4,
            edgecolors="white",
            linewidths=1.2,
            marker="o",
        )
        for e in non_frontier:
            ax.annotate(
                f"{e['name']}\n({e['x']:.2f}, {e['y']:.2f})",
                xy=(e["x"], e["y"]),
                xytext=(8, 8),
                textcoords="offset points",
                fontsize=7,
                color="#1565c0",
                bbox=dict(
                    boxstyle="round,pad=0.2",
                    facecolor="white",
                    edgecolor="#90caf9",
                    alpha=0.8,
                ),
            )
    # Plot frontier points as green diamonds
    frontier = [e for e in keeps if "frontier" in e["status"]]
    if frontier:
        ax.scatter(
            [e["x"] for e in frontier],
            [e["y"] for e in frontier],
            c="#2e7d32",
            s=140,
            zorder=6,
            edgecolors="white",
            linewidths=2,
            marker="D",
        )
        for e in frontier:
            ax.annotate(
                f"{e['name']}\n({e['x']:.2f}, {e['y']:.2f})",
                xy=(e["x"], e["y"]),
                xytext=(10, 10),
                textcoords="offset points",
                fontsize=8,
                fontweight="bold",
                color="#2e7d32",
                bbox=dict(
                    boxstyle="round,pad=0.3",
                    facecolor="#e8f5e9",
                    edgecolor="#4caf50",
                    alpha=0.9,
                ),
            )
    # Optimization path (dashed line through baseline + keeps in order)
    path_points = [baseline] + keeps
    if len(path_points) >= 2:
        ax.plot(
            [p["x"] for p in path_points],
            [p["y"] for p in path_points],
            linestyle="--",
            color="#1565c0",
            linewidth=1.5,
            alpha=0.6,
            zorder=2,
        )
    # Summary box: percentage gains of the last keep vs baseline.
    # Skipped when a baseline metric is 0 to avoid division by zero.
    if keeps and baseline["x"] and baseline["y"]:
        best = keeps[-1]
        x_gain = ((best["x"] - baseline["x"]) / baseline["x"]) * 100
        y_gain = ((best["y"] - baseline["y"]) / baseline["y"]) * 100
        summary_text = (
            f"Optimization Results\n"
            f"Perf Gain: +{x_gain:.0f}%\n"
            f"Medium Gain: +{y_gain:.0f}%"
        )
        ax.text(
            0.02,
            0.98,
            summary_text,
            transform=ax.transAxes,
            fontsize=9,
            verticalalignment="top",
            bbox=dict(
                boxstyle="round,pad=0.5",
                facecolor="#e8f5e9",
                edgecolor="#4caf50",
                alpha=0.9,
            ),
        )
    # Labels and formatting
    ax.set_xlabel(x_col, fontsize=11, fontweight="bold")
    ax.set_ylabel(y_col, fontsize=11, fontweight="bold")
    ax.set_title(title, fontsize=14, fontweight="bold", pad=15)
    ax.grid(True, alpha=0.3, linestyle="--")
    # Legend
    legend_elements = [
        Line2D(
            [0],
            [0],
            marker="o",
            color="w",
            markerfacecolor="#1565c0",
            markersize=10,
            label="Baseline",
        ),
        Line2D(
            [0],
            [0],
            marker="o",
            color="w",
            markerfacecolor="#42a5f5",
            markersize=9,
            label="Kept experiments",
        ),
        Line2D(
            [0],
            [0],
            marker="D",
            color="w",
            markerfacecolor="#2e7d32",
            markersize=9,
            label="Frontier points",
        ),
        Line2D(
            [0],
            [0],
            marker="x",
            color="#bdbdbd",
            markersize=8,
            label="Discarded",
            linestyle="None",
        ),
        Line2D(
            [0],
            [0],
            linestyle="--",
            color="#1565c0",
            alpha=0.6,
            label="Optimization path",
        ),
    ]
    ax.legend(
        handles=legend_elements, loc="lower right", fontsize=8, framealpha=0.9
    )
    plt.tight_layout()
    plt.savefig(
        str(output_path),
        dpi=150,
        bbox_inches="tight",
        facecolor=fig.get_facecolor(),
    )
    plt.close()
    print(f"Chart saved to {output_path}")
    return True
def main():
    """CLI entry point: locate session data, pick metric axes, render the chart."""
    parser = argparse.ArgumentParser(
        description="Generate Pareto frontier optimization chart"
    )
    parser.add_argument(
        "--output",
        "-o",
        default=".codeflash/pareto-chart.png",
        help="Output PNG path (default: .codeflash/pareto-chart.png)",
    )
    parser.add_argument(
        "--dir",
        "-d",
        default=".codeflash",
        help="Session directory (default: .codeflash)",
    )
    parser.add_argument(
        "--title",
        "-t",
        default="Multi-Objective Performance Optimization",
        help="Chart title",
    )
    opts = parser.parse_args()

    session = Path(opts.dir)
    pareto_path = session / "pareto-frontier.md"
    results_path = session / "results.tsv"

    # Prefer the curated frontier table; fall back to the raw experiment log.
    data = parse_pareto_frontier(pareto_path) or parse_results_tsv(results_path)
    if not data:
        print(
            f"No data found in {pareto_path} or {results_path}",
            file=sys.stderr,
        )
        sys.exit(1)

    x_axis, y_axis = find_metric_columns(data)
    if not (x_axis and y_axis):
        print(
            "Could not detect metric columns for X and Y axes.",
            file=sys.stderr,
        )
        print(f"Available columns: {list(data[0].keys())}", file=sys.stderr)
        sys.exit(1)

    target = Path(opts.output)
    target.parent.mkdir(parents=True, exist_ok=True)
    if not generate_chart(data, x_axis, y_axis, target, title=opts.title):
        sys.exit(1)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,336 @@
# Thread Topology & Spin-Wait Optimization for Java
This guide covers thread topology reconfiguration, spin-wait strategy tuning, and high-performance engine patterns. These are architectural optimizations that produce step-function improvements (50-200%+) rather than incremental gains — they change HOW threads are organized, not just what code runs on them.
## When This Guide Applies
Use this guide when profiling reveals:
- >20% CPU time in spin-wait, yield, or park operations
- Thread count exceeds physical core count (involuntary context switches)
- Multiple engine threads contend on shared data structures
- JFR shows high `jdk.ThreadPark` or `jdk.JavaMonitorWait` between dedicated threads
- Throughput plateaus despite per-function optimizations (the bottleneck is the thread architecture)
## Thread Topology Fundamentals
A **thread topology** is the assignment of roles to threads and threads to cores. In high-performance systems (matching engines, message brokers, event processors, trading systems), the topology determines the theoretical throughput ceiling.
### Anatomy of a Thread Topology
```
┌─────────────────────────────────────────────┐
│ Application Thread Topology │
│ │
│ Input → [Sequencer] → [Engine 1] → Output │
│ → [Engine 2] → Output │
│ → [Engine 3] → Output │
│ │
│ [Risk Engine 1] ←→ [Engine 1] │
│ [Risk Engine 2] ←→ [Engine 2] │
│ │
│ [Journaler] ← all engines │
└─────────────────────────────────────────────┘
```
Each box is a dedicated thread. The arrows represent data flow. The topology defines:
1. **How many threads** of each role
2. **What each thread does** (matching, risk checking, journaling, sequencing)
3. **How threads communicate** (shared memory, ring buffers, queues)
4. **Which cores each thread runs on** (affinity)
### Why Topology Matters More Than Code
A matching engine running optimal code on a bad topology (4 engines + 2 risk threads on 4 cores) will be slower than mediocre code on a good topology (2 engines + 1 risk thread on 4 cores with proper affinity). The bad topology forces context switches, cache invalidation, and contention that no code-level optimization can fix.
## Spin-Wait Strategy Ladder
Every wait point in a concurrent system uses one of these strategies. Each trades latency for CPU efficiency. The RIGHT strategy depends on the expected wait duration and whether the core is dedicated.
| Level | Strategy | Code Pattern | Wake Latency | CPU Cost | Best For |
|-------|----------|-------------|--------------|----------|----------|
| 0 | **Busy-spin** | `while (!condition) {}` | <100ns | 100% core | Dedicated core, sub-microsecond events |
| 1 | **Spin + PAUSE** | `while (!cond) { Thread.onSpinWait(); }` | <100ns | ~80% core | JDK 9+, x86 PAUSE hint saves power/SMT |
| 2 | **Spin N then yield** | `for(i=0;i<N;i++) onSpinWait(); Thread.yield()` | 1-10μs | Variable | Shared core, moderate contention |
| 3 | **Spin N then park** | `for(i=0;i<N;i++) onSpinWait(); LockSupport.parkNanos(1)` | 10-50μs | Low | Infrequent events, batch arrival |
| 4 | **Timed park** | `LockSupport.parkNanos(timeout)` | 50-200μs | ~0% | Background processing, polling |
| 5 | **Blocking queue** | `queue.take()` | 50-500μs | ~0% | I/O consumers, work distribution |
| 6 | **Condition + signal** | `condition.await()` + `signal()` | 200μs-1ms | ~0% | Rare events, shutdown coordination |
### Choosing the Right Spin Level
```
Is this a dedicated core (no other threads share it)?
├── YES → Is the expected wait <1μs 99% of the time?
│ ├── YES → Level 0-1 (busy-spin ± PAUSE)
│ └── NO → Level 2 (spin-then-yield)
└── NO → Is latency critical (trading, real-time)?
├── YES → Level 2-3 (spin-then-yield or spin-then-park)
└── NO → Level 4-5 (timed park or blocking queue)
```
### Spin Count Tuning
The `N` in "spin N then yield/park" is critical. Too low = unnecessary park latency. Too high = wasted CPU.
**Measurement approach:**
1. Instrument the spin loop to count iterations before the condition becomes true
2. Run under production-like load
3. Set N to the **95th percentile** of observed spin counts
4. Common values: 100-1000 for spin-then-yield, 1000-10000 for spin-then-park
```java
// Adaptive spin with statistics
int spinCount = 0;
while (!condition.get()) {
if (spinCount < SPIN_LIMIT) {
Thread.onSpinWait();
spinCount++;
} else {
LockSupport.parkNanos(1);
spinCount = 0;
}
}
// Log spinCount for tuning
```
## Topology Reconfiguration Patterns
### Pattern 1: Thread Count Reduction
**Signal**: More dedicated threads than physical cores. `jfr print --events jdk.CPUInformation` shows core count.
**Example**: 4 matching engines + 2 risk engines on a 4-core machine → 2 matching engines + 1 risk engine.
**Why it works**: Eliminates involuntary context switches. Each engine gets a dedicated core with warm L1/L2 cache. No cache line bouncing between engines.
**Measurement**:
```bash
# Before: check context switches
grep -i "ctxt_switches" /proc/<pid>/status   # fields: voluntary_ctxt_switches / nonvoluntary_ctxt_switches
# Count threads per role
jfr print --events jdk.ExecutionSample /tmp/codeflash-profile.jfr | \
grep "thread:" | sort | uniq -c | sort -rn
# After: same metrics, compare throughput/context-switch ratio
```
**Implementation checklist**:
- [ ] Profile to confirm thread count > physical cores
- [ ] Identify which threads can be consolidated (same role = mergeable)
- [ ] Reduce thread count, increase per-thread batch size to compensate
- [ ] Benchmark throughput AND latency (fewer threads = higher per-event latency possible)
- [ ] Verify no deadlock from changed thread dependencies
### Pattern 2: Core Pinning (Thread Affinity)
**Signal**: High L2/L3 cache miss rate. Threads migrate between cores (visible in `perf stat` or JFR).
**Implementation** (Linux):
```bash
# Pin JVM to specific cores
taskset -c 0-3 java -jar app.jar
# Or per-thread via JNI/JNA
# (requires native library — check if project already has one)
```
**Implementation** (Java, via system property):
```java
// Some frameworks support thread affinity
// LMAX Disruptor: AffinityThreadFactory
ThreadFactory factory = new AffinityThreadFactory("engine", AffinityStrategies.DIFFERENT_CORE);
```
### Pattern 3: Engine Consolidation
**Signal**: Two engines process the same data sequentially. Engine A writes → Engine B reads → Engine B processes. The data transfer between them (even via shared memory) adds latency.
**Fix**: Merge A and B into one engine that does both steps. Eliminates the inter-engine data transfer.
**Risk**: The merged engine is more complex. Ensure the combined work fits within the latency budget.
### Pattern 4: Read/Write Path Separation (LMAX Pattern)
**Signal**: One thread handles both reads (queries) and writes (commands). Reads cause cache pollution that slows writes.
**Fix**: Dedicated writer thread (hot path, busy-spin) + dedicated reader threads (warm path, yield-spin). Writer publishes via ring buffer. Readers consume independently.
```
[Writer] → [Ring Buffer] → [Reader 1] (query processor)
→ [Reader 2] (risk calculator)
→ [Reader 3] (journaler)
```
### Pattern 5: Disruptor / Ring Buffer Replacement
**Signal**: Threads communicate via `BlockingQueue` or `ConcurrentLinkedQueue`. High allocation rate from queue node objects. Lock contention on queue head/tail.
**Fix**: Replace with a pre-allocated ring buffer (Disruptor pattern). Zero allocation, mechanical sympathy with CPU cache lines.
```java
// Disruptor-style ring buffer (simplified)
// Pre-allocated array, sequence counters, no locks
long[] ringBuffer = new long[BUFFER_SIZE]; // power of 2
AtomicLong writerSequence = new AtomicLong(-1);
AtomicLong readerSequence = new AtomicLong(-1);
// Writer (single-threaded, no CAS needed if single writer)
void publish(long value) {
long next = writerSequence.get() + 1;
ringBuffer[(int)(next & (BUFFER_SIZE - 1))] = value;
writerSequence.lazySet(next); // StoreStore barrier, cheaper than volatile
}
// Reader (single reader shown; with multiple readers, each must track its own sequence counter)
long read() {
long next = readerSequence.get() + 1;
while (writerSequence.get() < next) {
Thread.onSpinWait(); // spin until writer publishes
}
long value = ringBuffer[(int)(next & (BUFFER_SIZE - 1))];
readerSequence.lazySet(next);
return value;
}
```
## Profiling Thread Topology
### Step 1: Map the Current Topology
```bash
# List all threads and their CPU time
jfr print --events jdk.ThreadCPULoad /tmp/codeflash-profile.jfr 2>/dev/null | head -40
# Thread names reveal roles
jfr print --events jdk.ExecutionSample /tmp/codeflash-profile.jfr | \
grep "thread:" | sed 's/.*thread: //' | sort | uniq -c | sort -rn | head -20
# How many threads are actually running vs waiting
jcmd <pid> Thread.print | grep -c "RUNNABLE"
jcmd <pid> Thread.print | grep -c "WAITING\|TIMED_WAITING\|BLOCKED"
```
### Step 2: Identify Topology Waste
```bash
# CPU time in spin-wait (wasted CPU)
jfr print --events jdk.ExecutionSample /tmp/codeflash-profile.jfr | \
grep -E "onSpinWait|yield|parkNanos|busySpin|spin\(" | wc -l
# Context switches (OS-level)
perf stat -e context-switches -p <pid> -- sleep 10
# Cache misses (indicates poor affinity)
perf stat -e cache-misses,cache-references -p <pid> -- sleep 10
```
### Step 3: Model the Optimal Topology
Before changing code, calculate the theoretical optimal:
1. **Count physical cores** (not hyperthreads): `lscpu | grep "Core(s) per socket"` x sockets
2. **Count dedicated threads** that need a core (engine, sequencer, journaler)
3. **If threads > cores**: reduce threads or share cores with yield-spin
4. **If threads <= cores**: pin each to a dedicated core, use busy-spin
5. **Reserve 1-2 cores** for GC, JIT compilation, OS
### Step 4: Benchmark the Topology Change
Topology changes are high-risk, high-reward. Always benchmark:
1. **Throughput** (primary): messages/second, transactions/second
2. **Latency distribution** (secondary): p50, p99, p99.9 -- topology changes can shift the distribution shape
3. **CPU utilization** (diagnostic): should decrease if you eliminated spin-waste
4. **Context switches** (diagnostic): should decrease if you reduced thread count
## Common Anti-Patterns
### Over-threaded engine
```
ANTI-PATTERN: 8 matching engines on 4 cores
-> 4 engines idle-wait while 4 run
-> OS context-switches between them every 4ms
-> L1/L2 cache flushed on every switch
-> Effective throughput: worse than 4 engines
FIX: Match engine count to physical core count minus GC/JIT reserve
```
### Wrong spin level
```
ANTI-PATTERN: busy-spin on shared core
-> Spin thread steals 100% of core from other threads
-> Other threads starve, throughput drops
-> Higher latency than Thread.yield() paradoxically
FIX: Use spin-then-yield (Level 2) on shared cores
```
### Queue between adjacent pipeline stages
```
ANTI-PATTERN: Stage A -> BlockingQueue -> Stage B (both CPU-bound)
-> Queue allocation overhead: 1 node object per item
-> Lock contention on queue head/tail
-> Cache misses from pointer-chasing in linked list
FIX: Ring buffer or direct handoff (single-writer, single-reader)
```
### Thread pool for fixed-role threads
```
ANTI-PATTERN: Engine threads managed by ExecutorService
-> Pool can resize, reassign work, add overhead
-> Thread names are generic ("pool-1-thread-3")
-> No affinity, no dedicated spin strategy
FIX: Create threads directly with Thread.ofPlatform()
Set name, priority, daemon status, and affinity explicitly
```
## JMH Benchmarking for Topology Changes
Topology changes affect the entire system. Micro-benchmarks are insufficient -- use workflow-level JMH:
```java
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Benchmark)  // required: @Param and @Setup need per-trial instance state
@Fork(value = 3, jvmArgs = {"-Xms2g", "-Xmx2g"})
@Warmup(iterations = 5, time = 5)
@Measurement(iterations = 10, time = 5)
public class TopologyBenchmark {
    MatchingEngine engine;  // system under test; orderBatch should likewise be built in setup()
@Param({"2", "4", "8"}) // test different engine counts
int engineCount;
@Param({"1000", "10000"}) // spin counts
int spinLimit;
@Setup(Level.Trial)
public void setup() {
// Configure topology with parameterized engine count
engine = new MatchingEngine(engineCount, spinLimit);
engine.start();
}
@Benchmark
public void matchOrders(Blackhole bh) {
// Submit a batch of orders and measure throughput
bh.consume(engine.processBatch(orderBatch));
}
@TearDown(Level.Trial)
public void teardown() {
engine.shutdown();
}
}
```
Run with:
```bash
java -jar benchmarks.jar "TopologyBenchmark" \
-rf json -rff /tmp/topology-results.json \
-f 3 -wi 5 -i 10 -t 1 # single-threaded benchmark runner; engines are internal
```
Compare across `engineCount` and `spinLimit` parameter combinations to find the optimal topology.

View file

@ -166,6 +166,40 @@ echo "=== MEMORY: Outside-TLAB allocations (large objects) ==="
jfr print --events jdk.ObjectAllocationOutsideTLAB "${JFR_FILE}" 2>/dev/null | \
grep -E "stackTrace:|objectClass:|allocationSize:" | head -40
echo ""
echo "=== MEMORY: Top allocators (aggregated by class) ==="
# Aggregate TLAB allocations by objectClass to show which types dominate.
# Pipeline: extract class names -> count duplicates -> sort descending -> top 15.
TLAB_AGG=$(jfr print --events jdk.ObjectAllocationInNewTLAB "${JFR_FILE}" 2>/dev/null | \
grep "objectClass:" | sed 's/.*objectClass: //' | sort | uniq -c | sort -rn | head -15)
if [[ -n "${TLAB_AGG}" ]]; then
echo " Type Alloc count"
echo " ────────────────────────────────────────────────────"
# uniq -c emits "<count> <value>", so count is read first
echo "${TLAB_AGG}" | while read -r count cls; do
printf " %-40s %s\n" "${cls}" "${count}"
done
else
echo " No TLAB allocation events recorded"
fi
# Estimate allocation rate from TLAB sizes
echo ""
echo "=== MEMORY: Allocation rate estimate ==="
# Use POSIX-ERE `grep -oE '[0-9]+'` rather than the GNU-only `-P` flag:
# with BSD grep (macOS) `-P` is unsupported, the pipeline fails, and the
# rate would silently report as 0.
TOTAL_ALLOC_BYTES=$(jfr print --events jdk.ObjectAllocationInNewTLAB "${JFR_FILE}" 2>/dev/null | \
grep "allocationSize:" | grep -oE '[0-9]+' | awk '{sum+=$1} END {printf "%.0f", sum}' 2>/dev/null || echo "0")
OUTSIDE_TLAB_BYTES=$(jfr print --events jdk.ObjectAllocationOutsideTLAB "${JFR_FILE}" 2>/dev/null | \
grep "allocationSize:" | grep -oE '[0-9]+' | awk '{sum+=$1} END {printf "%.0f", sum}' 2>/dev/null || echo "0")
TOTAL_ALLOC=$((TOTAL_ALLOC_BYTES + OUTSIDE_TLAB_BYTES))
# Integer seconds; sub-second recordings fall through to the "insufficient data" branch
WALL_SECONDS=$((WALL_TIME_MS / 1000))
if [[ ${WALL_SECONDS} -gt 0 && ${TOTAL_ALLOC} -gt 0 ]]; then
ALLOC_RATE_MB=$((TOTAL_ALLOC / 1048576 / WALL_SECONDS))
echo " Total sampled allocation: $((TOTAL_ALLOC / 1048576)) MiB"
echo " Allocation rate: ~${ALLOC_RATE_MB} MiB/s"
echo " (Note: JFR samples allocations, actual rate is higher)"
else
echo " Insufficient data to estimate allocation rate"
fi
# --- GC: Collection events ---
echo ""
@ -247,6 +281,46 @@ else
echo " No monitor wait events recorded"
fi
# --- Thread topology analysis ---
# Attribute CPU execution samples to thread names to expose the topology:
# which threads actually burn CPU and in what proportion.
echo ""
echo "=== TOPOLOGY: Thread CPU distribution ==="
# Pipeline: extract thread name from each ExecutionSample -> strip trailing
# quoted remainder -> count duplicates -> sort descending -> top 15.
THREAD_CPU=$(jfr print --events jdk.ExecutionSample "${JFR_FILE}" 2>/dev/null | \
grep "name:" | sed 's/.*name: //' | sed 's/ ".*//' | sort | uniq -c | sort -rn | head -15)
if [[ -n "${THREAD_CPU}" ]]; then
# NOTE(review): sums only the top-15 threads kept by `head -15`, so CPU% is
# relative to the displayed threads, not strictly all samples — confirm intent.
TOTAL_SAMPLES=$(echo "${THREAD_CPU}" | awk '{sum+=$1} END {print sum}')
echo " Thread Samples CPU%"
echo " ──────────────────────────────────────────────────────────"
# uniq -c emits "<count> <name>", so count is read first
echo "${THREAD_CPU}" | while read -r count thread; do
if [[ ${TOTAL_SAMPLES} -gt 0 ]]; then
# Integer percentage of displayed samples attributed to this thread
PCT=$((count * 100 / TOTAL_SAMPLES))
printf " %-40s %-10s %s%%\n" "${thread}" "${count}" "${PCT}"
fi
done
echo " Total samples: ${TOTAL_SAMPLES}"
else
echo " No execution samples found"
fi
# Spin-wait detection: estimate CPU burned in busy-wait loops vs parked threads.
echo ""
echo "=== TOPOLOGY: Spin-wait CPU waste ==="
# `grep -c` already prints "0" when nothing matches but exits non-zero, so the
# previous `|| echo "0"` appended a second "0" and the substitution captured
# the two-line value "0\n0", breaking the arithmetic below. Guard the exit
# status with `|| true` instead and default via ${VAR:-0}.
SPIN_SAMPLES=$(jfr print --events jdk.ExecutionSample "${JFR_FILE}" 2>/dev/null | \
grep -cE "onSpinWait|Thread\.yield|busySpin|\.spin\b" 2>/dev/null || true)
SPIN_SAMPLES=${SPIN_SAMPLES:-0}
PARK_SAMPLES=$(jfr print --events jdk.ExecutionSample "${JFR_FILE}" 2>/dev/null | \
grep -cE "LockSupport\.park|parkNanos" 2>/dev/null || true)
PARK_SAMPLES=${PARK_SAMPLES:-0}
echo " Spin-wait samples: ${SPIN_SAMPLES}"
echo " Park samples: ${PARK_SAMPLES}"
# TOTAL_SAMPLES comes from the thread-distribution section above; default to 0
# when no execution samples were recorded so the percentages are skipped.
if [[ ${TOTAL_SAMPLES:-0} -gt 0 ]]; then
SPIN_PCT=$((SPIN_SAMPLES * 100 / TOTAL_SAMPLES))
PARK_PCT=$((PARK_SAMPLES * 100 / TOTAL_SAMPLES))
echo " Spin-wait CPU: ~${SPIN_PCT}%"
echo " Park CPU: ~${PARK_PCT}%"
# >20% of samples inside spin loops is the heuristic threshold for flagging
if [[ ${SPIN_PCT} -gt 20 ]]; then
echo " ⚠ HIGH SPIN-WAIT: >20% CPU in spin loops — consider spin-wait strategy tuning"
fi
fi
# --- Summary ---
echo ""
@ -256,6 +330,12 @@ echo "============================================================"
# Summary footer: one line per headline metric gathered above.
echo " Wall time: ${WALL_TIME_MS}ms"
echo " GC collections: ${GC_COUNT}"
echo " GC total: ${GC_TOTAL_MS}ms"
echo " Sampled allocs: $((TOTAL_ALLOC / 1048576)) MiB"
if [[ ${WALL_SECONDS:-0} -gt 0 && ${TOTAL_ALLOC:-0} -gt 0 ]]; then
echo " Alloc rate: ~${ALLOC_RATE_MB} MiB/s"
fi
echo " Spin-wait CPU: ~${SPIN_PCT:-0}%"
# `grep -c .` counts non-empty lines, so an empty THREAD_CPU reports 0 threads
# (plain `wc -l` on an empty string reports 1). Still capped at 15 by the
# `head -15` upstream, so a busier JVM may have more threads than shown.
echo " Active threads: $(echo "${THREAD_CPU}" | grep -c .)"
echo " JFR recording: ${JFR_FILE}"
if [[ ! -f "${BASELINE_PATH}" ]]; then

View file

@ -300,9 +300,9 @@ When the user says "done", "clean up", or "finish session", or when the domain a
1. **Generate changelog.** Before cleaning up, generate `.codeflash/changelog.md` (see "## Changelog Generation" below). For multi-domain sessions, do this after the merge step.
2. **Shut down teammates.** Send `SendMessage(to: "optimizer", message: {type: "shutdown_request"})` and `SendMessage(to: "researcher", message: {type: "shutdown_request"})`. Wait for confirmation. If multiple domain agents are running, shut down each one.
3. **Delete team.** `TeamDelete` to clean up team config and task list.
4. **Preserve** `.codeflash/learnings.md`, `.codeflash/results.tsv`, and `.codeflash/changelog.md` (useful for future sessions and PR creation).
5. **Delete transient files**: `HANDOFF.md`, `setup.md`, `conventions.md`, and any benchmark scripts in `.codeflash/`.
6. If `.codeflash/` is now empty (no learnings, results, or changelog), remove the directory entirely.
4. **Preserve** `.codeflash/learnings.md`, `.codeflash/results.tsv`, `.codeflash/changelog.md`, `.codeflash/pareto-frontier.md`, and `.codeflash/pareto-chart.png` (useful for future sessions, PR creation, and optimization history).
5. **Delete transient files**: `HANDOFF.md`, `setup.md`, `conventions.md`, `strategy-plan.md`, and any benchmark scripts in `.codeflash/`.
6. If `.codeflash/` is now empty (no learnings, results, changelog, or pareto files), remove the directory entirely.
7. Delete `.claude/agent-memory/` if it exists in the project directory (agent memory is per-session, not meant to persist).
## Maintainer Feedback

246
uv.lock
View file

@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = ">=3.12"
resolution-markers = [
"python_full_version >= '3.14' and sys_platform == 'win32'",
@ -351,10 +351,14 @@ version = "0.1.0"
source = { editable = "packages/codeflash-mcp" }
dependencies = [
{ name = "codeflash-core" },
{ name = "mcp", extra = ["cli"] },
]
[package.metadata]
requires-dist = [{ name = "codeflash-core", editable = "packages/codeflash-core" }]
requires-dist = [
{ name = "codeflash-core", editable = "packages/codeflash-core" },
{ name = "mcp", extras = ["cli"], specifier = ">=1.0.0" },
]
[[package]]
name = "codeflash-python"
@ -1037,6 +1041,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[[package]]
name = "httpx-sse"
version = "0.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" },
]
[[package]]
name = "idna"
version = "3.11"
@ -1180,6 +1193,33 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
[[package]]
name = "jsonschema"
version = "4.26.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "jsonschema-specifications" },
{ name = "referencing" },
{ name = "rpds-py" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
]
[[package]]
name = "jsonschema-specifications"
version = "2025.9.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "referencing" },
]
sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
]
[[package]]
name = "junitparser"
version = "5.0.0"
@ -1546,6 +1586,37 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
]
[[package]]
name = "mcp"
version = "1.27.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "httpx" },
{ name = "httpx-sse" },
{ name = "jsonschema" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyjwt", extra = ["crypto"] },
{ name = "python-multipart" },
{ name = "pywin32", marker = "sys_platform == 'win32'" },
{ name = "sse-starlette" },
{ name = "starlette" },
{ name = "typing-extensions" },
{ name = "typing-inspection" },
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8b/eb/c0cfc62075dc6e1ec1c64d352ae09ac051d9334311ed226f1f425312848a/mcp-1.27.0.tar.gz", hash = "sha256:d3dc35a7eec0d458c1da4976a48f982097ddaab87e278c5511d5a4a56e852b83", size = 607509, upload-time = "2026-04-02T14:48:08.88Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" },
]
[package.optional-dependencies]
cli = [
{ name = "python-dotenv" },
{ name = "typer" },
]
[[package]]
name = "mdit-py-plugins"
version = "0.5.0"
@ -2370,6 +2441,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" },
]
[[package]]
name = "pydantic-settings"
version = "2.14.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic" },
{ name = "python-dotenv" },
{ name = "typing-inspection" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/98/c8345dccdc31de4228c039a98f6467a941e39558da41c1744fbe29fa5666/pydantic_settings-2.14.0.tar.gz", hash = "sha256:24285fd4b0e0c06507dd9fdfd331ee23794305352aaec8fc4eb92d4047aeb67d", size = 235709, upload-time = "2026-04-20T13:37:40.293Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/01/dd/bebff3040138f00ae8a102d426b27349b9a49acc310fcae7f92112d867e3/pydantic_settings-2.14.0-py3-none-any.whl", hash = "sha256:fc8d5d692eb7092e43c8647c1c35a3ecd00e040fcf02ed86f4cb5458ca62182e", size = 60940, upload-time = "2026-04-20T13:37:38.586Z" },
]
[[package]]
name = "pygls"
version = "2.1.1"
@ -2472,6 +2557,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
]
[[package]]
name = "python-multipart"
version = "0.0.27"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/69/9b/f23807317a113dc36e74e75eb265a02dd1a4d9082abc3c1064acd22997c4/python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602", size = 44043, upload-time = "2026-04-27T10:51:26.649Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/99/78/4126abcbdbd3c559d43e0db7f7b9173fc6befe45d39a2856cc0b8ec2a5a6/python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645", size = 29254, upload-time = "2026-04-27T10:51:24.997Z" },
]
[[package]]
name = "pytokens"
version = "0.4.1"
@ -2501,6 +2595,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" },
]
[[package]]
name = "pywin32"
version = "311"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" },
{ url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" },
{ url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" },
{ url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" },
{ url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" },
{ url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" },
{ url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" },
{ url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" },
{ url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" },
]
[[package]]
name = "pyyaml"
version = "6.0.3"
@ -2571,6 +2681,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" },
]
[[package]]
name = "referencing"
version = "0.37.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "rpds-py" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
]
[[package]]
name = "requests"
version = "2.33.1"
@ -2620,6 +2744,87 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
]
[[package]]
name = "rpds-py"
version = "0.30.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" },
{ url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" },
{ url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" },
{ url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" },
{ url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" },
{ url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" },
{ url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" },
{ url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" },
{ url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" },
{ url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" },
{ url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" },
{ url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" },
{ url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" },
{ url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" },
{ url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" },
{ url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" },
{ url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" },
{ url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" },
{ url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" },
{ url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" },
{ url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" },
{ url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" },
{ url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" },
{ url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" },
{ url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" },
{ url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" },
{ url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" },
{ url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" },
{ url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" },
{ url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" },
{ url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" },
{ url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" },
{ url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" },
{ url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" },
{ url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" },
{ url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" },
{ url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" },
{ url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" },
{ url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" },
{ url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" },
{ url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" },
{ url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" },
{ url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" },
{ url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" },
{ url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" },
{ url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" },
{ url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" },
{ url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" },
{ url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" },
{ url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" },
{ url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" },
{ url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" },
{ url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" },
{ url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" },
{ url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" },
{ url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" },
{ url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" },
{ url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" },
{ url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" },
{ url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" },
{ url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" },
{ url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" },
{ url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" },
{ url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" },
{ url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" },
{ url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" },
{ url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" },
{ url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" },
{ url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" },
{ url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" },
{ url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" },
{ url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" },
{ url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" },
]
[[package]]
name = "ruff"
version = "0.15.10"
@ -2728,6 +2933,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" },
]
[[package]]
name = "shellingham"
version = "1.5.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
]
[[package]]
name = "six"
version = "1.17.0"
@ -2746,6 +2960,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f", size = 24390, upload-time = "2026-03-09T03:43:24.361Z" },
]
[[package]]
name = "sse-starlette"
version = "3.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "starlette" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e1/9a/f35932a8c0eb6b2287b66fa65a0321df8c84e4e355a659c1841a37c39fdb/sse_starlette-3.4.1.tar.gz", hash = "sha256:f780bebcf6c8997fe514e3bd8e8c648d8284976b391c8bed0bcb1f611632b555", size = 35127, upload-time = "2026-04-26T13:32:32.292Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ff/07/45c21ed03d708c477367305726b89919b020a3a2a01f72aaf5ad941caf35/sse_starlette-3.4.1-py3-none-any.whl", hash = "sha256:6b43cf21f1d574d582a6e1b0cfbde1c94dc86a32a701a7168c99c4475c6bd1d0", size = 16487, upload-time = "2026-04-26T13:32:30.819Z" },
]
[[package]]
name = "stamina"
version = "25.2.0"
@ -2987,6 +3214,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" },
]
[[package]]
name = "typer"
version = "0.25.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-doc" },
{ name = "click" },
{ name = "rich" },
{ name = "shellingham" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7b/27/ede8cec7596e0041ba7e7b80b47d132562f56ff454313a16f6084e555c9f/typer-0.25.0.tar.gz", hash = "sha256:123eaf9f19bb40fd268310e12a542c0c6b4fab9c98d9d23342a01ff95e3ce930", size = 120150, upload-time = "2026-04-26T08:46:14.767Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/72/193d4e586ec5a4db834a36bbeb47641a62f951f114ffd0fe5b1b46e8d56f/typer-0.25.0-py3-none-any.whl", hash = "sha256:ac01b48823d3db9a83c9e164338057eadbb1c9957a2a6b4eeb486669c560b5dc", size = 55993, upload-time = "2026-04-26T08:46:15.889Z" },
]
[[package]]
name = "types-requests"
version = "2.33.0.20260408"