* Add Unstructured engagement report as uv workspace member Three-tier Plotly Dash app (Executive Brief, Engineering Team, Full Detail) with data in JSON, theme constants in theme.py, and Dash production improvements (Google Fonts, clientside callbacks, meta tags). Also: add .playwright-mcp/ to .gitignore, add reports/* ruff overrides, remove tracked .codeflash/observability/read-tracker. * Rewrite statusline to derive context from git state Detects active area from changed files (reports, packages, plugin, .codeflash, case-studies, evals), falls back to branch name convention (perf/*, feat/*, fix/*), shows dirty indicator. Uses whoami for cross-platform user detection. * Add pre-push lint rule to commit guidelines * Exclude .codeflash/ from ruff linting Benchmark and profiling scripts in .codeflash/ are scratch work, not package source. Excluding them prevents CI failures from ad-hoc scripts. * Run ruff format across packages, scripts, evals, and plugin refs * Fix github-app async test failures in CI Add asyncio_mode = "auto" to root pytest config so async tests are detected when running from the repo root via uv run pytest packages/.
2406 lines
95 KiB
Python
2406 lines
95 KiB
Python
"""Unstructured x Codeflash — Engagement Report
|
|
|
|
Three-tier report:
|
|
1. Executive Brief — for Chris Maddock (SVP Eng) and JPC
|
|
2. Engineering Team — for Crag's team, aggregate view
|
|
3. Engineering Detail — per-PR, benchmarks, methodology
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import plotly.graph_objects as go
|
|
from dash import (
|
|
Dash,
|
|
Input,
|
|
Output,
|
|
clientside_callback,
|
|
dash_table,
|
|
dcc,
|
|
html,
|
|
)
|
|
from theme import (
|
|
ACCENT,
|
|
AMBER,
|
|
BG,
|
|
BLUE,
|
|
CARD,
|
|
CARD_BG,
|
|
CARD_BORDER,
|
|
DARK,
|
|
FONT,
|
|
GRAY,
|
|
GREEN,
|
|
LIGHT_GRAY,
|
|
LIGHT_GREEN,
|
|
LIGHT_RED,
|
|
MONO,
|
|
PINK,
|
|
PURPLE,
|
|
RED,
|
|
SLATE,
|
|
TABLE_STYLE,
|
|
WHITE,
|
|
)
|
|
|
|
# ── Data ────────────────────────────────────────────────────────────────────
|
|
# All report figures are read once, at import time, from the data.json file
# that sits next to this module. The encoding is pinned to UTF-8 so the file
# is decoded the same way regardless of the platform's locale default.
_DATA = json.loads(
    (Path(__file__).parent / "data.json").read_text(encoding="utf-8")
)

# Base URLs; PR numbers are appended as f"{BASE}/<number>" to build links.
CORE_PRODUCT_BASE = _DATA["core_product_base"]
GITHUB_WORKFLOWS_BASE = _DATA["github_workflows_base"]

# Memory measurements (indexed by keys such as "pre_partition_mb", "k8s_gb").
MEM_BEFORE = _DATA["mem_before"]
MEM_AFTER = _DATA["mem_after"]

# Benchmark and latency figures.
BENCH_BEFORE = _DATA["bench_before"]
BENCH_AFTER = _DATA["bench_after"]
LATENCY_OPTS = _DATA["latency_opts"]

# CI figures (indexed by keys such as "jobs_spawned", "billed_min", "cost").
CI_BEFORE = _DATA["ci_before"]
CI_AFTER = _DATA["ci_after"]

# PR inventories rendered as tables by the detail view.
MERGED_PRS = _DATA["merged_prs"]
OPEN_PRS = _DATA["open_prs"]
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def hero_metric(value, label, detail, color=GREEN):
    """Build a centered card showing one headline number.

    Args:
        value: Content of the large headline line.
        label: Content of the medium-weight line under the headline.
        detail: Content of the small explanatory line at the bottom.
        color: CSS color applied to the headline; defaults to ``GREEN``.

    Returns:
        An ``html.Div`` holding the three stacked lines.
    """
    headline = html.Div(
        value,
        style={
            "fontSize": "42px",
            "fontWeight": "800",
            "color": color,
            "lineHeight": "1",
            "letterSpacing": "-0.02em",
            "fontFamily": FONT,
        },
    )
    caption = html.Div(
        label,
        style={
            "fontSize": "15px",
            "fontWeight": "600",
            "color": SLATE,
            "marginTop": "8px",
        },
    )
    footnote = html.Div(
        detail,
        style={"fontSize": "13px", "color": GRAY, "marginTop": "4px"},
    )

    # flex/minWidth let several of these cards share a wrapping flex row.
    shell_style = {
        "background": CARD_BG,
        "borderRadius": "12px",
        "padding": "32px 24px",
        "textAlign": "center",
        "flex": "1",
        "minWidth": "200px",
        "border": f"1px solid {CARD_BORDER}",
    }
    return html.Div([headline, caption, footnote], style=shell_style)
|
|
|
|
|
|
def section(title, subtitle=None):
    """Build a section heading with an optional subtitle paragraph.

    Args:
        title: Text of the ``H2`` heading.
        subtitle: Optional text rendered in a paragraph under the heading;
            omitted entirely when falsy.

    Returns:
        An ``html.Div`` wrapping the heading (and subtitle, if any).
    """
    heading = html.H2(
        title,
        style={
            "fontSize": "22px",
            "fontWeight": "700",
            "color": SLATE,
            "margin": "0",
            "fontFamily": FONT,
            "letterSpacing": "-0.01em",
        },
    )
    if not subtitle:
        parts = [heading]
    else:
        tagline = html.P(
            subtitle,
            style={
                "fontSize": "14px",
                "color": GRAY,
                "margin": "6px 0 0",
                "lineHeight": "1.5",
            },
        )
        parts = [heading, tagline]
    return html.Div(parts, style={"margin": "56px 0 24px"})
|
|
|
|
|
|
def card(children, **kw):
    """Wrap ``children`` in a ``Div`` styled with the shared ``CARD`` style.

    Args:
        children: Content passed straight through to ``html.Div``.
        **kw: Extra CSS properties (camelCase keys); they are layered on top
            of a copy of ``CARD``, so they override matching keys.

    Returns:
        The styled ``html.Div``.
    """
    # Build a fresh dict so the shared CARD constant is never mutated.
    return html.Div(children, style={**CARD, **kw})
|
|
|
|
|
|
def metric_row(
    label, before, after, unit="", fmt="{:,.0f}", better="lower", note=None
):
    """Render one before/after comparison row with a percent-change badge.

    Args:
        label: Metric name shown in the flexible left-hand column.
        before: Baseline value, or ``None`` when it is unavailable.
        after: Current value, or ``None`` when it is unavailable.
        unit: Suffix appended after each formatted value (e.g. ``"MB"``).
        fmt: ``str.format`` template applied to each value.
        better: ``"lower"`` when a decrease counts as an improvement; any
            other value treats an increase as the improvement.
        note: Optional short annotation rendered right after the label.

    Returns:
        An ``html.Div`` flex row: label, before value, after value, delta.
    """
    # A percent change needs both values and a non-zero baseline (to avoid a
    # division by zero). The values are compared against None rather than
    # tested for truthiness so that a metric which legitimately drops to 0
    # is reported as -100% instead of being blanked out.
    has_delta = before is not None and after is not None and before != 0
    if has_delta:
        delta = (after - before) / before * 100
        improved = delta < 0 if better == "lower" else delta > 0
        delta_text = f"{delta:+.0f}%"
        delta_color = GREEN if improved else RED
        delta_bg = LIGHT_GREEN if improved else LIGHT_RED
    else:
        delta_text, delta_color, delta_bg = "—", GRAY, "transparent"

    def _f(v):
        """Format a value with its unit, or an em dash when it is None."""
        if v is None:
            return "—"
        return f"{fmt.format(v)} {unit}".strip()

    # An empty Span keeps the label cell's child count stable without a note.
    note_span = (
        html.Span(f" {note}", style={"fontSize": "12px", "color": LIGHT_GRAY})
        if note
        else html.Span()
    )
    label_cell = html.Div(
        [
            html.Span(
                label,
                style={
                    "fontWeight": "600",
                    "color": SLATE,
                    "fontSize": "14px",
                },
            ),
            note_span,
        ],
        style={"flex": "1"},
    )
    before_cell = html.Div(
        _f(before),
        style={
            "width": "140px",
            "textAlign": "right",
            "color": GRAY,
            "fontSize": "14px",
            "fontFamily": MONO,
        },
    )
    after_cell = html.Div(
        _f(after),
        style={
            "width": "140px",
            "textAlign": "right",
            "color": SLATE,
            "fontSize": "14px",
            "fontWeight": "600",
            "fontFamily": MONO,
        },
    )
    delta_badge = html.Span(
        delta_text,
        style={
            "width": "80px",
            "textAlign": "center",
            "fontSize": "13px",
            "fontWeight": "700",
            "color": delta_color,
            "background": delta_bg,
            "borderRadius": "6px",
            "padding": "2px 8px",
        },
    )
    return html.Div(
        [label_cell, before_cell, after_cell, delta_badge],
        style={
            "display": "flex",
            "alignItems": "center",
            "gap": "16px",
            "padding": "12px 0",
            "borderBottom": f"1px solid {CARD_BORDER}",
        },
    )
|
|
|
|
|
|
def table_header(cols):
    """Build the header row for a flex-based comparison table.

    Args:
        cols: Iterable of column dicts. Each must have a ``"label"`` key and
            may have ``"flex"`` (truthy makes the column flexible),
            ``"width"`` (CSS width) and ``"align"`` (text alignment,
            defaulting to ``"left"``).

    Returns:
        An ``html.Div`` flex row with one header cell per column.
    """
    header_cells = []
    for col in cols:
        caption = col["label"]
        cell_style = {
            # Unset properties are passed as None, exactly as declared.
            "flex": "1" if col.get("flex") else None,
            "width": col.get("width"),
            "textAlign": col.get("align", "left"),
            "fontWeight": "700",
            "fontSize": "13px",
            "color": ACCENT,
            "textTransform": "uppercase",
            "letterSpacing": "0.05em",
        }
        header_cells.append(html.Div(caption, style=cell_style))

    row_style = {
        "display": "flex",
        "gap": "16px",
        "padding": "10px 0",
        "borderBottom": f"2px solid {CARD_BORDER}",
        "marginBottom": "4px",
    }
    return html.Div(style=row_style, children=header_cells)
|
|
|
|
|
|
# ── Charts ───────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def make_memory_chart():
    """Build the grouped before/after memory bar chart.

    Plots pre-partition RSS, post-partition RSS and the K8s allocation from
    ``MEM_BEFORE`` and ``MEM_AFTER``. The K8s figure is stored in GB and is
    multiplied by 1024 so all three bars share the MB axis.

    Returns:
        A ``plotly.graph_objects.Figure`` with two bar traces.
    """
    categories = ["Pre-Partition RSS", "Post-Partition RSS", "K8s Allocation"]

    def _values_mb(snapshot):
        return [
            snapshot["pre_partition_mb"],
            snapshot["post_partition_mb"],
            snapshot["k8s_gb"] * 1024,
        ]

    # (legend name, values, bar color, label color) for each trace, in order.
    series = (
        ("Before (4 OCR workers)", _values_mb(MEM_BEFORE), LIGHT_GRAY, GRAY),
        ("After (serial OCR)", _values_mb(MEM_AFTER), ACCENT, ACCENT),
    )

    chart = go.Figure()
    for legend_name, values, bar_color, label_color in series:
        chart.add_trace(
            go.Bar(
                name=legend_name,
                x=categories,
                y=values,
                marker_color=bar_color,
                marker_cornerradius=6,
                text=[f"{v:,.0f} MB" for v in values],
                textposition="outside",
                textfont={"size": 13, "color": label_color},
            )
        )

    legend_cfg = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.05,
        "xanchor": "center",
        "x": 0.5,
        "font": {"size": 13},
    }
    y_axis_cfg = {
        "title": "Memory (MB)",
        "gridcolor": CARD_BORDER,
        "zeroline": False,
    }
    chart.update_layout(
        barmode="group",
        bargap=0.3,
        bargroupgap=0.1,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font={"family": FONT, "size": 13, "color": SLATE},
        yaxis=y_axis_cfg,
        xaxis={"title": ""},
        margin={"t": 20, "b": 60, "l": 60, "r": 20},
        legend=legend_cfg,
        height=380,
    )
    return chart
|
|
|
|
|
|
def make_k8s_chart():
    """Build the horizontal overlay chart for the K8s allocation (32 GB -> 4 GB).

    Both bars use fixed values written in this function. They are drawn on
    the same row with ``barmode="overlay"``; the first (32 GB) bar is
    half-transparent.

    Returns:
        A ``plotly.graph_objects.Figure`` with two horizontal bar traces.
    """
    # Per-trace settings; everything the two bars share is passed in the loop.
    bar_specs = (
        {
            "x": [32],
            "marker_color": LIGHT_GRAY,
            "opacity": 0.5,
            "name": "Before: 32 GB",
            "text": ["32 GB"],
            "textfont": {"size": 14, "color": WHITE},
        },
        {
            "x": [4],
            "marker_color": ACCENT,
            "name": "Recommended: 4 GB",
            "text": ["4 GB"],
            "textfont": {"size": 14, "color": DARK, "family": FONT},
        },
    )

    chart = go.Figure()
    for spec in bar_specs:
        chart.add_trace(
            go.Bar(
                y=["K8s Pod"],
                orientation="h",
                marker_cornerradius=6,
                textposition="inside",
                **spec,
            )
        )

    x_axis_cfg = {
        "title": "Memory (GB)",
        "gridcolor": CARD_BORDER,
        "zeroline": False,
        "range": [0, 36],
    }
    legend_cfg = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.10,
        "xanchor": "center",
        "x": 0.5,
        "font": {"size": 12},
    }
    chart.update_layout(
        barmode="overlay",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font={"family": FONT, "size": 13, "color": SLATE},
        xaxis=x_axis_cfg,
        margin={"t": 10, "b": 40, "l": 80, "r": 20},
        legend=legend_cfg,
        height=160,
    )
    return chart
|
|
|
|
|
|
def make_ci_chart():
    """Build the grouped before/after bar chart for the CI comparison.

    Jobs spawned and billed minutes are read from ``CI_BEFORE`` and
    ``CI_AFTER``; the wall-clock durations are fixed constants (in seconds)
    in this function. The bar labels are derived from the plotted values, so
    the text cannot drift from the bars when the underlying data changes.

    Returns:
        A ``plotly.graph_objects.Figure`` with two bar traces.
    """
    cats = ["Jobs Spawned", "Billed Minutes", "Wall Clock (s)"]
    before = [CI_BEFORE["jobs_spawned"], CI_BEFORE["billed_min"], 229]  # 3m49s
    after = [CI_AFTER["jobs_spawned"], CI_AFTER["billed_min"], 65]  # 1m05s

    def _labels(values):
        """Return display labels for (jobs, billed minutes, wall seconds)."""
        jobs, minutes, seconds = values
        whole_min, rem_sec = divmod(seconds, 60)
        return [
            f"{jobs:,.0f}",
            f"{minutes:,.0f} min",
            f"{whole_min}m {rem_sec:02d}s",
        ]

    fig = go.Figure()
    # (legend name, values, bar color, label color) for each trace, in order.
    for name, values, bar_color, label_color in (
        ("Before", before, LIGHT_GRAY, GRAY),
        ("After", after, ACCENT, ACCENT),
    ):
        fig.add_trace(
            go.Bar(
                name=name,
                x=cats,
                y=values,
                marker_color=bar_color,
                marker_cornerradius=6,
                text=_labels(values),
                textposition="outside",
                textfont={"size": 13, "color": label_color},
            )
        )

    fig.update_layout(
        barmode="group",
        bargap=0.3,
        bargroupgap=0.1,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font={"family": FONT, "size": 13, "color": SLATE},
        yaxis={"gridcolor": CARD_BORDER, "zeroline": False},
        xaxis={"title": ""},
        margin={"t": 20, "b": 60, "l": 60, "r": 20},
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.05,
            "xanchor": "center",
            "x": 0.5,
            "font": {"size": 13},
        },
        height=340,
    )
    return fig
|
|
|
|
|
|
# ── View builders ────────────────────────────────────────────────────────────
|
|
|
|
|
|
def build_exec_view():
    """Build the Executive Brief view (the ``Div`` with id ``exec-view``).

    The view is static: all of its text and figures are literals in this
    function. In order it contains the problem statement, the before/after
    result cards with the K8s chart, the test methodology, the two
    "separate engagement" cards, and the delivery statistics.

    Returns:
        An ``html.Div`` with ``id="exec-view"``.
    """

    def outcome_card(
        badge, badge_color, figure_text, figure_color, caption, facts, fact_color, verdict
    ):
        # One BEFORE/AFTER card: badge, big figure, caption, then a ruled-off
        # list of facts closed by a bold verdict line in the badge color.
        fact_rows = [
            html.Div(
                fact,
                style={
                    "color": fact_color,
                    "fontSize": "14px",
                    "marginBottom": "4px",
                },
            )
            for fact in facts
        ]
        fact_rows.append(
            html.Div(
                verdict,
                style={
                    "color": badge_color,
                    "fontSize": "14px",
                    "fontWeight": "600",
                },
            )
        )
        return card(
            [
                html.Div(
                    badge,
                    style={
                        "fontSize": "11px",
                        "fontWeight": "700",
                        "color": badge_color,
                        "letterSpacing": "0.1em",
                        "marginBottom": "16px",
                    },
                ),
                html.Div(
                    figure_text,
                    style={
                        "fontSize": "48px",
                        "fontWeight": "800",
                        "color": figure_color,
                        "lineHeight": "1",
                    },
                ),
                html.Div(
                    caption,
                    style={
                        "fontSize": "14px",
                        "color": GRAY,
                        "marginTop": "4px",
                    },
                ),
                html.Div(
                    style={
                        "marginTop": "16px",
                        "paddingTop": "16px",
                        "borderTop": f"1px solid {CARD_BORDER}",
                    },
                    children=fact_rows,
                ),
            ],
            flex="1",
            minWidth="260px",
            borderTop=f"4px solid {badge_color}",
        )

    def protocol_item(lead, text):
        # Bullet with a bold lead-in followed by plain text.
        return html.Li(
            [
                html.Span(lead, style={"fontWeight": "600", "color": SLATE}),
                text,
            ]
        )

    def engagement_card(title, tag_color, blurb):
        # Card for work delivered outside the core engagement.
        return card(
            [
                html.Div(
                    title,
                    style={
                        "fontWeight": "700",
                        "color": SLATE,
                        "fontSize": "16px",
                        "marginBottom": "8px",
                    },
                ),
                html.Div(
                    "separate engagement",
                    style={
                        "fontSize": "12px",
                        "color": tag_color,
                        "fontWeight": "600",
                        "letterSpacing": "0.05em",
                        "marginBottom": "12px",
                    },
                ),
                html.P(
                    blurb,
                    style={
                        "color": GRAY,
                        "fontSize": "14px",
                        "lineHeight": "1.6",
                        "margin": "0",
                    },
                ),
            ],
            flex="1",
            minWidth="280px",
        )

    def delivery_stat(count, color, caption):
        # Large number with a small caption underneath.
        return html.Div(
            [
                html.Div(
                    count,
                    style={
                        "fontSize": "36px",
                        "fontWeight": "800",
                        "color": color,
                        "lineHeight": "1",
                    },
                ),
                html.Div(
                    caption,
                    style={
                        "fontSize": "14px",
                        "color": GRAY,
                        "marginTop": "4px",
                    },
                ),
            ]
        )

    # ── The Problem ──
    problem_card = card(
        [
            html.P(
                [
                    "The root cause: on Knative pods with ",
                    html.Span(
                        "1 CPU request",
                        style={"fontWeight": "700", "color": ACCENT},
                    ),
                    ", Python's ",
                    html.Code(
                        "os.cpu_count()",
                        style={
                            "fontFamily": MONO,
                            "color": ACCENT,
                            "fontSize": "13px",
                        },
                    ),
                    " returns the ",
                    html.Span(
                        "host's full CPU count",
                        style={"fontWeight": "700"},
                    ),
                    " (e.g. 4), so the OCR pool spawns 4 workers that each load the full ONNX model set "
                    "- with zero parallelism benefit on a single core. This means 4x the memory for no speed gain.",
                ],
                style={
                    "color": GRAY,
                    "fontSize": "15px",
                    "lineHeight": "1.7",
                    "margin": "0",
                },
            ),
        ]
    )

    # ── The Result ──
    result_row = html.Div(
        style={"display": "flex", "gap": "20px", "flexWrap": "wrap"},
        children=[
            outcome_card(
                "BEFORE",
                RED,
                "32 GB",
                SLATE,
                "K8s pod allocation",
                ["3,491 MB peak RSS", "268 MB max single allocation"],
                GRAY,
                "Still OOM'ing occasionally",
            ),
            html.Div(
                "\u2192",
                style={
                    "fontSize": "42px",
                    "color": GRAY,
                    "alignSelf": "center",
                    "padding": "0 8px",
                },
            ),
            outcome_card(
                "AFTER",
                GREEN,
                "4 GB",
                GREEN,
                "recommended K8s allocation",
                ["1,398 MB peak RSS", "134 MB max single allocation"],
                SLATE,
                "2.6 GB headroom at 4 GB limit",
            ),
        ],
    )
    k8s_card = card(
        [
            dcc.Graph(
                figure=make_k8s_chart(),
                config={"displayModeBar": False},
            ),
            html.P(
                "With serial OCR on 1-CPU pods, peak RSS is ~1.4 GB. "
                "A 4 GB request / 6 GB limit provides headroom for document size variance. "
                "This is a direct per-pod infrastructure cost reduction.",
                style={
                    "color": GRAY,
                    "fontSize": "14px",
                    "lineHeight": "1.6",
                    "marginTop": "12px",
                },
            ),
        ],
        marginTop="20px",
    )

    # ── How This Was Tested ──
    methodology_card = card(
        [
            html.Ul(
                [
                    protocol_item(
                        "Same hardware: ",
                        "Azure Standard_D8s_v5 (8 vCPU, 32 GB RAM)",
                    ),
                    protocol_item(
                        "Same workload: ",
                        "18 common partition tests (od_only, hi_res, pptx, docx)",
                    ),
                    protocol_item(
                        "Same model: ",
                        "Proprietary YOLOX, identical weights",
                    ),
                    protocol_item(
                        "Profiler: ",
                        "memray --native (captures C/C++ malloc + mmap, not just Python)",
                    ),
                    protocol_item(
                        "Protocol: ",
                        "VM rebooted + 5-min idle before each run",
                    ),
                ],
                style={
                    "color": GRAY,
                    "fontSize": "14px",
                    "lineHeight": "2.0",
                    "paddingLeft": "20px",
                    "margin": "0",
                },
            ),
        ]
    )

    # ── Beyond core-product ──
    engagements_row = html.Div(
        style={"display": "flex", "gap": "20px", "flexWrap": "wrap"},
        children=[
            engagement_card(
                "CI/CD Modernization",
                ACCENT,
                "Consolidated 28 Python packages from per-package invoke/pip/venv toolchains "
                "into a single uv workspace with unified CI. "
                "Wall clock dropped 72%, CI cost dropped 85%.",
            ),
            engagement_card(
                "Security Audit",
                RED,
                "Identified dependency confusion risk across 28 packages and a lockfile bypass "
                "in Docker builds that caused CVE patches to never reach production for ~2 months.",
            ),
        ],
    )

    # ── Delivered ──
    delivered_card = card(
        [
            html.Div(
                style={"display": "flex", "gap": "40px", "flexWrap": "wrap"},
                children=[
                    delivery_stat("12", ACCENT, "PRs merged"),
                    delivery_stat("7", AMBER, "PRs in progress"),
                    delivery_stat("3", SLATE, "repos touched"),
                    delivery_stat("348", GREEN, "tests passing"),
                ],
            ),
        ]
    )

    return html.Div(
        id="exec-view",
        children=[
            section(
                "The Problem",
                "core-product pods were configured with 32 GB K8s limits and still occasionally OOM'ing.",
            ),
            problem_card,
            section("The Result"),
            result_row,
            k8s_card,
            section(
                "How This Was Tested",
                "Reproducible A/B benchmark on identical hardware with controlled conditions.",
            ),
            methodology_card,
            section(
                "Beyond core-product",
                "Delivered in addition to the core performance work. Each is available as a standalone engagement.",
            ),
            engagements_row,
            section("Delivered"),
            delivered_card,
        ],
    )
|
|
|
|
|
|
def build_team_view():
    """Build the Engineering Team view (the ``Div`` with id ``team-view``).

    The view starts hidden (``display: none``). It contains, in order: the
    four memory change cards, the memory chart and comparison table, the two
    latency change cards, the CI chart and comparison table, and the
    reliability / security cards. Table values are read from ``MEM_*`` and
    ``CI_*``; PR links are built from ``CORE_PRODUCT_BASE``.

    Returns:
        An ``html.Div`` with ``id="team-view"``.
    """

    def pr_anchor(text, number):
        # Link to a core-product PR, opened in a new tab.
        return html.A(
            text,
            href=f"{CORE_PRODUCT_BASE}/{number}",
            target="_blank",
            style={
                "color": BLUE,
                "fontSize": "13px",
                "textDecoration": "none",
            },
        )

    def link_row(anchor, gap):
        # Wrapper that spaces a PR link below the preceding content.
        return html.Div(anchor, style={"marginTop": gap})

    def body_text(content, margin="0"):
        # Standard gray body paragraph used inside every card.
        return html.P(
            content,
            style={
                "color": GRAY,
                "fontSize": "14px",
                "lineHeight": "1.6",
                "margin": margin,
            },
        )

    def card_title(text):
        return html.Div(
            text,
            style={
                "fontWeight": "700",
                "color": SLATE,
                "fontSize": "16px",
                "marginBottom": "12px",
            },
        )

    def change_card(stripe, parts):
        # CARD-styled block with a colored left stripe.
        return html.Div(
            parts,
            style={
                **CARD,
                "marginBottom": "16px",
                "borderLeft": f"4px solid {stripe}",
            },
        )

    def inline_code(snippet):
        return html.Code(snippet, style={"fontFamily": MONO, "color": ACCENT})

    def savings_line(headline, remainder):
        # Green bold saving followed by gray context.
        return html.Div(
            [
                html.Span(
                    headline,
                    style={"color": GREEN, "fontWeight": "700"},
                ),
                html.Span(remainder, style={"color": GRAY}),
            ],
            style={"marginTop": "8px", "fontSize": "14px"},
        )

    def comparison_header():
        # Column layout shared by the memory and CI tables.
        return table_header(
            [
                {"label": "Metric", "flex": True},
                {"label": "Before", "width": "140px", "align": "right"},
                {"label": "After", "width": "140px", "align": "right"},
                {"label": "Delta", "width": "80px", "align": "center"},
            ]
        )

    def topic_heading(title, tag, tag_style):
        # Inline title plus a trailing tag, used by the reliability cards.
        return html.Div(
            [
                html.Span(
                    title,
                    style={
                        "fontWeight": "700",
                        "color": SLATE,
                        "fontSize": "15px",
                    },
                ),
                html.Span(tag, style=tag_style),
            ]
        )

    def comma():
        return html.Span(", ", style={"color": BLUE, "fontSize": "13px"})

    # ── What Changed: Memory ──
    cpu_aware_card = change_card(
        ACCENT,
        [
            html.Div(
                [
                    html.Div(
                        "CPU-Aware OCR Worker Count",
                        style={
                            "fontWeight": "700",
                            "color": SLATE,
                            "fontSize": "16px",
                        },
                    ),
                    html.Span(
                        "Biggest impact",
                        style={
                            "marginLeft": "12px",
                            "padding": "2px 10px",
                            "borderRadius": "999px",
                            "fontSize": "12px",
                            "fontWeight": "600",
                            "background": ACCENT,
                            "color": DARK,
                        },
                    ),
                ],
                style={
                    "marginBottom": "12px",
                    "display": "flex",
                    "alignItems": "center",
                },
            ),
            body_text(
                [
                    inline_code("os.cpu_count()"),
                    " returns the host CPU count (4), not the cgroup limit (1). ",
                    "The OCR pool was spawning 4 workers on a 1-CPU pod, each loading the full ONNX model set. "
                    "Replaced with ",
                    inline_code("os.sched_getaffinity(0)"),
                    " which respects cgroup/taskset limits. Result: serial mode on 1-CPU pods, "
                    "eliminating 3 redundant model copies from memory.",
                ]
            ),
            link_row(pr_anchor("PR #1502", 1502), "8px"),
        ],
    )
    resize_card = change_card(
        GREEN,
        [
            card_title("Resize-First Preprocessing"),
            body_text(
                "Pages were being converted to full-resolution numpy arrays before any resizing. "
                "Now resizes the PIL image first, avoiding a large temporary allocation for every page."
            ),
            link_row(pr_anchor("PR #1441", 1441), "8px"),
        ],
    )
    release_card = change_card(
        GREEN,
        [
            card_title("Early Page Image Release"),
            body_text(
                "Page images were held in memory through the entire table OCR + transformer inference pipeline. "
                "Now freed as soon as OCR is complete, reducing peak concurrent memory."
            ),
            link_row(pr_anchor("PR #1448", 1448), "8px"),
        ],
    )
    jemalloc_card = change_card(
        AMBER,
        [
            card_title("jemalloc Allocator"),
            body_text(
                "Switched from glibc malloc to jemalloc via LD_PRELOAD. Reduces memory fragmentation "
                "from the allocate/free/allocate pattern in the image processing pipeline. "
                "Opt-in via environment variable, fault-tolerant if jemalloc is not installed."
            ),
            link_row(pr_anchor("PR #1507", 1507), "8px"),
        ],
    )

    # ── Memory Results ──
    memory_chart_card = card(
        [
            dcc.Graph(
                figure=make_memory_chart(),
                config={"displayModeBar": False},
            )
        ]
    )
    memory_table = card(
        [
            comparison_header(),
            metric_row(
                "Pre-partition RSS",
                MEM_BEFORE["pre_partition_mb"],
                MEM_AFTER["pre_partition_mb"],
                "MB",
            ),
            metric_row(
                "Post-partition RSS",
                MEM_BEFORE["post_partition_mb"],
                MEM_AFTER["post_partition_mb"],
                "MB",
            ),
            metric_row(
                "Max single allocation",
                MEM_BEFORE["max_alloc_mb"],
                MEM_AFTER["max_alloc_mb"],
                "MB",
            ),
            metric_row(
                "K8s allocation",
                MEM_BEFORE["k8s_gb"] * 1024,
                MEM_AFTER["k8s_gb"] * 1024,
                "MB",
            ),
        ],
        marginTop="20px",
    )

    # ── What Changed: Latency ──
    bmp_card = change_card(
        GREEN,
        [
            card_title("BMP Instead of PNG for PDF Rendering"),
            body_text(
                "pdfium was rendering pages as PNG (compressed) when the downstream consumer immediately "
                "decompresses to a raw bitmap. Switching to BMP skips the compression step entirely."
            ),
            savings_line("-89 ms/page", " | -890 ms for a 10-page scan"),
            link_row(pr_anchor("PR #1503 (open)", 1503), "4px"),
        ],
    )
    tesseract_card = change_card(
        GREEN,
        [
            card_title("Direct File Path to Tesseract"),
            body_text(
                "The OCR path was converting numpy -> PIL -> PNG bytes -> temp file -> tesseract. "
                "In serial mode, the page image file already exists on disk. "
                "Passing the path directly skips three intermediate copies."
            ),
            savings_line(
                "-515 ms/page",
                " | -5,148 ms for a 10-page scan | 12.2% of pipeline",
            ),
            link_row(pr_anchor("PR #1506 (merged)", 1506), "4px"),
        ],
    )

    # ── What Changed: CI/CD ──
    ci_chart_card = card(
        [
            dcc.Graph(
                figure=make_ci_chart(),
                config={"displayModeBar": False},
            )
        ]
    )
    ci_table = card(
        [
            comparison_header(),
            metric_row(
                "CI jobs spawned",
                CI_BEFORE["jobs_spawned"],
                CI_AFTER["jobs_spawned"],
            ),
            metric_row(
                "Jobs that ran",
                CI_BEFORE["jobs_ran"],
                CI_AFTER["jobs_ran"],
            ),
            metric_row(
                "Billed minutes",
                CI_BEFORE["billed_min"],
                CI_AFTER["billed_min"],
                "min",
            ),
            metric_row(
                "Cost per run",
                CI_BEFORE["cost"],
                CI_AFTER["cost"],
                "$",
                "{:.2f}",
            ),
        ],
        marginTop="20px",
    )

    # ── What Changed: Reliability & Code Quality ──
    event_loop_card = card(
        [
            topic_heading(
                "Event loop blocking",
                " 3 PRs",
                {
                    "color": ACCENT,
                    "fontSize": "13px",
                    "fontWeight": "600",
                },
            ),
            body_text(
                "Gzip decompression, PDF validation, and CSV response merging were all running "
                "synchronously in the async event loop. Fixed with async I/O wrappers.",
                margin="8px 0 0",
            ),
            html.Div(
                [
                    pr_anchor("#1398", 1398),
                    comma(),
                    pr_anchor("#1399", 1399),
                    comma(),
                    pr_anchor("#1400", 1400),
                ],
                style={"marginTop": "8px"},
            ),
        ]
    )
    security_card = card(
        [
            topic_heading(
                "Security audit",
                " separate engagement",
                {
                    "marginLeft": "12px",
                    "padding": "2px 10px",
                    "borderRadius": "999px",
                    "fontSize": "11px",
                    "fontWeight": "600",
                    "background": LIGHT_RED,
                    "color": RED,
                },
            ),
            body_text(
                [
                    "Identified dependency confusion risk across 28 packages "
                    "and a lockfile bypass in Docker builds that caused CVE patches to never reach "
                    "production for ~2 months. The team fixed the Docker issue in ",
                    # This inline link inherits the paragraph font size, so
                    # it is built without the fontSize used by pr_anchor.
                    html.A(
                        "#1465",
                        href=f"{CORE_PRODUCT_BASE}/1465",
                        target="_blank",
                        style={
                            "color": BLUE,
                            "textDecoration": "none",
                        },
                    ),
                    ".",
                ],
                margin="8px 0 0",
            ),
        ]
    )
    reliability_column = html.Div(
        style={
            "display": "flex",
            "gap": "16px",
            "flexDirection": "column",
        },
        children=[event_loop_card, security_card],
    )

    return html.Div(
        id="team-view",
        style={"display": "none"},
        children=[
            section(
                "What Changed: Memory",
                "Three root causes fixed, one allocator optimization added.",
            ),
            cpu_aware_card,
            resize_card,
            release_card,
            jemalloc_card,
            section("Memory Results"),
            memory_chart_card,
            memory_table,
            section(
                "What Changed: Latency",
                "Two optimizations in the OCR pipeline that eliminate redundant image format conversions.",
            ),
            bmp_card,
            tesseract_card,
            section(
                "What Changed: CI/CD",
                "platform-libs unified workflow - 28 packages, one CI pipeline.",
            ),
            ci_chart_card,
            ci_table,
            section("What Changed: Reliability & Code Quality"),
            reliability_column,
        ],
    )
|
|
|
|
|
|
def build_detail_view():
    """Assemble the "Full Detail" tier of the report.

    Returns:
        An ``html.Div`` with id ``detail-view`` whose initial style is
        ``display: none``. Its children are, in order: the merged-PR
        table, the open-PR table, the A/B benchmark card and footnote,
        the latency table, the CI/CD bullet list, the cost-impact card,
        the two security finding cards and the benchmark-environment
        table.
    """

    def columns(*pairs):
        """Expand (header, column id) pairs into DataTable column specs."""
        return [{"name": header, "id": column_id} for header, column_id in pairs]

    def striped(extra_rules=()):
        """Return TABLE_STYLE kwargs with odd-row striping plus *extra_rules*."""
        rules = [{"if": {"row_index": "odd"}, "backgroundColor": "#1f1f23"}]
        rules.extend(extra_rules)
        return {**TABLE_STYLE, "style_data_conditional": rules}

    def header_cell(label, width, align):
        """Build one fixed-width column spec for ``table_header``."""
        return {"label": label, "width": width, "align": align}

    def pr_link(base_url, number, style):
        """Render ``#<number>`` as a link to ``<base_url>/<number>`` in a new tab."""
        return html.A(
            f"#{number}",
            href=f"{base_url}/{number}",
            target="_blank",
            style=style,
        )

    def footnote(text):
        """Small light-gray paragraph used under a card for caveats."""
        return html.P(
            text,
            style={
                "color": LIGHT_GRAY,
                "fontSize": "12px",
                "marginTop": "12px",
            },
        )

    def money_cell(amount, color, bold):
        """Right-aligned monospace amount cell for the cost table."""
        cell_style = {
            "width": "140px",
            "textAlign": "right",
            "color": color,
            "fontSize": "14px",
        }
        if bold:
            cell_style["fontWeight"] = "700"
        # fontFamily stays last so the key order matches both row variants.
        cell_style["fontFamily"] = MONO
        return html.Div(amount, style=cell_style)

    def cost_row(label, monthly, annual, color, bold=False):
        """One flex row of the cost table: label, monthly and annual amounts."""
        return html.Div(
            [
                html.Div(
                    label,
                    style={
                        "flex": "1",
                        "fontWeight": "600",
                        "color": SLATE,
                        "fontSize": "14px",
                    },
                ),
                money_cell(monthly, color, bold),
                money_cell(annual, color, bold),
            ],
            style={
                "display": "flex",
                "gap": "16px",
                "padding": "12px 0",
                "borderBottom": f"1px solid {CARD_BORDER}",
            },
        )

    def finding_card(title, severity, body):
        """Red-bordered card: title plus severity pill, followed by *body*."""
        heading = html.Div(
            [
                html.Div(
                    title,
                    style={
                        "fontWeight": "700",
                        "color": SLATE,
                        "fontSize": "16px",
                    },
                ),
                html.Span(
                    severity,
                    style={
                        "marginLeft": "12px",
                        "padding": "2px 10px",
                        "borderRadius": "999px",
                        "fontSize": "12px",
                        "fontWeight": "600",
                        "background": RED,
                        "color": DARK,
                    },
                ),
            ],
            style={
                "marginBottom": "12px",
                "display": "flex",
                "alignItems": "center",
            },
        )
        return html.Div(
            [heading, *body],
            style={
                **CARD,
                "marginBottom": "16px",
                "borderLeft": f"4px solid {RED}",
            },
        )

    def finding_text(text, **spacing):
        """Gray body paragraph of a finding; *spacing* supplies margin keys."""
        return html.P(
            text,
            style={
                "color": GRAY,
                "fontSize": "14px",
                "lineHeight": "1.6",
                **spacing,
            },
        )

    def resolution(parts):
        """Slate paragraph describing how a finding was resolved."""
        return html.P(
            parts,
            style={
                "color": SLATE,
                "fontSize": "14px",
                "marginTop": "8px",
            },
        )

    # ── Merged PR inventory ──
    category_palette = (
        ("Memory", GREEN),
        ("Latency", ACCENT),
        ("Reliability", BLUE),
        ("Code quality", PURPLE),
        ("CI/CD", PINK),
    )
    category_rules = [
        {
            "if": {
                "filter_query": f'{{cat}} = "{label}"',
                "column_id": "cat",
            },
            "color": tint,
            "fontWeight": "600",
        }
        for label, tint in category_palette
    ]
    merged_table = dash_table.DataTable(
        columns=columns(
            ("PR", "pr"),
            ("Merged", "date"),
            ("Description", "desc"),
            ("Category", "cat"),
            ("Repo", "repo"),
        ),
        data=[
            {
                "pr": f"#{entry[0]}",
                "date": entry[1],
                "desc": entry[2],
                "cat": entry[3],
                "repo": entry[4],
            }
            for entry in MERGED_PRS
        ],
        **striped(category_rules),
    )

    # ── Open / in-progress PRs ──
    open_table = dash_table.DataTable(
        columns=columns(
            ("PR", "pr"),
            ("Description", "desc"),
            ("Category", "cat"),
            ("Repo", "repo"),
        ),
        data=[
            {
                "pr": f"#{entry[0]}",
                "desc": entry[1],
                "cat": entry[2],
                "repo": entry[3],
            }
            for entry in OPEN_PRS
        ],
        **striped(),
    )

    # ── A/B benchmark card ──
    # (label, key into BENCH_BEFORE/BENCH_AFTER, trailing positional args, kwargs)
    benchmark_specs = (
        ("Post-import RSS", "post_import_mib", ("MiB",), {}),
        ("First partition delta", "first_partition_delta_mib", ("MiB",), {}),
        ("Peak memory", "peak_gb", ("GB", "{:.3f}"), {}),
        ("Total allocated", "total_gb", ("GB", "{:.1f}"), {"better": "lower"}),
        ("Allocation count", "allocs", ("", "{:,.0f}"), {"better": "lower"}),
        ("Wall time", "wall_s", ("s", "{:.1f}"), {}),
    )
    benchmark_card = card(
        [
            table_header(
                [
                    {"label": "Metric", "flex": True},
                    header_cell("Baseline", "140px", "right"),
                    header_cell("Current", "140px", "right"),
                    header_cell("Delta", "80px", "center"),
                ]
            ),
            *[
                metric_row(
                    label,
                    BENCH_BEFORE[key],
                    BENCH_AFTER[key],
                    *tail,
                    **options,
                )
                for label, key, tail, options in benchmark_specs
            ],
        ]
    )
    benchmark_note = footnote(
        "Total allocated increased because current uses more frequent smaller allocations - "
        "peak (the OOM-risk metric) still decreased. This pattern indicates better memory recycling."
    )

    # ── Latency table: one row per optimization plus a fixed summary row ──
    latency_rows = [
        {
            "opt": entry[0],
            "per_page": f"-{entry[1]} ms",
            "ten_page": f"-{entry[2]:,} ms",
            "pct": f"{entry[3]}%",
            "pr": entry[4],
        }
        for entry in LATENCY_OPTS
    ]
    latency_rows.append(
        {
            "opt": "Combined",
            "per_page": "-604 ms",
            "ten_page": "-6,038 ms",
            "pct": "14.4%",
            "pr": "",
        }
    )
    latency_table = dash_table.DataTable(
        columns=columns(
            ("Optimization", "opt"),
            ("Per Page", "per_page"),
            ("10-Page Scan", "ten_page"),
            ("% of Pipeline", "pct"),
            ("PR", "pr"),
        ),
        data=latency_rows,
        **TABLE_STYLE,
    )

    # ── CI/CD bullet list ──
    plain_link = {"color": BLUE, "textDecoration": "none"}
    cicd_card = card(
        [
            html.Ul(
                [
                    html.Li("7-11 runners per package consolidated to 1"),
                    html.Li(
                        "28 independent lockfiles replaced by 1 workspace lockfile (245 packages)"
                    ),
                    html.Li(
                        "~1,500 lines of boilerplate deleted (23 tasks.py, 27 .python-version, 13 requirements/ dirs)"
                    ),
                    html.Li(
                        "messagebus test suite optimized: ~64s -> ~30s (sleep-based -> event-based)"
                    ),
                    html.Li(
                        [
                            "2 merged PRs in github-workflows (",
                            pr_link(GITHUB_WORKFLOWS_BASE, 360, dict(plain_link)),
                            ", ",
                            pr_link(GITHUB_WORKFLOWS_BASE, 361, dict(plain_link)),
                            ") adding uv workspace support",
                        ]
                    ),
                ],
                style={
                    "color": SLATE,
                    "fontSize": "14px",
                    "lineHeight": "2.0",
                    "paddingLeft": "20px",
                    "margin": "0",
                },
            ),
        ]
    )

    # ── Cost impact card ──
    cost_card = card(
        [
            table_header(
                [
                    {"label": "", "flex": True},
                    header_cell("Monthly", "140px", "right"),
                    header_cell("Annual", "140px", "right"),
                ]
            ),
            cost_row("GitHub Actions billing savings", "$7", "$85", SLATE),
            cost_row(
                "Engineering wait time reduced",
                "$5,640",
                "$67,680",
                GREEN,
                bold=True,
            ),
            footnote(
                "Assumptions: 15 engineers, 5 CI pushes/engineer/day, $75/h loaded rate, 22 working days/month. "
                "Engineering wait time is an upper-bound proxy for context-switching cost."
            ),
        ]
    )

    # ── Security findings ──
    emphasis = {"fontWeight": "600"}
    bold_link = {"color": BLUE, "fontWeight": "600", "textDecoration": "none"}
    confusion_card = finding_card(
        "Dependency Confusion Risk",
        "High",
        [
            finding_text(
                "Each of 28 platform-libs packages had its own index config pointing at the private "
                "Azure DevOps feed with no explicit priority relative to PyPI. An attacker could publish "
                "a malicious package with the same name as an internal package.",
                margin="0",
            ),
            resolution(
                [
                    "Fix: ",
                    html.Span(
                        "PyPI set as default, private feed set as explicit, "
                        "all internal packages mapped as workspace sources.",
                        style=dict(emphasis),
                    ),
                ]
            ),
        ],
    )
    cve_card = finding_card(
        "CVE Patches Not Reaching Production",
        "Critical",
        [
            finding_text(
                "uv pip install in Docker builds bypassed the lockfile. Renovate CVE fix PRs were merged "
                "and the lockfile updated, but production images never received the patches. "
                "This also caused torch to pull CUDA variants, adding ~2.5 GB of unnecessary nvidia packages.",
                margin="0",
            ),
            finding_text(
                "Unstructured processes sensitive documents for enterprise customers in regulated industries "
                "(HIPAA, SOC 2, PCI-DSS). Unpatched CVEs in production are a compliance risk.",
                marginTop="8px",
            ),
            resolution(
                [
                    "Fixed by Unstructured team: ",
                    pr_link(CORE_PRODUCT_BASE, 1465, dict(bold_link)),
                    html.Span(" (uv sync --locked) and ", style=dict(emphasis)),
                    pr_link(CORE_PRODUCT_BASE, 1319, dict(bold_link)),
                    html.Span(" (Dockerfile layer caching).", style=dict(emphasis)),
                ]
            ),
        ],
    )

    # ── Benchmark environment ──
    environment = (
        ("VM", "Azure Standard_D8s_v5 (8 vCPU, 32 GB RAM)"),
        ("OS", "Ubuntu 20.04"),
        ("Python", "3.12"),
        ("Profiler", "memray --native (C/C++ malloc, mmap)"),
        ("Test Runner", "memray run --native -o {out}.bin --force -m pytest -v"),
        ("Baseline", "pre-Feb 2026 (4 OCR workers, os.cpu_count)"),
        ("Current", "main (serial OCR, sched_getaffinity)"),
        ("Protocol", "VM reboot + 5-min idle before each run"),
        ("Production Target", "Knative pods, 1 CPU / 32 GB RAM -> 4 GB recommended"),
        ("Test Scope", "18 common partition tests (od_only, hi_res, pptx, docx)"),
    )
    environment_table = dash_table.DataTable(
        columns=columns(("Parameter", "param"), ("Value", "value")),
        data=[{"param": name, "value": setting} for name, setting in environment],
        **TABLE_STYLE,
    )

    return html.Div(
        id="detail-view",
        style={"display": "none"},
        children=[
            section(
                "Merged PR Inventory",
                "All PRs merged across 3 repositories, ordered by date.",
            ),
            merged_table,
            section("Open / In-Progress PRs"),
            open_table,
            section(
                "A/B Benchmark Results (memray --native)",
                "18 common partition tests, pre-Feb 2026 baseline vs current main.",
            ),
            benchmark_card,
            benchmark_note,
            section(
                "Latency Optimization Detail",
                "Traced 9-step PIL conversion chain in the OCR path.",
            ),
            latency_table,
            section(
                "CI/CD Detail",
                "platform-libs: 28 packages migrated from per-package toolchains to uv workspace.",
            ),
            cicd_card,
            section("Estimated Cost Impact (platform-libs)"),
            cost_card,
            section("Security Findings"),
            confusion_card,
            cve_card,
            section("Benchmark Environment"),
            environment_table,
        ],
    )
|
|
|
|
|
|
# ── App ──────────────────────────────────────────────────────────────────────
|
|
|
|
# Title shown in the browser tab and reused for the Open Graph title tag.
_REPORT_TITLE = "Unstructured x Codeflash — Engagement Report"

# Extra <meta> tags: responsive viewport plus Open Graph title/description.
_META_TAGS = [
    {"name": "viewport", "content": "width=device-width, initial-scale=1"},
    {"property": "og:title", "content": _REPORT_TITLE},
    {
        "property": "og:description",
        "content": "Performance optimization results: 87.5% memory reduction, 14.4% latency improvement",
    },
]

app = Dash(
    __name__,
    suppress_callback_exceptions=True,
    meta_tags=_META_TAGS,
)
app.title = _REPORT_TITLE
|
|
|
|
# Custom HTML shell for the app. It mirrors Dash's default template and adds
# Google Fonts preconnect/stylesheet links for Inter and JetBrains Mono.
# The {%...%} placeholders are filled in by Dash when the page is served, so
# they must be kept. The root element declares lang="en" so screen readers
# and browser translation tools know the document language.
app.index_string = """<!DOCTYPE html>
<html lang="en">
    <head>
        {%metas%}
        <title>{%title%}</title>
        <link rel="preconnect" href="https://fonts.googleapis.com">
        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&family=JetBrains+Mono:wght@400;600;700&display=swap" rel="stylesheet">
        {%favicon%}
        {%css%}
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>"""
|
|
|
|
def _build_layout():
    """Build the static page layout: hero banner, metrics, toggle, views, footer.

    Returns:
        The root ``html.Div``. The content column holds the hero metric
        cards, the three-button view toggle (``btn-exec``, ``btn-team``,
        ``btn-detail``), the three view containers and the footer.
    """

    def brand_mark(font_size, bottom_gap):
        """Accent-colored "UNSTRUCTURED" wordmark used in the hero and footer."""
        return html.Div(
            "UNSTRUCTURED",
            style={
                "fontSize": font_size,
                "fontWeight": "700",
                "color": ACCENT,
                "letterSpacing": "0.15em",
                "marginBottom": bottom_gap,
            },
        )

    def toggle_button(label, button_id, selected):
        """One view-toggle button; the selected one starts highlighted with n_clicks=1."""
        return html.Button(
            label,
            id=button_id,
            n_clicks=1 if selected else 0,
            style={
                "padding": "10px 24px",
                "border": "none",
                "borderRadius": "8px",
                "cursor": "pointer",
                "fontSize": "14px",
                "fontWeight": "600",
                "fontFamily": FONT,
                "background": ACCENT if selected else "transparent",
                "color": DARK if selected else GRAY,
                "transition": "all 0.2s",
            },
        )

    # ── Hero ──
    # Engagement facts, separated by "|" spans.
    hero_facts = []
    for position, fact in enumerate(
        ("March - April 2026", "12 PRs merged", "7 PRs in progress", "3 repos")
    ):
        if position:
            hero_facts.append(html.Span("|", style={"color": LIGHT_GRAY}))
        hero_facts.append(
            html.Span(fact, style={"color": LIGHT_GRAY, "fontSize": "13px"})
        )

    hero = html.Div(
        style={
            "background": f"linear-gradient(135deg, {DARK} 0%, #1c1917 50%, {DARK} 100%)",
            "padding": "60px 24px 52px",
            "textAlign": "center",
            "borderBottom": f"1px solid {CARD_BORDER}",
        },
        children=[
            brand_mark("13px", "12px"),
            html.H1(
                "Codeflash Engagement Report",
                style={
                    "color": WHITE,
                    "fontSize": "36px",
                    "fontWeight": "800",
                    "margin": "0",
                    "letterSpacing": "-0.02em",
                    "fontFamily": FONT,
                },
            ),
            html.P(
                "core-product performance optimization, CI/CD modernization, and security hardening",
                style={
                    "color": GRAY,
                    "fontSize": "17px",
                    "margin": "12px auto 0",
                    "maxWidth": "700px",
                },
            ),
            html.Div(
                style={
                    "marginTop": "24px",
                    "display": "flex",
                    "justifyContent": "center",
                    "gap": "24px",
                    "flexWrap": "wrap",
                },
                children=hero_facts,
            ),
        ],
    )

    # ── Hero Metrics ──
    # Negative top margin pulls the cards up so they overlap the hero banner.
    headline_numbers = (
        ("32 \u2192 4 GB", "K8s Pod Memory", "87.5% infrastructure reduction", GREEN),
        ("-60%", "Peak RSS", "3,491 MB \u2192 1,398 MB post-partition", GREEN),
        ("-14.4%", "Latency", "604 ms/page eliminated from OCR pipeline", ACCENT),
        ("0", "Regressions", "348 tests pass, zero breakage", ACCENT),
    )
    metrics_strip = html.Div(
        style={
            "display": "flex",
            "gap": "20px",
            "flexWrap": "wrap",
            "marginTop": "-40px",
            "position": "relative",
            "zIndex": "1",
        },
        children=[hero_metric(*spec) for spec in headline_numbers],
    )

    # ── View Toggle ──
    view_toggle = html.Div(
        style={
            "display": "flex",
            "justifyContent": "center",
            "margin": "40px 0 8px",
        },
        children=[
            html.Div(
                style={
                    "display": "inline-flex",
                    "background": CARD_BG,
                    "borderRadius": "12px",
                    "padding": "4px",
                    "border": f"1px solid {CARD_BORDER}",
                },
                children=[
                    toggle_button("Executive Brief", "btn-exec", True),
                    toggle_button("Engineering Team", "btn-team", False),
                    toggle_button("Full Detail", "btn-detail", False),
                ],
            ),
        ],
    )

    # VIEW 1: EXECUTIVE BRIEF — for Chris Maddock (SVP Eng) and JPC
    exec_view = build_exec_view()
    # VIEW 2: ENGINEERING TEAM — for Crag's team: what changed, in plain
    # language, with commit refs
    team_view = build_team_view()
    # VIEW 3: FULL DETAIL — per-PR inventory, benchmarks, methodology
    detail_view = build_detail_view()

    # ── Footer (always visible) ──
    footer = html.Div(
        style={
            "textAlign": "center",
            "marginTop": "64px",
            "paddingTop": "24px",
            "borderTop": f"1px solid {CARD_BORDER}",
        },
        children=[
            brand_mark("11px", "4px"),
            html.P(
                "Codeflash Engagement Report — April 2026",
                style={
                    "color": LIGHT_GRAY,
                    "fontSize": "13px",
                    "margin": "0",
                },
            ),
        ],
    )

    # ── Content ──
    content = html.Div(
        style={
            "maxWidth": "960px",
            "margin": "0 auto",
            "padding": "0 24px 80px",
        },
        children=[
            metrics_strip,
            view_toggle,
            exec_view,
            team_view,
            detail_view,
            footer,
        ],
    )

    return html.Div(
        style={"background": BG, "minHeight": "100vh", "fontFamily": FONT},
        children=[hero, content],
    )


# Built once at import time, so the layout is static.
app.layout = _build_layout()
|
|
|
|
|
|
# ── Toggle callback ──
|
|
# Browser-side view switcher: shows exactly one of the three view containers
# and highlights the matching toggle button.
#
# The clicked button is read from dash_clientside.callback_context.triggered.
# Comparing n_clicks counters alone cannot identify the last click: with
# counts (exec=2, team=1, detail=1) a click on "Full Detail" would otherwise
# leave the Executive view showing. The counter heuristic is kept only as a
# fallback for invocations that report no single trigger (e.g. page load),
# where btn-exec's initial n_clicks=1 selects the Executive view.
#
# NOTE(review): the colour and font literals below presumably mirror
# ACCENT / DARK / GRAY / FONT from theme.py — confirm they stay in sync.
clientside_callback(
    """
    function(exec_c, team_c, detail_c) {
        var base = {
            "padding": "10px 24px", "border": "none", "borderRadius": "8px",
            "cursor": "pointer", "fontSize": "14px", "fontWeight": "600",
            "fontFamily": "'Inter', system-ui, -apple-system, sans-serif",
            "transition": "all 0.2s"
        };
        var active = Object.assign({}, base, {"background": "#ffd227", "color": "#09090b"});
        var inactive = Object.assign({}, base, {"background": "transparent", "color": "#a1a1aa"});
        var show = {"display": "block"};
        var hide = {"display": "none"};

        // Preferred path: ask Dash which input fired this invocation.
        var ctx = window.dash_clientside && window.dash_clientside.callback_context;
        var triggered = (ctx && ctx.triggered) || [];
        var selected = null;
        if (triggered.length === 1 && typeof triggered[0].prop_id === "string") {
            selected = triggered[0].prop_id.split(".")[0];
        }

        // Fallback: no single known trigger, so use the click counters.
        if (selected !== "btn-exec" && selected !== "btn-team" && selected !== "btn-detail") {
            exec_c = exec_c || 0;
            team_c = team_c || 0;
            detail_c = detail_c || 0;
            var mx = Math.max(exec_c, team_c, detail_c);
            if (detail_c === mx && detail_c > 0) {
                selected = "btn-detail";
            } else if (team_c === mx && team_c > 0) {
                selected = "btn-team";
            } else {
                selected = "btn-exec";
            }
        }

        if (selected === "btn-detail") return [hide, hide, show, inactive, inactive, active];
        if (selected === "btn-team") return [hide, show, hide, inactive, active, inactive];
        return [show, hide, hide, active, inactive, inactive];
    }
    """,
    Output("exec-view", "style"),
    Output("team-view", "style"),
    Output("detail-view", "style"),
    Output("btn-exec", "style"),
    Output("btn-team", "style"),
    Output("btn-detail", "style"),
    Input("btn-exec", "n_clicks"),
    Input("btn-team", "n_clicks"),
    Input("btn-detail", "n_clicks"),
)
|
|
|
|
# Module-level handle to the Dash app's underlying server object
# (conventionally what an external WSGI runner imports).
server = app.server


def _serve():
    """Run the app with Dash's built-in server on port 8050, debug off."""
    app.run(port=8050, debug=False)


if __name__ == "__main__":
    _serve()
|