codeflash-agent/reports/unstructured/data.json
Kevin Turcios 33faedf427
Add Unstructured report, rewrite statusline, format evals/scripts (#20)
* Add Unstructured engagement report as uv workspace member

Three-tier Plotly Dash app (Executive Brief, Engineering Team, Full
Detail) with data in JSON, theme constants in theme.py, and Dash
production improvements (Google Fonts, clientside callbacks, meta tags).

Also: add .playwright-mcp/ to .gitignore, add reports/* ruff overrides,
remove tracked .codeflash/observability/read-tracker.

* Rewrite statusline to derive context from git state

Detects active area from changed files (reports, packages, plugin,
.codeflash, case-studies, evals), falls back to branch name convention
(perf/*, feat/*, fix/*), shows dirty indicator. Uses whoami for
cross-platform user detection.

* Add pre-push lint rule to commit guidelines

* Exclude .codeflash/ from ruff linting

Benchmark and profiling scripts in .codeflash/ are scratch work, not
package source. Excluding them prevents CI failures from ad-hoc scripts.

* Run ruff format across packages, scripts, evals, and plugin refs

* Fix github-app async test failures in CI

Add asyncio_mode = "auto" to root pytest config so async tests
are detected when running from the repo root via uv run pytest packages/.
2026-04-15 03:06:16 -05:00

73 lines
3 KiB
JSON

{
"core_product_base": "https://github.com/Unstructured-IO/core-product/pull",
"github_workflows_base": "https://github.com/Unstructured-IO/github-workflows/pull",
"mem_before": {
"pre_partition_mb": 2619,
"post_partition_mb": 3491,
"max_alloc_mb": 268,
"k8s_gb": 32
},
"mem_after": {
"pre_partition_mb": 499,
"post_partition_mb": 1398,
"max_alloc_mb": 134,
"k8s_gb": 4
},
"bench_before": {
"post_import_mib": 1189,
"first_partition_delta_mib": 949,
"peak_gb": 1.660,
"total_gb": 16.398,
"allocs": 5585979,
"wall_s": 76.0
},
"bench_after": {
"post_import_mib": 952,
"first_partition_delta_mib": 707,
"peak_gb": 1.473,
"total_gb": 20.239,
"allocs": 6210809,
"wall_s": 86.0
},
"latency_opts": [
["Pdfium PNG -> BMP render", 89, 890, 2.1, "#1503"],
["Pass file path to tesseract", 515, 5148, 12.2, "#1506"]
],
"ci_before": {
"jobs_spawned": 301,
"jobs_ran": 193,
"billed_min": 205,
"wall": "3m 49s",
"cost": 1.64
},
"ci_after": {
"jobs_spawned": 33,
"jobs_ran": 31,
"billed_min": 31,
"wall": "1m 05s",
"cost": 0.25
},
"merged_prs": [
[1398, "2026-03-27", "Fix: avoid blocking event loop during gzip decompression", "Reliability", "core-product"],
[1399, "2026-04-03", "Fix: avoid blocking event loop during PDF validation", "Reliability", "core-product"],
[1400, "2026-03-30", "Fix: avoid blocking event loop during CSV response merging", "Reliability", "core-product"],
[1441, "2026-04-03", "mem: resize-first preprocessing", "Memory", "core-product"],
[1448, "2026-03-24", "mem: free page image before table OCR", "Memory", "core-product"],
[1464, "2026-03-27", "refactor: replace lazyproperty with cached_property", "Code quality", "core-product"],
[1481, "2026-04-03", "perf: reduce attribute lookups in hot path", "Latency", "core-product"],
[1502, "2026-04-14", "perf: CPU-aware serial OCR (sched_getaffinity)", "Memory", "core-product"],
[1506, "2026-04-13", "perf: pass file path directly to tesseract", "Latency", "core-product"],
[1507, "2026-04-14", "perf: use jemalloc to reduce fragmentation", "Memory", "core-product"],
[360, "2026-04-10", "Add uv workspace support via optional package input", "CI/CD", "github-workflows"],
[361, "2026-04-11", "Skip pip.conf in uv workspace mode", "CI/CD", "github-workflows"]
],
"open_prs": [
[1471, "Async OCR pipeline via aiopytesseract", "Latency", "core-product"],
[1500, "Stacked optimizations for hi_res PDF pipeline", "Memory + Latency", "core-product"],
[1503, "Render PDF pages as BMP instead of PNG", "Latency", "core-product"],
[1505, "Pass image file path directly to tesseract OCR", "Latency", "core-product"],
[1509, "Use BMP instead of PNG for pytesseract temp files", "Latency", "core-product"],
[667, "POC: uv workspace for platform-libs (28 packages)", "CI/CD", "platform-libs"],
[669, "CI baseline measurement (do not merge)", "CI/CD", "platform-libs"]
]
}