mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
Merge main and fix lint errors in reports/unstructured-security
- Resolve merge conflict in security_report.py (take sorted findings from main) - Fix ruff lint/format issues in unstructured-security/app.py
This commit is contained in:
commit
0f117e968a
12 changed files with 2548 additions and 64 deletions
1318
reports/unstructured-security/app.py
Normal file
1318
reports/unstructured-security/app.py
Normal file
File diff suppressed because it is too large
Load diff
16
reports/unstructured-security/assets/codeflash.svg
Normal file
16
reports/unstructured-security/assets/codeflash.svg
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
<svg width="665" height="90" viewBox="0 0 665 90" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M133.849 79.4871C127.682 79.4871 125.148 77.4063 125.148 72.8666C125.148 71.6559 125.375 70.2184 125.715 68.5538L132.865 34.5811C134.416 27.0905 137.708 23.6478 145.652 23.6478H185.035L182.16 37.1915H150.722C148.301 37.1915 147.62 37.7589 147.09 40.1802L142.361 62.4629C142.248 63.0304 142.134 63.4465 142.134 63.9005C142.134 65.1111 142.777 65.5651 144.669 65.5651H176.107L173.232 79.4492H133.849V79.4871Z" fill="white"/>
|
||||
<path d="M193.622 79.4871C187.455 79.4871 184.921 77.4063 184.921 72.8666C184.921 71.6559 185.148 70.2184 185.488 68.5538L192.638 34.5811C194.189 27.0905 197.481 23.6478 205.425 23.6478H243.37C249.764 23.6478 252.185 25.7285 252.185 30.2683C252.185 31.4789 251.958 32.9165 251.618 34.5811L244.467 68.5538C242.803 76.0444 239.625 79.4871 231.567 79.4871H193.622ZM234.972 40.1045C235.085 39.537 235.199 39.1209 235.199 38.7804C235.199 37.4563 234.329 37.0023 232.21 37.0023H210.381C208.074 37.0023 207.279 37.6454 206.863 40.1045L202.134 62.8034C202.02 63.3709 201.907 63.787 201.907 64.241C201.907 65.3381 202.55 65.7921 204.328 65.7921H226.157C228.805 65.7921 229.789 65.0354 230.243 62.8034L234.972 40.1045Z" fill="white"/>
|
||||
<path d="M263.422 79.487C257.255 79.487 254.72 77.4063 254.72 72.8665C254.72 71.6559 254.947 70.2183 255.288 68.5537L262.438 34.581C263.989 27.0904 268.264 23.6477 275.225 23.6477H301.594L298.718 37.0023H280.295C277.987 37.0023 277.192 37.5697 276.663 40.1044L271.934 62.6898C271.82 63.2573 271.707 63.787 271.707 64.1274C271.707 65.338 272.35 65.792 274.242 65.792H298.151L312.035 0.305713H327.584L310.825 79.487H263.422Z" fill="white"/>
|
||||
<path d="M332.238 79.4871C325.958 79.4871 323.423 77.4063 323.423 72.8666C323.423 71.656 323.65 70.2184 323.991 68.5538L331.141 34.5811C332.692 27.0905 335.983 23.6478 344.042 23.6478H381.759C387.396 23.6478 389.591 25.7285 389.591 30.0413C389.591 31.2519 389.477 32.576 389.137 34.1271L384.521 56.0694H347.371L349.262 46.8007H371.772L373.437 39.31C373.55 38.7426 373.55 38.3264 373.55 37.8724C373.55 36.7753 372.983 36.3213 371.242 36.3213H349.073C346.538 36.3213 345.971 36.9645 345.441 39.4235L340.258 63.4465C340.145 64.0897 340.031 64.6571 340.031 65.1111C340.031 66.4352 340.675 66.7757 342.566 66.7757H380.171L377.522 79.4492H332.314L332.238 79.4871Z" fill="white"/>
|
||||
<path d="M390.801 79.487L404.799 13.0928C406.88 3.71055 410.852 0.305713 418.04 0.305713H437.22L434.686 12.3361H423.109C421.142 12.3361 420.234 13.0928 419.818 15.2113L418.04 23.6856H431.811L429.276 35.8295H415.505L406.237 79.487H390.801Z" fill="white"/>
|
||||
<path d="M429.161 79.487L445.92 0.305713H461.356L444.596 79.487H429.161Z" fill="white"/>
|
||||
<path d="M465.782 79.4871C460.145 79.4871 457.838 77.4063 457.838 73.0935C457.838 71.9964 458.064 70.5588 458.405 69.0077L461.167 55.7667C462.945 47.3681 465.593 45.0604 472.63 45.0604H500.965L498.998 54.5561H478.72C476.64 54.5561 475.959 55.1236 475.505 57.4313L474.294 63.2574C474.181 63.9005 474.067 64.3545 474.067 64.8084C474.067 65.9056 474.635 66.246 476.148 66.246H501.949L507.775 38.8939C507.889 38.4399 507.889 38.0238 507.889 37.6833C507.889 36.4727 507.245 35.7917 505.24 35.7917H467.409L469.944 23.6478H515.606C522 23.6478 524.421 25.7285 524.421 30.2683C524.421 31.4789 524.194 32.9165 523.853 34.5811L514.358 79.4492H465.744L465.782 79.4871Z" fill="white"/>
|
||||
<path d="M525.746 79.487L528.394 66.6999H566.642C568.949 66.6999 569.517 66.1325 569.933 64.0517L571.144 58.5283C571.257 58.1879 571.257 57.8852 571.257 57.6582C571.257 56.6746 570.69 56.2206 569.063 56.2206H539.176C534.22 56.2206 532.026 54.1399 532.026 50.6972C532.026 49.9406 532.139 49.1461 532.366 48.276L535.469 33.3704C536.793 26.7499 540.084 23.6855 547.802 23.6855H593.88L591.232 36.1321H553.212C550.904 36.1321 549.996 36.8887 549.58 39.1208L548.596 43.7362C548.483 44.0767 548.483 44.3794 548.483 44.7199C548.483 45.9305 549.126 46.4979 550.79 46.4979H581.018C586.087 46.4979 588.281 48.6922 588.281 52.1348C588.281 52.778 588.168 53.5724 588.054 54.3291L584.763 69.7643C583.325 76.3848 580.034 79.487 572.316 79.487H525.784H525.746Z" fill="white"/>
|
||||
<path d="M639.127 79.487L647.412 40.1044C647.525 39.537 647.639 39.1208 647.639 38.7803C647.639 37.4562 646.768 37.0023 644.877 37.0023H621.838C619.643 37.0023 618.735 37.6454 618.206 40.1044L609.921 79.487H594.485L611.245 0.305713H626.68L620.4 29.7386C623.275 25.5393 626.566 23.6856 631.333 23.6856H655.81C661.75 23.6856 664.398 25.5771 664.398 30.4196C664.398 31.6302 664.284 33.0678 663.944 34.6189L654.448 79.487H639.127Z" fill="white"/>
|
||||
<path d="M24.8853 51.8125L0.00537109 51.8388L27.1447 9.06742H52.0509L24.8853 51.8125Z" fill="#FFC143"/>
|
||||
<path d="M88.3331 21.4679H53.1282L61.0099 9.06735H96.2148L88.3331 21.4679Z" fill="white"/>
|
||||
<path d="M95.7944 48.8436H60.5894L69.9161 34.1311H105.147L95.7944 48.8436Z" fill="white"/>
|
||||
<path d="M71.9656 73.9075H44.6423L52.524 61.507H79.8473L71.9656 73.9075Z" fill="white"/>
|
||||
<path d="M25.857 89.4869H0.977173L36.1295 34.1311H61.0094L25.857 89.4869Z" fill="#FFC143"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 4.9 KiB |
BIN
reports/unstructured-security/assets/unstructured_logo.jpg
Normal file
BIN
reports/unstructured-security/assets/unstructured_logo.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 153 KiB |
3
reports/unstructured-security/plotly-cloud.toml
Normal file
3
reports/unstructured-security/plotly-cloud.toml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
name = "unstructured-security-audit"
|
||||
app_id = "6632c2f8-0bed-40c2-9380-0425f89d6d1c"
|
||||
app_url = "19727fbf-a6a0-45ac-968f-680035ab6b3b"
|
||||
8
reports/unstructured-security/pyproject.toml
Normal file
8
reports/unstructured-security/pyproject.toml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
[project]
|
||||
name = "unstructured-security-audit"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"dash[cloud]>=4.1",
|
||||
"plotly>=6.7",
|
||||
]
|
||||
783
reports/unstructured-security/security_data.json
Normal file
783
reports/unstructured-security/security_data.json
Normal file
|
|
@ -0,0 +1,783 @@
|
|||
{
|
||||
"core_product_base": "https://github.com/Unstructured-IO/core-product/pull",
|
||||
"github_workflows_base": "https://github.com/Unstructured-IO/github-workflows/pull",
|
||||
"platform_libs_base": "https://github.com/Unstructured-IO/platform-libs/pull",
|
||||
"unstructured_base": "https://github.com/Unstructured-IO/unstructured/pull",
|
||||
"unstructured_inference_base": "https://github.com/Unstructured-IO/unstructured-inference/pull",
|
||||
"audit_metadata": {
|
||||
"date": "2026-04-16",
|
||||
"repos_audited": 8,
|
||||
"repos": [
|
||||
"core-product",
|
||||
"github-workflows",
|
||||
"platform-libs",
|
||||
"unstructured",
|
||||
"unstructured-inference",
|
||||
"unstructured-od-models",
|
||||
"unstructured-python-client",
|
||||
"unstructured.pytesseract"
|
||||
],
|
||||
"workflow_files_audited": 69,
|
||||
"dockerfiles_audited": 6,
|
||||
"action_references_audited": 446,
|
||||
"categories_audited": [
|
||||
"Supply Chain",
|
||||
"CI/CD Security",
|
||||
"Docker/Container Security",
|
||||
"Code-Level Vulnerabilities",
|
||||
"Secrets Management",
|
||||
"Build Reproducibility",
|
||||
"Vulnerability Management",
|
||||
"PKI/Cryptography"
|
||||
]
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "SEC-001",
|
||||
"title": "Lockfile Bypass via uv pip install in Docker Builds",
|
||||
"severity": "critical",
|
||||
"category": "Supply Chain",
|
||||
"status": "partially-fixed",
|
||||
"fixed_by": "Unstructured (PR #1465)",
|
||||
"repo": "core-product",
|
||||
"duration_exposed": "~2 months (core path); ongoing (residual)",
|
||||
"description": "Core-product's uv migration used `uv pip install` for ALL Docker build dependencies, completely bypassing the lockfile. Unstructured fixed the primary dependency tree by switching to `uv sync --locked` (PR #1465). However, `uv pip install` is still used in Docker builds for 3 package groups outside the main lockfile: inference proprietary deps (resolved from a separate pyproject.toml at build time), upstream packages installed with --no-deps, and custom OpenCV FIPS-compliant wheels. Additionally, 6+ CI workflow steps and 2 Makefile targets still use `uv pip install` for tooling (pip, setuptools, wheel, types-requests, boto3, awscli, ruff).",
|
||||
"impact": [
|
||||
"FIXED: Main dependency tree now uses uv sync --locked — builds are reproducible for core packages",
|
||||
"REMAINING: Inference deps (unstructured_inference_prop/pyproject.toml) are resolved at build time, not from a lockfile — different builds can get different inference dependency versions",
|
||||
"REMAINING: 6+ CI workflow steps install packages via uv pip install outside any lockfile (ci.yml:67,282,309,370; daily-metric.yml:136; process-render-only-files.yml:125)",
|
||||
"REMAINING: Makefile install target uses uv pip install --no-deps for upstream versions (line 36)",
|
||||
"torch CUDA variant issue is resolved; original 15+ workflow patterns reduced but not eliminated"
|
||||
],
|
||||
"context": "The critical path (production Docker image core dependencies) is fixed. The remaining uv pip install usage is a reduced but real risk — inference dependencies in Docker builds are still resolved non-deterministically, and CI tooling versions are unpinned."
|
||||
},
|
||||
{
|
||||
"id": "SEC-002",
|
||||
"title": "CVE Remediation Not Reaching Production",
|
||||
"severity": "high",
|
||||
"category": "Vulnerability Management",
|
||||
"status": "partially-fixed",
|
||||
"fixed_by": "Unstructured (PRs #1423, #1434, #1437, #1465)",
|
||||
"repo": "core-product",
|
||||
"duration_exposed": "Unknown (core path fixed); ongoing (inference deps)",
|
||||
"description": "Because of the lockfile bypass (SEC-001), CVE remediation PRs merged into main never changed what was installed in production Docker images. The core dependency tree now uses uv sync --locked, so CVE fixes to packages in uv.lock DO reach production. However, inference dependencies installed via `uv pip install -r unstructured_inference_prop/pyproject.toml` are still resolved at build time — CVE fixes to inference dependencies (torch, transformers, onnxruntime, etc.) may still not deploy as expected.",
|
||||
"impact": [
|
||||
"FIXED: CVE remediation for packages in the main uv.lock now reaches production images",
|
||||
"REMAINING: CVE fixes to inference dependencies (installed from unstructured_inference_prop/pyproject.toml without a lockfile constraint) are resolved at build time — Renovate/Dependabot fixes may be overridden by pip resolution",
|
||||
"REMAINING: CI workflow dependencies (boto3, awscli, types-requests) are installed without version pins — vulnerable versions could be pulled",
|
||||
"Original issues (CVE-2026-28351 pypdf, PR #1423, #1434, #1437) are resolved"
|
||||
],
|
||||
"context": "The core fix works. The residual risk is narrower — limited to inference deps and CI tooling — but inference packages (torch, transformers, onnxruntime) are high-value targets for supply chain attacks."
|
||||
},
|
||||
{
|
||||
"id": "SEC-003",
|
||||
"title": "pip.conf Bypass in CI Workflows",
|
||||
"severity": "medium",
|
||||
"category": "Build Pipeline",
|
||||
"status": "fixed",
|
||||
"fixed_by": "Codeflash (PR #361) — incidental fix during uv workspace migration",
|
||||
"repo": "github-workflows",
|
||||
"description": "The shared github-workflows configure-pypi step writes a .venv/pip.conf at the work-dir level, but in uv workspace mode the .venv is at the repo root. This caused private Azure DevOps PyPI index credentials to be written to the wrong location.",
|
||||
"impact": [
|
||||
"Internal packages (utic-instrumentation, unstructured-prompts) failed to resolve in uv workspace mode",
|
||||
"Workaround required environment variable credential injection",
|
||||
"Blocked uv workspace migration for platform-libs"
|
||||
],
|
||||
"context": "Discovered while building the uv workspace POC for platform-libs."
|
||||
},
|
||||
{
|
||||
"id": "SEC-004",
|
||||
"title": "No uv Version Pinning Across Repos",
|
||||
"severity": "medium",
|
||||
"category": "Build Reproducibility",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"duration_exposed": "Ongoing",
|
||||
"description": "None of Unstructured's 7 repos use required-version in [tool.uv]. CI uses setup-uv versions ranging from v5 to v7 across repos. Different uv versions can produce different lockfile resolutions.",
|
||||
"impact": [
|
||||
"Different developers on different uv versions can produce different lockfiles",
|
||||
"CI uses setup-uv versions ranging from v5 to v7 — inconsistent resolution behavior",
|
||||
"No .gitattributes or documented process for handling uv.lock merge conflicts (main repo has a 1.4 MB lockfile)",
|
||||
"Version constraint mismatch: unstructured-inference requires Python >=3.12,<3.13 while unstructured allows >=3.11"
|
||||
],
|
||||
"context": "Creates a vector for lockfile divergence. When two developers generate different lockfiles, one may inadvertently introduce a vulnerable dependency version."
|
||||
},
|
||||
{
|
||||
"id": "SEC-005",
|
||||
"title": "uv pip Resolution Differences (Dependency Confusion)",
|
||||
"severity": "medium",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "unstructured",
|
||||
"description": "numba>=0.60.0 was resolved by uv to version 0.53.1 (Python <3.10 only), while pip correctly picked 0.63.1 (Python 3.12 compatible). The resolution behavior differences between uv and pip are a real risk during migration.",
|
||||
"impact": [
|
||||
"Wrong package version installed silently — no error, just incorrect behavior",
|
||||
"uv's resolution algorithm differs from pip in edge cases involving Python version markers",
|
||||
"Could lead to installing packages with known vulnerabilities if the wrong version is selected"
|
||||
],
|
||||
"context": "Discovered during the unstructured repo's uv migration. This class of bug is subtle and hard to catch."
|
||||
},
|
||||
{
|
||||
"id": "SEC-006",
|
||||
"title": "Socket Security Org-Level Ruleset",
|
||||
"severity": "info",
|
||||
"category": "Positive Control",
|
||||
"status": "active",
|
||||
"repo": "org-wide",
|
||||
"description": "Unstructured-IO has an org-level GitHub ruleset (ID 14342252) requiring the Socket Security GitHub App to scan all PRs for supply chain security issues.",
|
||||
"impact": [
|
||||
"All PRs across the org are scanned for dependency supply chain issues",
|
||||
"Cannot be disabled at the repo level — enforced org-wide",
|
||||
"Catches new vulnerable dependencies before they reach main"
|
||||
],
|
||||
"context": "Effective control. However, it only covers new PRs — does not retroactively scan existing dependencies, and cannot catch the lockfile bypass issue (SEC-001) since uv pip install doesn't change the lockfile."
|
||||
},
|
||||
{
|
||||
"id": "SEC-007",
|
||||
"title": "Claude Code Action Triggered by Any GitHub Commenter",
|
||||
"severity": "critical",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "org-wide (5 repos)",
|
||||
"files": [
|
||||
"unstructured/.github/workflows/claude.yml",
|
||||
"core-product/.github/workflows/claude.yml",
|
||||
"unstructured-inference/.github/workflows/claude.yml",
|
||||
"unstructured-python-client/.github/workflows/claude.yml",
|
||||
"platform-libs/.github/workflows/claude.yaml"
|
||||
],
|
||||
"description": "The Claude Code Action triggers on `issue_comment` for ANY user who comments `@claude` on any issue or PR. The workflow receives the ANTHROPIC_API_KEY secret and runs Claude with Bash tool access — a prompt injection and secret exfiltration vector.",
|
||||
"impact": [
|
||||
"Any GitHub user can trigger the workflow by commenting @claude on any public issue/PR",
|
||||
"The GH_ANTHROPIC_API_KEY secret is exposed to the workflow environment",
|
||||
"Claude follows attacker-controlled instructions from the comment body",
|
||||
"Prompt injection + secret exfiltration vector",
|
||||
"No actor/association check limits who can trigger it"
|
||||
],
|
||||
"context": "Present in all 5 repos with Claude Code Action. Permissions are read-only which limits blast radius, but the API key is still exposed."
|
||||
},
|
||||
{
|
||||
"id": "SEC-008",
|
||||
"title": "Workflow Injection via Unsanitized release_name Input",
|
||||
"severity": "critical",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "github-workflows",
|
||||
"files": ["github-workflows/.github/workflows/create-release.yml:155"],
|
||||
"description": "The `release_name` workflow input is a free-text string injected directly into a `run:` shell block without sanitization. The workflow also has access to a GitHub App private key token (UTIC_GITHUB_CICD_TOKEN_GENERATOR_PRIVATE_KEY), making this a secret exfiltration vector via crafted release names.",
|
||||
"impact": [
|
||||
"Any user with write access can inject arbitrary shell commands via a crafted release name",
|
||||
"GitHub App private key accessible in the same workflow context",
|
||||
"When called via workflow_call, bump_type becomes a string type with no validation (workflow_dispatch constrains it to a choice)",
|
||||
"Shared workflow used by multiple repos — blast radius extends across the org"
|
||||
],
|
||||
"context": "Classic workflow injection pattern — direct ${{ }} interpolation of user input in run: blocks."
|
||||
},
|
||||
{
|
||||
"id": "SEC-009",
|
||||
"title": "pip install from Third-Party Chinese PyPI Mirror in Docker Build",
|
||||
"severity": "critical",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/triton-server/Dockerfile:20"],
|
||||
"description": "The triton-server Dockerfile installs paddlepaddle-gpu from https://pypi.tuna.tsinghua.edu.cn/simple — a third-party Chinese university mirror instead of official PyPI. No hash verification is performed on the downloaded package.",
|
||||
"impact": [
|
||||
"Third-party mirror could serve modified/trojanized packages without detection",
|
||||
"No --require-hashes flag — no integrity verification",
|
||||
"Triton server processes sensitive customer documents in regulated industries",
|
||||
"A compromised mirror is a single point of supply chain failure"
|
||||
],
|
||||
"context": "Supply chain risk for a document processing pipeline handling HIPAA, SOC 2, PCI-DSS regulated data."
|
||||
},
|
||||
{
|
||||
"id": "SEC-010",
|
||||
"title": "Private PyPI Credentials Persisted in GitHub Actions Cache",
|
||||
"severity": "critical",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "github-workflows",
|
||||
"files": ["github-workflows/.github/workflows/setup-legacy-environment.yml:84-101"],
|
||||
"description": "PRIVATE_PYPI_INDEX_URL (containing an embedded Azure DevOps password in the URL) is written to .venv/pip.conf, then the entire .venv/ directory is cached by actions/cache. The credential persists in the GitHub Actions cache across workflow runs.",
|
||||
"impact": [
|
||||
"Azure DevOps PAT/token persists beyond the workflow run in the actions cache",
|
||||
"Any subsequent workflow run that restores this cache can read the credential",
|
||||
"Credential potentially accessible to fork PR workflows if cache key matches",
|
||||
"Violates the principle that secrets should be ephemeral"
|
||||
],
|
||||
"context": "The credential is an Azure DevOps access token for the private unstructured package index."
|
||||
},
|
||||
{
|
||||
"id": "SEC-011",
|
||||
"title": "ECR Password Passed as Unmasked Job Output",
|
||||
"severity": "critical",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": [
|
||||
"core-product/.github/workflows/ci.yml:198",
|
||||
"core-product/.github/workflows/daily-metric.yml:94"
|
||||
],
|
||||
"description": "AWS ECR login password (temporary auth token) is written to $GITHUB_OUTPUT without masking, then consumed by downstream jobs. The token appears in plaintext in workflow logs visible to anyone with repo read access.",
|
||||
"impact": [
|
||||
"ECR tokens grant pull/push access to all private container images in the registry",
|
||||
"Visible in plaintext in workflow logs",
|
||||
"Short-lived (~12 hours) but fully exploitable within that window",
|
||||
"Token passed across job boundaries in cleartext via GITHUB_OUTPUT"
|
||||
],
|
||||
"context": "ECR contains production container images for the document processing pipeline."
|
||||
},
|
||||
{
|
||||
"id": "SEC-012",
|
||||
"title": "Three Docker Images Run as Root",
|
||||
"severity": "critical",
|
||||
"category": "Docker/Container Security",
|
||||
"status": "open",
|
||||
"repo": "core-product, github-workflows",
|
||||
"files": [
|
||||
"core-product/triton-server/Dockerfile (no USER directive anywhere)",
|
||||
"core-product/unstructured-api/Dockerfile.ci:40 (USER root as final directive)",
|
||||
"github-workflows/utic-build-tools/Dockerfile (no USER in runtime stage)"
|
||||
],
|
||||
"description": "Three Docker images run as root by default: the triton ML inference server, the CI build image, and the org-wide build tools image. A container escape or application vulnerability gives the attacker root on the host.",
|
||||
"impact": [
|
||||
"Container escape gives attacker root on the host (depending on runtime config)",
|
||||
"Triton server processes customer documents — high-value target for data exfiltration",
|
||||
"CI image with root access is a prime vector for supply chain attacks",
|
||||
"Build tools image is used by all repos — root there compromises the entire CI pipeline"
|
||||
],
|
||||
"context": "Critical for a company processing HIPAA/SOC 2 regulated data. Container isolation is a key defense layer."
|
||||
},
|
||||
{
|
||||
"id": "SEC-013",
|
||||
"title": "API Key Leaked in Error Response",
|
||||
"severity": "high",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/unstructured-api/prepline_general/api/general.py:691"],
|
||||
"description": "When an invalid API key is submitted, the server echoes the submitted key back in the error response body. If this response is logged by any intermediary (proxy, WAF, load balancer, SIEM) or shown to end users, the attempted key is exposed.",
|
||||
"impact": [
|
||||
"Submitted API keys visible in error responses and downstream logs",
|
||||
"HIPAA/SOC 2 compliance violation — credentials should never appear in logs or responses",
|
||||
"Enables key enumeration and information leakage attacks",
|
||||
"Any logging or monitoring system that captures 401 responses will store the key"
|
||||
],
|
||||
"context": "The API key comparison also uses timing-unsafe != operator (see SEC-029)."
|
||||
},
|
||||
{
|
||||
"id": "SEC-014",
|
||||
"title": "Environment Variable Injection via User-Controlled OCR Agent",
|
||||
"severity": "high",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/unstructured-api/prepline_general/api/general.py:180-181"],
|
||||
"description": "The ocr_agent parameter comes from user form input with no validation and is set directly as os.environ['OCR_AGENT']. While table_ocr_agent has a validation gate, the ocr_agent parameter does NOT. Additionally, os.environ is process-global and not thread-safe — concurrent requests create a race condition where one request's OCR agent setting affects another request's processing.",
|
||||
"impact": [
|
||||
"Attacker can set OCR_AGENT to an arbitrary Python module path",
|
||||
"Process-global os.environ creates race conditions between concurrent requests",
|
||||
"One request's OCR agent setting can silently affect another customer's document processing",
|
||||
"Both a security vulnerability and a reliability/data-isolation concern"
|
||||
],
|
||||
"context": "The race condition means this is both a security and reliability issue affecting multi-tenant document processing."
|
||||
},
|
||||
{
|
||||
"id": "SEC-015",
|
||||
"title": "SSRF via URL Parameters in Document Partitioning",
|
||||
"severity": "high",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "unstructured",
|
||||
"files": [
|
||||
"unstructured/unstructured/partition/auto.py:310",
|
||||
"unstructured/unstructured/partition/html/partition.py:161",
|
||||
"unstructured/unstructured/partition/md.py:97"
|
||||
],
|
||||
"description": "partition(), partition_html(), and partition_md() accept user-controlled URLs and fetch them without any validation against internal/private IP ranges. An attacker can reach cloud metadata endpoints, internal services, or credential stores.",
|
||||
"impact": [
|
||||
"Access to cloud metadata endpoints (169.254.169.254) — exposes AWS/Azure credentials",
|
||||
"Internal service enumeration and data exfiltration",
|
||||
"md.py:97 is worst case — no timeout, no SSL verification option, completely unconstrained HTTP fetch",
|
||||
"Cloud environments (Azure VMs, AWS) expose sensitive credentials via metadata endpoints"
|
||||
],
|
||||
"context": "Unstructured runs on Azure VMs and AWS infrastructure where metadata endpoints expose IAM credentials, subscription IDs, and other sensitive configuration."
|
||||
},
|
||||
{
|
||||
"id": "SEC-016",
|
||||
"title": "Gzip Decompression Bomb — No Size Limit",
|
||||
"severity": "high",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/unstructured-api/prepline_general/api/general.py:526-528"],
|
||||
"description": "A malicious user can upload a gzip bomb — a small compressed file (e.g., 10KB) that decompresses to gigabytes or terabytes. There is no limit on the decompressed size, and SpooledTemporaryFile has no max_size set. A single request can exhaust server memory/disk, causing denial of service against all users.",
|
||||
"impact": [
|
||||
"Denial of service via a single malicious request",
|
||||
"Server memory and disk exhaustion affects ALL users on the shared infrastructure",
|
||||
".gz files are explicitly supported by the API — trivially exploitable",
|
||||
"No rate limiting or size cap on decompression"
|
||||
],
|
||||
"context": "Trivially exploitable since .gz is an accepted upload format. A 42KB gzip bomb can decompress to 5.5 GB."
|
||||
},
|
||||
{
|
||||
"id": "SEC-017",
|
||||
"title": "Insecure Randomness for PKI Certificate Serial Numbers",
|
||||
"severity": "high",
|
||||
"category": "PKI/Cryptography",
|
||||
"status": "open",
|
||||
"repo": "platform-libs",
|
||||
"files": ["platform-libs/libs/pki/manual_scripts/root-management/sign_with_azure.py:43"],
|
||||
"description": "Certificate serial numbers use Python's `random` module (Mersenne Twister PRNG) which is deterministic and predictable. RFC 5280 requires serial numbers to be unique per CA, and the CA/Browser Forum Baseline Requirements additionally require at least 64 bits of output from a CSPRNG so that serials are unpredictable. This is a PKI root management script — compromise here undermines the entire certificate trust chain.",
|
||||
"impact": [
|
||||
"Predictable certificate serial numbers — serials are not generated from a CSPRNG",
|
||||
"Violates CA/Browser Forum Baseline Requirements",
|
||||
"An attacker who observes serial numbers can predict future ones",
|
||||
"Predictable serials have been used in real attacks (e.g., the 2008 MD5-collision rogue CA certificate, which relied on predicting a CA's sequential serial numbers)"
|
||||
],
|
||||
"context": "The code comment on lines 39-41 explicitly acknowledges timing attack concerns but uses the non-cryptographic random module anyway."
|
||||
},
|
||||
{
|
||||
"id": "SEC-018",
|
||||
"title": "12 CI Workflows Missing permissions: Declaration",
|
||||
"severity": "high",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": [
|
||||
"core-product/.github/workflows/docker-publish.yml",
|
||||
"core-product/.github/workflows/daily-metric.yml",
|
||||
"core-product/.github/workflows/ci-infra.yml",
|
||||
"core-product/.github/workflows/process-render-only-files.yml",
|
||||
"core-product/.github/workflows/triton.yml",
|
||||
"unstructured/.github/workflows/docker-publish.yml",
|
||||
"unstructured/.github/workflows/release-version-alert.yml",
|
||||
"unstructured/.github/workflows/codeflash.yml",
|
||||
"unstructured-od-models/.github/workflows/ci.yml",
|
||||
"unstructured-od-models/.github/workflows/release.yml",
|
||||
"unstructured-inference/.github/workflows/create_issue.yml",
|
||||
"unstructured.pytesseract/.github/workflows/ci.yaml"
|
||||
],
|
||||
"description": "12 workflow files lack explicit permissions: declarations, inheriting the org/repo default (often write-all). Several are triggered by pull_request events from untrusted forks, meaning the write-scoped token could be abused.",
|
||||
"impact": [
|
||||
"GITHUB_TOKEN may get unnecessary write access to contents, packages, pull-requests, issues",
|
||||
"Violates principle of least privilege",
|
||||
"Increases blast radius of any workflow vulnerability",
|
||||
"Several are PR-triggered — untrusted code runs with elevated permissions"
|
||||
],
|
||||
"context": "GitHub's security guidance recommends explicit minimal permissions on every workflow."
|
||||
},
|
||||
{
|
||||
"id": "SEC-019",
|
||||
"title": "secrets: inherit Passes All Secrets to Reusable Workflows",
|
||||
"severity": "high",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "org-wide (15+ instances)",
|
||||
"files": [
|
||||
"core-product/.github/workflows/publish.yml:195,205",
|
||||
"platform-libs/.github/workflows/ci.yml:142,155,176,185",
|
||||
"unstructured-od-models/.github/workflows/ci.yml:39,46",
|
||||
"github-workflows/.github/workflows/build.yml:158,177,192,205",
|
||||
"github-workflows/.github/workflows/auto-release-on-merge.yml:112"
|
||||
],
|
||||
"description": "`secrets: inherit` passes ALL repository secrets to called workflows. If any reusable workflow (particularly from the shared github-workflows repo) is compromised or has a vulnerability, every secret in the calling repo is exposed.",
|
||||
"impact": [
|
||||
"Full secret exposure if any reusable workflow is compromised: AWS credentials, ACR passwords, Chainguard tokens, PyPI keys, Slack tokens, HuggingFace tokens, GitHub App private keys",
|
||||
"Violates principle of least privilege for secret access",
|
||||
"15+ instances across multiple repos",
|
||||
"Single compromised reusable workflow = full org secret breach"
|
||||
],
|
||||
"context": "The shared github-workflows repo is the central CI infrastructure — it's the highest-value target."
|
||||
},
|
||||
{
|
||||
"id": "SEC-020",
|
||||
"title": "Dependency Confusion via extra-index-url for Private PyPI",
|
||||
"severity": "high",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "core-product, github-workflows",
|
||||
"files": [
|
||||
"github-workflows/.github/actions/configure-pypi/action.yml",
|
||||
"github-workflows/.github/actions/configure-pypi-oidc/action.yml",
|
||||
"core-product/.github/workflows/ci.yml (15+ instances)"
|
||||
],
|
||||
"description": "Private packages use extra-index-url which checks BOTH public PyPI and the private Azure DevOps index, selecting the highest version. 13 private package names (utic-crypto, utic-instrumentation, utic-metrics, etc.) are unclaimed on public PyPI — an attacker could register them with higher version numbers to hijack installs.",
|
||||
"impact": [
|
||||
"Attacker registers utic-* on public PyPI with higher version number → installs malicious package instead of the real one",
|
||||
"13 private package names are unclaimed and vulnerable to squatting",
|
||||
"Only utic-public-types exists on public PyPI (legitimately published by Unstructured)",
|
||||
"Affects all CI builds and Docker builds that use extra-index-url"
|
||||
],
|
||||
"context": "platform-libs already mitigates this with explicit = true on its private index. The pattern exists but isn't applied consistently."
|
||||
},
|
||||
{
|
||||
"id": "SEC-021",
|
||||
"title": "97% of core-product GitHub Action References Use Mutable Pins (67% Org-Wide)",
|
||||
"severity": "high",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": ["All workflow files across 8 repos (446 action references audited)"],
|
||||
"description": "Of 446 action references audited org-wide: 147 SHA-pinned (33%), 203 tag-pinned (45.5%), 96 branch-pinned (21.5%). Independent VM verification of core-product (latest main) shows only 2 of 96 action references are SHA-pinned (2.1%) — 97% use mutable tags or branches.",
|
||||
"impact": [
|
||||
"Compromised upstream action silently executes malicious code in CI with access to all secrets",
|
||||
"ludeeus/action-shellcheck@master — individual developer's repo, ~400 stars, pinned to master branch",
|
||||
"anthropics/claude-code-action@beta — mutable branch pin used across 5 repos",
|
||||
"core-product has 94 mutable action references (97%) with access to 25+ distinct secrets including AWS, Azure, PyPI, HuggingFace, Anthropic, and Slack credentials"
|
||||
],
|
||||
"context": "github-workflows repo is best at 60.6% SHA-pinned. Renovate is configured with helpers:pinGitHubActionDigests but only in the shared repo."
|
||||
},
|
||||
{
|
||||
"id": "SEC-022",
|
||||
"title": "Self-Hosted Runners Used for PR-Triggered Workflows",
|
||||
"severity": "high",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": [
|
||||
"core-product/.github/workflows/ci.yml:42,76,89,105,202,254,313,414",
|
||||
"core-product/.github/workflows/daily-metric.yml:22,98,254",
|
||||
"core-product/.github/workflows/docker-publish.yml:84,187,250"
|
||||
],
|
||||
"description": "CI workflows triggered by pull_request events run on self-hosted-xlarge runners. Self-hosted runners persist state between runs (Docker images, caches, credentials on disk, environment variables). An attacker opening a PR can execute arbitrary code on the runner.",
|
||||
"impact": [
|
||||
"Arbitrary code execution on persistent runner infrastructure via a malicious PR",
|
||||
"Access to secrets and credentials left on disk by previous workflow runs",
|
||||
"Potential to install persistent backdoors on the runner",
|
||||
"Lateral movement to AWS/Azure resources since runners have cloud credentials configured"
|
||||
],
|
||||
"context": "The self-hosted-xlarge label may correspond to GitHub-managed larger runners (lower risk) or traditional persistent runners (high risk). Needs verification."
|
||||
},
|
||||
{
|
||||
"id": "SEC-023",
|
||||
"title": "Unpinned :latest Base Images in Production Dockerfiles",
|
||||
"severity": "high",
|
||||
"category": "Docker/Container Security",
|
||||
"status": "open",
|
||||
"repo": "unstructured, unstructured-inference",
|
||||
"files": [
|
||||
"unstructured/Dockerfile:1 (cgr.dev/chainguard/wolfi-base:latest)",
|
||||
"unstructured/Dockerfile:55 (ghcr.io/astral-sh/uv:latest)",
|
||||
"unstructured-inference/Dockerfile:11 (ghcr.io/astral-sh/uv:latest)"
|
||||
],
|
||||
"description": "Production Dockerfiles use :latest tags for base images. Builds are non-reproducible — a compromised or buggy upstream image silently enters the supply chain. Docker-compose files also use :latest for infrastructure services (prometheus, grafana, minio, kafka, etc.).",
|
||||
"impact": [
|
||||
"Non-reproducible builds — same Dockerfile produces different images at different times",
|
||||
"Compromised upstream image silently enters the supply chain",
|
||||
"Chainguard wolfi-base and uv form the foundation of production images",
|
||||
":latest tags in docker-compose affect infrastructure services (prometheus, grafana, minio)"
|
||||
],
|
||||
"context": "Chainguard and Astral (uv) are reputable providers, but :latest is still a mutable reference that could be compromised or accidentally broken."
|
||||
},
|
||||
{
|
||||
"id": "SEC-024",
|
||||
"title": "Unquoted Secret Expansion in Shell Commands",
|
||||
"severity": "high",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": [
|
||||
"core-product/.github/workflows/ci.yml:184",
|
||||
"core-product/.github/workflows/docker-publish.yml:152"
|
||||
],
|
||||
"description": "PRIVATE_PYPI_INDEX_URL is expanded without quotes via ${{ secrets.PRIVATE_PYPI_INDEX_URL }} directly in a run: block. GitHub Actions performs string interpolation before the shell sees it — if the secret contains shell metacharacters, it enables command injection. The extracted password may also appear unmasked in logs since it doesn't match the original secret value.",
|
||||
"impact": [
|
||||
"Potential command injection if secret value contains shell metacharacters",
|
||||
"Password extracted by sed may not match GitHub's known secret value — appears unmasked in logs",
|
||||
"Process args visible in /proc on self-hosted runners",
|
||||
"Inconsistent — the correct pattern is used in an adjacent step in the same file"
|
||||
],
|
||||
"context": "The correct pattern (env: indirection) is already used at line 178 in the same workflow — this is an inconsistency."
|
||||
},
|
||||
{
|
||||
"id": "SEC-025",
|
||||
"title": "Private PyPI URL Written to Disk on Self-Hosted Runners",
|
||||
"severity": "high",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": [
|
||||
"core-product/.github/workflows/ci.yml:131,224,277,337",
|
||||
"core-product/.github/workflows/daily-metric.yml:58,130",
|
||||
"core-product/.github/workflows/docker-publish.yml:111,224",
|
||||
"core-product/.github/workflows/triton.yml:65"
|
||||
],
|
||||
"description": "The full PRIVATE_PYPI_INDEX_URL (with embedded Azure DevOps credentials in user:TOKEN@host format) is written to ~/.pip/unstructured.conf on the runner filesystem. On self-hosted runners (which core-product uses extensively), this file persists between jobs without cleanup.",
|
||||
"impact": [
|
||||
"Azure DevOps credentials persist on self-hosted runner disk between workflow jobs",
|
||||
"15+ instances of this pattern across core-product workflows",
|
||||
"URL contains embedded user:TOKEN@host credentials in plaintext",
|
||||
"Combined with SEC-022 (self-hosted runner risk), credentials from previous runs are accessible to subsequent PR workflows"
|
||||
],
|
||||
"context": "This compounds with the self-hosted runner issue (SEC-022) — a PR author's code runs on a runner that has credentials from previous builds on disk."
|
||||
},
|
||||
{
|
||||
"id": "SEC-026",
|
||||
"title": "curl Piped to Shell for Poetry Install in Build Tools",
|
||||
"severity": "medium",
|
||||
"category": "Docker/Container Security",
|
||||
"status": "open",
|
||||
"repo": "github-workflows",
|
||||
"files": ["github-workflows/utic-build-tools/Dockerfile:7"],
|
||||
"description": "Poetry is installed by piping curl output directly to python3. A DNS hijack, MITM, or compromised server delivers arbitrary code execution. The -s flag silences errors, hiding download failures.",
|
||||
"impact": [
|
||||
"Arbitrary code execution from a compromised download source",
|
||||
"No checksum verification on the downloaded installer",
|
||||
"-s flag hides download failures — silent compromise",
|
||||
"Build-tools image is used across all CI — compromising it affects every repo"
|
||||
],
|
||||
"context": "The build-tools image is the foundation of CI across the org."
|
||||
},
|
||||
{
|
||||
"id": "SEC-027",
|
||||
"title": "Python 3.9 EOL Base Image in Build Tools",
|
||||
"severity": "medium",
|
||||
"category": "Docker/Container Security",
|
||||
"status": "open",
|
||||
"repo": "github-workflows",
|
||||
"files": ["github-workflows/utic-build-tools/Dockerfile:2,24"],
|
||||
"description": "Both builder and runtime stages use python:3.9-slim. Python 3.9 reached end-of-life in October 2025 — no more security patches are being issued.",
|
||||
"impact": [
|
||||
"No security patches for Python 3.9 vulnerabilities (CVEs go unpatched)",
|
||||
"Both builder and runtime stages affected",
|
||||
"Build tools image is shared across all CI pipelines"
|
||||
],
|
||||
"context": "Python 3.9 EOL was October 2025. Currently supported versions are 3.10 through 3.14."
|
||||
},
|
||||
{
|
||||
"id": "SEC-028",
|
||||
"title": "HuggingFace Token Residual Risk in Docker Image",
|
||||
"severity": "medium",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/unstructured-api/Dockerfile:83-90"],
|
||||
"description": "While BuildKit --mount=type=secret is used correctly for the HF token, HuggingFace libraries may cache the token to disk during model downloads. The preload script cleans ~/.cache/huggingface/token but other cache paths may be missed. The app-start.sh script looks for ~/hf_token at runtime, implying deployment paths where the token file may be baked into the image.",
|
||||
"impact": [
|
||||
"HF token may persist in image layers via library-created cache directories",
|
||||
"Internal ticket CORE-4302 acknowledges this as a known issue",
|
||||
"Marketplace images (where external users access the image) are the highest risk",
|
||||
"Token provides access to gated models and potentially private model repos"
|
||||
],
|
||||
"context": "BuildKit secret mounting is done correctly — the risk is from HuggingFace library behavior caching tokens in unexpected locations during model downloads."
|
||||
},
|
||||
{
|
||||
"id": "SEC-029",
|
||||
"title": "Timing-Unsafe API Key Comparison",
|
||||
"severity": "medium",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "core-product",
|
||||
"files": ["core-product/unstructured-api/prepline_general/api/general.py:689"],
|
||||
"description": "API key validation uses Python's != operator which performs byte-by-byte comparison that short-circuits on the first mismatch. An attacker can statistically determine the correct API key one character at a time by measuring response time differences across many requests.",
|
||||
"impact": [
|
||||
"Timing side-channel enables API key extraction",
|
||||
"Especially exploitable on low-latency internal networks",
|
||||
"Fully automatable — requires many requests but no special access",
|
||||
"Combined with SEC-013, provides two independent key extraction vectors"
|
||||
],
|
||||
"context": "Standard mitigation is constant-time comparison, which Python provides as hmac.compare_digest() (also exposed as secrets.compare_digest())."
|
||||
},
|
||||
{
|
||||
"id": "SEC-030",
|
||||
"title": "SHA-1 Used for Subject Key Identifier in PKI Tool",
|
||||
"severity": "medium",
|
||||
"category": "PKI/Cryptography",
|
||||
"status": "open",
|
||||
"repo": "platform-libs",
|
||||
"files": ["platform-libs/libs/pki/manual_scripts/root-management/sign_with_azure.py:72-74"],
|
||||
"description": "SHA-1 is used to hash the public key for the Subject Key Identifier extension. SHA-1 is cryptographically broken for collision resistance (SHAttered attack, 2017). Modern PKI guidance from NIST and the CA/Browser Forum recommends SHA-256.",
|
||||
"impact": [
|
||||
"May fail compliance audits that require modern cryptographic algorithms",
|
||||
"SHA-1 collision attacks are practical since 2017",
|
||||
"Poor practice for a newly-built CA root management tool"
|
||||
],
|
||||
"context": "RFC 5280 (section 4.2.1.2) describes SHA-1 as a common method for deriving the SKI; RFC 7093 defines SHA-2-based alternatives such as a truncated SHA-256 hash."
|
||||
},
|
||||
{
|
||||
"id": "SEC-031",
|
||||
"title": "28+ Unbounded Dependencies in ML Packages",
|
||||
"severity": "medium",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "unstructured-inference, unstructured-od-models",
|
||||
"files": [
|
||||
"unstructured-inference/pyproject.toml (28 unbounded)",
|
||||
"unstructured-od-models/pyproject.toml (17 unbounded)"
|
||||
],
|
||||
"description": "unstructured-inference has 28 dependencies with >= but no upper bound (e.g., torch>=2.10.0, transformers>=4.25.1, numpy>=1.26.0). unstructured-od-models has 17. The lockfile mitigates for uv sync, but anyone who pip-installs these packages gets unbounded resolution.",
|
||||
"impact": [
|
||||
"pip install gets unbounded dependency resolution — new major versions pulled in automatically",
|
||||
"Major version bumps could introduce breaking changes or vulnerabilities",
|
||||
"45 total unbounded deps across the two ML packages",
|
||||
"Contrast: the main unstructured repo correctly uses >=X, <Y ranges"
|
||||
],
|
||||
"context": "The lockfile mitigates for internal builds, but published packages are installed by external users without the lockfile."
|
||||
},
|
||||
{
|
||||
"id": "SEC-032",
|
||||
"title": "Socket.dev Security Check is Non-Blocking",
|
||||
"severity": "medium",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "github-workflows",
|
||||
"files": ["github-workflows/.github/workflows/security.yaml"],
|
||||
"description": "The Socket.dev security check uses continue-on-error: true, meaning security scan failures don't block the build. A PR introducing a known-vulnerable dependency can still merge even if Socket flags it.",
|
||||
"impact": [
|
||||
"Security scanning failures are silently ignored in CI",
|
||||
"PRs with vulnerable dependencies can merge without manual review of the scan",
|
||||
"Defeats the purpose of automated security scanning",
|
||||
"Partially mitigated by the org-level Socket ruleset (SEC-006)"
|
||||
],
|
||||
"context": "The org-level Socket ruleset may provide a separate blocking check, but the CI-level check explicitly allows failures."
|
||||
},
|
||||
{
|
||||
"id": "SEC-033",
|
||||
"title": "Committed RSA Private Key in Test Directory",
|
||||
"severity": "medium",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "platform-libs",
|
||||
"files": ["platform-libs/libs/storage/blob_storage_adapters/tests/pki/key.pem"],
|
||||
"description": "A 2048-bit RSA private key is committed to the repository. While it's a self-signed certificate for localhost, committed private keys are permanently in git history and set a precedent for credential handling.",
|
||||
"impact": [
|
||||
"Private key is permanently in git history — cannot be fully removed",
|
||||
"Sets a bad precedent for credential handling across the org",
|
||||
"If test infrastructure is reachable beyond localhost, enables MITM attacks"
|
||||
],
|
||||
"context": "The corresponding certificate is self-signed for CN=localhost, O=unstructured."
|
||||
},
|
||||
{
|
||||
"id": "SEC-034",
|
||||
"title": "version-bump.yml Grants contents: write on pull_request",
|
||||
"severity": "medium",
|
||||
"category": "CI/CD Security",
|
||||
"status": "open",
|
||||
"repo": "core-product, unstructured-inference",
|
||||
"files": [
|
||||
"core-product/.github/workflows/version-bump.yml:8-10",
|
||||
"unstructured-inference/.github/workflows/version-bump.yml:8-10"
|
||||
],
|
||||
"description": "Workflows triggered on pull_request grant contents: write at the workflow level. The job-level if: limits execution to Renovate bot PRs, but GitHub evaluates permissions before the if: condition. For same-repo branch PRs, the full write token is available. Also calls an external workflow pinned to @main (mutable).",
|
||||
"impact": [
|
||||
"Write-scoped GITHUB_TOKEN available for all same-repo branch PRs",
|
||||
"Permissions evaluated before job if: condition — the filter doesn't prevent token generation",
|
||||
"External workflow referenced at @main — mutable pin"
|
||||
],
|
||||
"context": "For forked PRs, GitHub auto-downgrades to read-only, limiting the blast radius."
|
||||
},
|
||||
{
|
||||
"id": "SEC-035",
|
||||
"title": "Command Injection via Pulumi Resource ID in Minikube Provider",
|
||||
"severity": "low",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "platform-libs",
|
||||
"files": ["platform-libs/libs/cloud_abstractions/minikube/utic_cloud_provider_minikube/components/kubernetes.py:61-62,127,133-134"],
|
||||
"description": "exec_shell() splits f-strings on whitespace for subprocess.Popen. A Pulumi resource ID containing spaces could cause unintended argument injection, potentially leading to unintended cluster operations like deleting all minikube profiles.",
|
||||
"impact": [
|
||||
"Unintended cluster operations via crafted resource IDs",
|
||||
"Limited to local minikube dev environments",
|
||||
"Requires crafted Pulumi state"
|
||||
],
|
||||
"context": "Low severity — minikube is local-dev only."
|
||||
},
|
||||
{
|
||||
"id": "SEC-036",
|
||||
"title": "shell=True in Release Script with Derived Paths",
|
||||
"severity": "low",
|
||||
"category": "Code-Level Vulnerability",
|
||||
"status": "open",
|
||||
"repo": "platform-libs",
|
||||
"files": ["platform-libs/scripts/release.py:54-59"],
|
||||
"description": "The release script uses shell=True with an f-string containing a project name derived from git diff output. A maliciously-named directory under libs/ could inject shell commands. The command chain includes twine upload, making it a supply-chain risk if exploited.",
|
||||
"impact": [
|
||||
"Shell command injection via maliciously-named directory",
|
||||
"twine upload is in the command chain — could publish malicious packages",
|
||||
"Requires write access to the repo to create the directory"
|
||||
],
|
||||
"context": "Low probability but high impact if exploited — the release pipeline is the most sensitive part of any supply chain."
|
||||
},
|
||||
{
|
||||
"id": "SEC-037",
|
||||
"title": ".gitignore Missing Critical Patterns Across All Repos",
|
||||
"severity": "low",
|
||||
"category": "Secrets Management",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": ["All 8 repos' .gitignore files"],
|
||||
"description": "All repos are missing gitignore patterns for: *.pem, *.key, *.p12, pip.conf, .pypirc, .netrc, credentials.json, service-account.json. The github-workflows repo is missing .env entirely (minimal .gitignore with only 6 lines).",
|
||||
"impact": [
|
||||
"Accidental commit of cryptographic material or credential files",
|
||||
"Defense-in-depth failure — gitignore is a safety net",
|
||||
"SEC-033 (committed private key) demonstrates the real risk"
|
||||
],
|
||||
"context": "No additional leaks found from these gaps beyond SEC-033, but the gap is a persistent risk."
|
||||
},
|
||||
{
|
||||
"id": "SEC-038",
|
||||
"title": "No HEALTHCHECK in Any Production Dockerfile",
|
||||
"severity": "low",
|
||||
"category": "Docker/Container Security",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": ["All 6 Dockerfiles"],
|
||||
"description": "No Dockerfiles include HEALTHCHECK directives. Without healthchecks, Docker and Docker Swarm cannot distinguish a running-but-unhealthy container from a healthy one, so a hung process continues receiving traffic. Kubernetes ignores the Dockerfile HEALTHCHECK and relies on its own liveness/readiness probes, which must be configured separately.",
|
||||
"impact": [
|
||||
"Hung or deadlocked containers continue receiving customer traffic",
|
||||
"Slower incident detection and recovery",
|
||||
"Kubernetes readiness probes may be configured separately, but the Dockerfile is the canonical definition"
|
||||
],
|
||||
"context": "Some docker-compose files have healthchecks for auxiliary services, but the production Dockerfiles (API, triton) do not."
|
||||
},
|
||||
{
|
||||
"id": "SEC-039",
|
||||
"title": "No CodeQL/SAST Coverage for 7 of 8 Repos",
|
||||
"severity": "low",
|
||||
"category": "Vulnerability Management",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": ["Only unstructured/.github/workflows/codeql-analysis.yml exists"],
|
||||
"description": "GitHub CodeQL static analysis is only configured for the main unstructured repo. The other 7 repos — including core-product (the API that processes sensitive documents) and platform-libs (PKI, secrets management, cloud abstractions) — have no static application security testing.",
|
||||
"impact": [
|
||||
"Code-level vulnerabilities go undetected by automated tools in 7 of 8 repos",
|
||||
"core-product (the public-facing API) and platform-libs (PKI/secrets) are the highest-risk repos without SAST",
|
||||
"Anchore/Grype covers container image scanning but not source code analysis"
|
||||
],
|
||||
"context": "The findings in this audit (SEC-013 through SEC-017) demonstrate the kinds of vulnerabilities SAST would catch."
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"total_findings": 39,
|
||||
"critical": 7,
|
||||
"high": 13,
|
||||
"medium": 10,
|
||||
"low": 5,
|
||||
"info": 1,
|
||||
"positive_controls": 1,
|
||||
"fixed_by_codeflash": 0,
|
||||
"open": 33,
|
||||
"active_controls": 1,
|
||||
"by_category": {
|
||||
"Supply Chain": 7,
|
||||
"CI/CD Security": 7,
|
||||
"Docker/Container Security": 5,
|
||||
"Code-Level Vulnerability": 6,
|
||||
"Secrets Management": 7,
|
||||
"Build Reproducibility": 1,
|
||||
"Vulnerability Management": 2,
|
||||
"PKI/Cryptography": 2,
|
||||
"Build Pipeline": 1,
|
||||
"Positive Control": 1
|
||||
},
|
||||
"by_repo": {
|
||||
"org-wide": 10,
|
||||
"core-product": 12,
|
||||
"github-workflows": 7,
|
||||
"unstructured": 3,
|
||||
"platform-libs": 5,
|
||||
"unstructured-inference": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
80
reports/unstructured-security/theme.py
Normal file
80
reports/unstructured-security/theme.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Theme and styling constants for the Unstructured x Codeflash engagement report."""
|
||||
|
||||
# ── Palette (Codeflash dark: amber accent on zinc / navy) ───────────────────
# Surfaces
BG = "#0d1117"  # dark navy (codeflash.ai style)
DARK = "#09090b"
CARD_BG = "rgba(16,20,28,0.7)"  # dark navy at 70% alpha, readable over the animated bg
CARD_BORDER = "rgba(63,63,70,0.35)"  # zinc-700 at 35% alpha

# Text / neutrals
WHITE = "#fafafa"
SLATE = "#e4e4e7"
GRAY = "#a1a1aa"
LIGHT_GRAY = "#71717a"  # NOTE: a darker hex value than GRAY, despite the name

# Accent and semantic hues; each LIGHT_* is the same RGB at 12% alpha.
ACCENT = "#ffd227"
AMBER = "#fbbf24"
GREEN = "#4ade80"
LIGHT_GREEN = "rgba(74,222,128,0.12)"
RED = "#f87171"
LIGHT_RED = "rgba(248,113,113,0.12)"
BLUE = "#60a5fa"
PURPLE = "#a78bfa"
PINK = "#f472b6"

# ── Grid overlay ────────────────────────────────────────────────────────────
# Same subtle grid pattern as the roadmap page: two 1px line gradients
# (vertical lines, then horizontal lines) tiled at GRID_BG_SIZE.
GRID_BG_IMAGE = (
    "linear-gradient(to right, currentColor 1px, transparent 1px),"
    + "linear-gradient(to bottom, currentColor 1px, transparent 1px)"
)
GRID_BG_SIZE = "48px 48px"
GRID_OVERLAY = {
    "position": "fixed",
    # All four edges are pinned to 0.
    **dict.fromkeys(("top", "left", "right", "bottom"), 0),
    "backgroundImage": GRID_BG_IMAGE,
    "backgroundSize": GRID_BG_SIZE,
    "opacity": "0.05",
    "pointerEvents": "none",
    "zIndex": "0",
}

# ── Shared style fragments ──────────────────────────────────────────────────
_HAIRLINE = f"1px solid {CARD_BORDER}"  # outline reused by cards and tables
_RADIUS = "16px"
_CELL_PADDING = "12px 16px"
_CELL_FONT_SIZE = "13px"

# ── Component styles ────────────────────────────────────────────────────────
FONT = "'Inter', system-ui, -apple-system, sans-serif"
MONO = "'JetBrains Mono', 'Menlo', monospace"
CARD = {
    "background": CARD_BG,
    "borderRadius": _RADIUS,
    "padding": "28px 32px",
    "border": _HAIRLINE,
}

# ── Table styles ────────────────────────────────────────────────────────────
TABLE_HEADER: dict[str, str] = {
    "backgroundColor": "rgba(24,24,27,0.8)",
    "color": ACCENT,
    "fontWeight": "600",
    "fontSize": _CELL_FONT_SIZE,
    "padding": _CELL_PADDING,
    "borderBottom": _HAIRLINE,
}
TABLE_CELL: dict[str, str] = {
    "textAlign": "left",
    "padding": _CELL_PADDING,
    "fontSize": _CELL_FONT_SIZE,
    "fontFamily": FONT,
    "border": "none",
    "color": SLATE,
}
TABLE_DATA: dict[str, str] = {
    "backgroundColor": "rgba(24,24,27,0.5)",
    "color": SLATE,
}
# Odd rows get a slightly different background.
_ODD_ROW_STRIPE: dict[str, object] = {
    "if": {"row_index": "odd"},
    "backgroundColor": "rgba(31,31,35,0.6)",
}
TABLE_DATA_CONDITIONAL: list[dict[str, object]] = [_ODD_ROW_STRIPE]
TABLE_WRAP: dict[str, str] = {
    "borderRadius": _RADIUS,
    "overflow": "hidden",
    "border": _HAIRLINE,
}
|
||||
|
|
@ -413,18 +413,18 @@
|
|||
},
|
||||
{
|
||||
"id": "SEC-021",
|
||||
"title": "67% of GitHub Action References Use Mutable Pins",
|
||||
"title": "97% of core-product GitHub Action References Use Mutable Pins (67% Org-Wide)",
|
||||
"severity": "high",
|
||||
"category": "Supply Chain",
|
||||
"status": "open",
|
||||
"repo": "org-wide",
|
||||
"files": ["All workflow files across 8 repos (446 action references audited)"],
|
||||
"description": "Of 446 action references audited: 147 SHA-pinned (33%), 203 tag-pinned (45.5%), 96 branch-pinned (21.5%). Core-product (the main production repo with secrets access) is at only 2.2% SHA-pinned with 87 tag-pinned actions.",
|
||||
"description": "Of 446 action references audited org-wide: 147 SHA-pinned (33%), 203 tag-pinned (45.5%), 96 branch-pinned (21.5%). Independent VM verification of core-product (latest main) shows only 2 of 96 action references are SHA-pinned (2.1%) — 97% use mutable tags or branches.",
|
||||
"impact": [
|
||||
"Compromised upstream action silently executes malicious code in CI with access to all secrets",
|
||||
"ludeeus/action-shellcheck@master — individual developer's repo, ~400 stars, pinned to master branch",
|
||||
"anthropics/claude-code-action@beta — mutable branch pin used across 5 repos",
|
||||
"core-product has 87 tag-pinned actions with access to AWS, Azure, PyPI, HuggingFace, and Slack secrets"
|
||||
"core-product has 94 mutable action references (97%) with access to 25+ distinct secrets including AWS, Azure, PyPI, HuggingFace, Anthropic, and Slack credentials"
|
||||
],
|
||||
"context": "github-workflows repo is best at 60.6% SHA-pinned. Renovate is configured with helpers:pinGitHubActionDigests but only in the shared repo."
|
||||
},
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ from theme import (
|
|||
FONT,
|
||||
GRAY,
|
||||
GREEN,
|
||||
GRID_OVERLAY,
|
||||
LIGHT_GRAY,
|
||||
LIGHT_RED,
|
||||
MONO,
|
||||
|
|
@ -59,10 +58,10 @@ SEVERITY_BG = {
|
|||
}
|
||||
|
||||
STATUS_COLORS = {
|
||||
"fixed": GREEN,
|
||||
"partially-fixed": "#f97316",
|
||||
"open": AMBER,
|
||||
"active": BLUE,
|
||||
"fixed": "#34d399", # emerald-400 (distinct from severity green)
|
||||
"partially-fixed": "#c084fc", # purple-400 (avoids orange/amber overlap)
|
||||
"open": "#f87171", # red-400 (open = still a problem)
|
||||
"active": "#22d3ee", # cyan-400 (distinct from severity blue)
|
||||
}
|
||||
|
||||
_TAB_BTN_STYLE = {
|
||||
|
|
@ -120,9 +119,25 @@ def card(children, **kw):
|
|||
return html.Div(children, style=style)
|
||||
|
||||
|
||||
# Glyphs shown in front of the severity label, so a badge does not rely on
# color alone to convey its level.
_FILLED_CIRCLE = "\u25cf"
_FILLED_TRIANGLE = "\u25b2"
_FILLED_DIAMOND = "\u25c6"
_OPEN_CIRCLE = "\u25cb"
_INFO_SYMBOL = "\u2139"

SEVERITY_ICONS = {
    "critical": _FILLED_CIRCLE,
    "high": _FILLED_TRIANGLE,
    "medium": _FILLED_DIAMOND,
    "low": _OPEN_CIRCLE,
    "info": _INFO_SYMBOL,
}
|
||||
|
||||
|
||||
def severity_badge(severity):
|
||||
icon = SEVERITY_ICONS.get(severity, "")
|
||||
return html.Span(
|
||||
severity.upper(),
|
||||
[
|
||||
html.Span(
|
||||
icon,
|
||||
style={"marginRight": "5px", "fontSize": "9px"},
|
||||
),
|
||||
severity.upper(),
|
||||
],
|
||||
style={
|
||||
"fontSize": "11px",
|
||||
"fontWeight": "700",
|
||||
|
|
@ -131,6 +146,8 @@ def severity_badge(severity):
|
|||
"padding": "3px 10px",
|
||||
"borderRadius": "999px",
|
||||
"letterSpacing": "0.05em",
|
||||
"display": "inline-flex",
|
||||
"alignItems": "center",
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -336,26 +353,28 @@ def finding_card(f):
|
|||
sev = f["severity"]
|
||||
border_color = SEVERITY_COLORS.get(sev, GRAY)
|
||||
|
||||
# Header: ID + title + badges
|
||||
# Header: title first (F-pattern), ID secondary, badges right
|
||||
header = html.Div(
|
||||
[
|
||||
html.Div(
|
||||
[
|
||||
html.Span(
|
||||
f["id"],
|
||||
style={
|
||||
"fontFamily": MONO,
|
||||
"fontSize": "13px",
|
||||
"color": LIGHT_GRAY,
|
||||
"marginRight": "12px",
|
||||
},
|
||||
),
|
||||
html.Span(
|
||||
html.Div(
|
||||
f["title"],
|
||||
style={
|
||||
"fontWeight": "700",
|
||||
"color": SLATE,
|
||||
"fontSize": "16px",
|
||||
"fontSize": "17px",
|
||||
"lineHeight": "1.3",
|
||||
},
|
||||
),
|
||||
html.Span(
|
||||
f["id"],
|
||||
style={
|
||||
"fontFamily": MONO,
|
||||
"fontSize": "12px",
|
||||
"color": LIGHT_GRAY,
|
||||
"marginTop": "4px",
|
||||
"display": "block",
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
@ -375,7 +394,7 @@ def finding_card(f):
|
|||
],
|
||||
style={
|
||||
"display": "flex",
|
||||
"alignItems": "center",
|
||||
"alignItems": "flex-start",
|
||||
"gap": "16px",
|
||||
"marginBottom": "16px",
|
||||
"flexWrap": "wrap",
|
||||
|
|
@ -557,11 +576,38 @@ def finding_card(f):
|
|||
|
||||
|
||||
# ── Findings split by severity ──────────────────────────────────────────────
|
||||
# Order categories by what matters most to the audience: supply chain and
|
||||
# container security work they're already doing, then CI/CD, secrets, code.
|
||||
_CATEGORY_PRIORITY = {
|
||||
"Supply Chain": 0,
|
||||
"Docker/Container Security": 1,
|
||||
"CI/CD Security": 2,
|
||||
"Secrets Management": 3,
|
||||
"Code-Level Vulnerability": 4,
|
||||
"PKI/Cryptography": 5,
|
||||
"Vulnerability Management": 6,
|
||||
"Build Reproducibility": 7,
|
||||
"Build Pipeline": 8,
|
||||
"Positive Control": 9,
|
||||
}
|
||||
_SEV_PRIORITY = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
|
||||
|
||||
_CRITICAL_HIGH = [f for f in FINDINGS if f["severity"] in ("critical", "high")]
|
||||
_MEDIUM_LOW = [
|
||||
f for f in FINDINGS if f["severity"] not in ("critical", "high")
|
||||
]
|
||||
|
||||
def _finding_sort_key(f):
    """Return the sort key for a finding: severity rank, then category rank.

    Args:
        f: Finding mapping. Its ``"severity"`` and ``"category"`` values are
            looked up in ``_SEV_PRIORITY`` and ``_CATEGORY_PRIORITY``.

    Returns:
        A ``(severity_rank, category_rank)`` tuple of ints; lower sorts first.

    Raises:
        KeyError: If ``f`` has no ``"severity"`` or ``"category"`` key.
    """
    # Unlisted values rank one past the last known entry, so they always sort
    # after every listed severity/category. A fixed fallback of 9 would tie
    # with "Positive Control", which already holds category rank 9.
    return (
        _SEV_PRIORITY.get(f["severity"], len(_SEV_PRIORITY)),
        _CATEGORY_PRIORITY.get(f["category"], len(_CATEGORY_PRIORITY)),
    )
|
||||
|
||||
|
||||
_CRITICAL_HIGH = sorted(
|
||||
[f for f in FINDINGS if f["severity"] in ("critical", "high")],
|
||||
key=_finding_sort_key,
|
||||
)
|
||||
_MEDIUM_LOW = sorted(
|
||||
[f for f in FINDINGS if f["severity"] not in ("critical", "high")],
|
||||
key=_finding_sort_key,
|
||||
)
|
||||
|
||||
|
||||
# ── Tab content builders ────────────────────────────────────────────────────
|
||||
|
|
@ -578,17 +624,18 @@ def _build_summary_tab():
|
|||
[
|
||||
html.P(
|
||||
[
|
||||
"We conducted a comprehensive security audit across all ",
|
||||
"We audited all ",
|
||||
html.Span(
|
||||
"8 Unstructured repositories",
|
||||
style={"fontWeight": "700", "color": SLATE},
|
||||
),
|
||||
f", examining {AUDIT_META.get('workflow_files_audited', 69)} CI/CD workflow files, "
|
||||
f" end-to-end: {AUDIT_META.get('workflow_files_audited', 69)} CI/CD workflows, "
|
||||
f"{AUDIT_META.get('dockerfiles_audited', 6)} Dockerfiles, "
|
||||
f"and {AUDIT_META.get('action_references_audited', 446)} GitHub Action references. "
|
||||
"The audit covered supply chain integrity, CI/CD pipeline security, "
|
||||
"container hardening, code-level vulnerabilities, secrets management, "
|
||||
"and PKI/cryptography.",
|
||||
"The scope covers the same layers you're already hardening \u2014 "
|
||||
"supply chain integrity, container security, CI/CD pipeline permissions, "
|
||||
"and dependency management \u2014 plus code-level vulnerabilities, "
|
||||
"secrets handling, and PKI.",
|
||||
],
|
||||
style={
|
||||
"color": GRAY,
|
||||
|
|
@ -599,7 +646,7 @@ def _build_summary_tab():
|
|||
),
|
||||
html.P(
|
||||
[
|
||||
"We identified ",
|
||||
"We found ",
|
||||
html.Span(
|
||||
f"{SUMMARY['total_findings']} findings",
|
||||
style={"fontWeight": "700", "color": SLATE},
|
||||
|
|
@ -617,14 +664,11 @@ def _build_summary_tab():
|
|||
"color": "#f97316",
|
||||
},
|
||||
),
|
||||
" severity issues across every layer of the stack. ",
|
||||
html.Span(
|
||||
f"{SUMMARY['open']} open findings",
|
||||
style={"fontWeight": "700", "color": AMBER},
|
||||
),
|
||||
" span every layer of the stack \u2014 from Docker images running as root "
|
||||
"to API endpoints leaking credentials in error responses to CI workflows "
|
||||
"executable by any GitHub user.",
|
||||
" severity issues. The lockfile-bypass pattern you fixed in core-product "
|
||||
"(PR #1465) still persists in CI steps and Makefiles across the org. "
|
||||
"Supply chain, container, and CI/CD injection issues account for the "
|
||||
"majority \u2014 the same categories where Renovate and Anchore give you "
|
||||
"visibility but not full coverage.",
|
||||
],
|
||||
style={
|
||||
"color": GRAY,
|
||||
|
|
@ -634,9 +678,11 @@ def _build_summary_tab():
|
|||
},
|
||||
),
|
||||
html.P(
|
||||
"For a platform that processes sensitive documents (PDFs, emails, financial records, "
|
||||
"medical documents) for customers in regulated industries (HIPAA, SOC 2, PCI-DSS), "
|
||||
"these findings represent material risk to both compliance posture and customer trust.",
|
||||
[
|
||||
"For a platform processing sensitive documents across regulated industries "
|
||||
"(HIPAA, SOC 2, PCI-DSS), these gaps are material \u2014 especially ahead "
|
||||
"of the next NCC Group assessment cycle.",
|
||||
],
|
||||
style={
|
||||
"color": GRAY,
|
||||
"fontSize": "15px",
|
||||
|
|
@ -683,42 +729,50 @@ def _build_summary_tab():
|
|||
style={
|
||||
"marginTop": "56px",
|
||||
"textAlign": "center",
|
||||
"padding": "48px 24px",
|
||||
"background": f"linear-gradient(135deg, rgba(239,68,68,0.05) 0%, {CARD_BG} 50%, rgba(239,68,68,0.05) 100%)",
|
||||
"padding": "56px 32px",
|
||||
"background": f"linear-gradient(135deg, rgba(239,68,68,0.06) 0%, {CARD_BG} 40%, {CARD_BG} 60%, rgba(239,68,68,0.06) 100%)",
|
||||
"borderRadius": "16px",
|
||||
"border": f"1px solid {CARD_BORDER}",
|
||||
},
|
||||
children=[
|
||||
# Loss-framed percentage anchor (Change #4)
|
||||
html.Div(
|
||||
f"{SUMMARY['open']}",
|
||||
f"{round(SUMMARY['open'] / SUMMARY['total_findings'] * 100)}%",
|
||||
style={
|
||||
"fontSize": "64px",
|
||||
"fontSize": "72px",
|
||||
"fontWeight": "800",
|
||||
"color": RED,
|
||||
"lineHeight": "1",
|
||||
},
|
||||
),
|
||||
html.Div(
|
||||
"Open Findings",
|
||||
"of findings remain unresolved",
|
||||
style={
|
||||
"fontSize": "20px",
|
||||
"fontWeight": "700",
|
||||
"fontWeight": "600",
|
||||
"color": SLATE,
|
||||
"marginTop": "8px",
|
||||
},
|
||||
),
|
||||
html.Div(
|
||||
f"{SUMMARY['open']} of {SUMMARY['total_findings']} security issues across {len(SUMMARY.get('by_repo', {}))} repositories",
|
||||
style={
|
||||
"fontSize": "14px",
|
||||
"fontFamily": MONO,
|
||||
"color": LIGHT_GRAY,
|
||||
"marginTop": "8px",
|
||||
},
|
||||
),
|
||||
# Urgency line (loss framing)
|
||||
html.P(
|
||||
[
|
||||
"We have remediation plans ready for all open issues \u2014 ",
|
||||
"spanning CI/CD hardening, container security, supply chain integrity, "
|
||||
"API-level fixes, and PKI modernization.",
|
||||
],
|
||||
"These span supply chain, CI/CD injection, container, and secrets management "
|
||||
"\u2014 the same categories where one engineer can't cover everything alone.",
|
||||
style={
|
||||
"color": GRAY,
|
||||
"fontSize": "16px",
|
||||
"fontSize": "15px",
|
||||
"lineHeight": "1.7",
|
||||
"margin": "16px auto 0",
|
||||
"maxWidth": "600px",
|
||||
"margin": "24px auto 0",
|
||||
"maxWidth": "560px",
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
@ -736,7 +790,7 @@ def _build_critical_high_tab():
|
|||
children=[
|
||||
section(
|
||||
"Critical & High Findings",
|
||||
f"{n} findings requiring immediate attention",
|
||||
f"{n} findings \u2014 supply chain, container, CI/CD, and secrets",
|
||||
),
|
||||
*[finding_card(f) for f in _CRITICAL_HIGH],
|
||||
],
|
||||
|
|
@ -819,14 +873,149 @@ app.index_string = """<!DOCTYPE html>
|
|||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&family=JetBrains+Mono:wght@400;600;700&display=swap" rel="stylesheet">
|
||||
{%favicon%}
|
||||
{%css%}
|
||||
<style>
|
||||
#lightspeed-canvas {
|
||||
position: fixed;
|
||||
top: 0; left: 0;
|
||||
width: 100%; height: 100%;
|
||||
pointer-events: none;
|
||||
z-index: 9999;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<canvas id="lightspeed-canvas"></canvas>
|
||||
{%app_entry%}
|
||||
<footer>
|
||||
{%config%}
|
||||
{%scripts%}
|
||||
{%renderer%}
|
||||
</footer>
|
||||
<script>
|
||||
(function() {
  // Decorative background: slow, semi-transparent bars drifting across a
  // full-viewport canvas. Everything lives in this closure.
  var canvas = document.getElementById('lightspeed-canvas');
  // Bail out quietly if the canvas (or 2D support) is missing instead of
  // throwing on the first property access.
  if (!canvas || !canvas.getContext) return;
  var ctx = canvas.getContext('2d');
  if (!ctx) return;

  var dpr = 1;
  var W, H;

  // Size the backing store to the viewport in device pixels while keeping
  // drawing coordinates in CSS pixels.
  function resize() {
    // Re-read the ratio on every resize: it changes with browser zoom and
    // when the window moves to a display with a different density.
    dpr = window.devicePixelRatio || 1;
    W = window.innerWidth;
    H = window.innerHeight;
    canvas.width = W * dpr;
    canvas.height = H * dpr;
    canvas.style.width = W + 'px';
    canvas.style.height = H + 'px';
    ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
  }
  resize();
  window.addEventListener('resize', resize);

  // Palette: codeflash.ai dark navy blues + occasional amber.
  // `w` is the relative weight of each colour.
  var palette = [
    {r:50,  g:65,  b:95,  w:5},  // dark slate-blue (dominant)
    {r:65,  g:80,  b:115, w:4},  // medium slate
    {r:80,  g:100, b:140, w:3},  // blue-gray
    {r:45,  g:55,  b:80,  w:3},  // deep navy
    {r:100, g:120, b:160, w:2},  // lighter steel
    {r:255, g:210, b:39,  w:1}   // #ffd227 amber (rare accent)
  ];
  // Expand the palette by weight so a uniform random index yields a
  // weighted colour choice.
  var wc = [];
  for (var p = 0; p < palette.length; p++)
    for (var w = 0; w < palette[p].w; w++) wc.push(palette[p]);

  var NUM = 45;
  var bars = [];

  // Create one bar. With `scatter` true the bar starts somewhere inside the
  // viewport (initial fill); otherwise it spawns off the left edge.
  function makeBar(scatter) {
    var c = wc[Math.floor(Math.random() * wc.length)];

    // Shallow angle like codeflash.ai: 8-22 degrees
    var angleDeg = 8 + Math.random() * 14;
    var angle = angleDeg * Math.PI / 180;

    // Bar dimensions: long and rectangular
    var len = 120 + Math.random() * 500;
    var thickness = 3 + Math.random() * 10;  // chunky bars
    var speed = 0.15 + Math.random() * 0.6;  // slow drift (px per frame)

    // Spawn off the left edge, scattered vertically
    var x = -len - Math.random() * 400;
    var y = Math.random() * H * 1.4 - H * 0.2;

    if (scatter) {
      x = Math.random() * (W + len) - len * 0.5;
      y = Math.random() * H;
    }

    return {
      x: x, y: y,
      angle: angle,
      speed: speed,
      len: len,
      thickness: thickness,
      color: c,
      alpha: 0.04 + Math.random() * 0.1,
      life: 0,                              // frames lived so far
      maxLife: 800 + Math.random() * 2000   // frames before respawn
    };
  }

  for (var i = 0; i < NUM; i++) bars.push(makeBar(true));

  // Advance and paint every bar, then schedule the next frame.
  function draw() {
    ctx.clearRect(0, 0, W, H);

    for (var i = 0; i < NUM; i++) {
      var b = bars[i];
      var cos = Math.cos(b.angle);
      var sin = Math.sin(b.angle);
      b.x += cos * b.speed;
      b.y += sin * b.speed;
      b.life++;

      // Respawn once the bar is well past the right edge or has expired.
      if (b.x > W + b.len * 2 || b.life > b.maxLife) {
        bars[i] = makeBar(false);
        b = bars[i];
        // The replacement has its own angle, so refresh the direction.
        cos = Math.cos(b.angle);
        sin = Math.sin(b.angle);
      }

      // Lifecycle fade: ramp in over the first 15% of life, out over the
      // last 30%.
      var lp = b.life / b.maxLife;
      var fade = lp < 0.15 ? lp / 0.15 :
                 lp > 0.7 ? (1 - lp) / 0.3 : 1;
      var a = b.alpha * Math.max(0, fade);

      // Perpendicular offset for thickness
      var px = -sin * b.thickness * 0.5;
      var py = cos * b.thickness * 0.5;

      // End points of the bar's centre line
      var x0 = b.x;
      var y0 = b.y;
      var x1 = b.x + cos * b.len;
      var y1 = b.y + sin * b.len;

      // Gradient along the bar length: fade ends, solid middle
      var grad = ctx.createLinearGradient(x0, y0, x1, y1);
      var rgb = 'rgba(' + b.color.r + ',' + b.color.g + ',' + b.color.b + ',';
      grad.addColorStop(0, rgb + '0)');
      grad.addColorStop(0.2, rgb + a + ')');
      grad.addColorStop(0.8, rgb + a + ')');
      grad.addColorStop(1, rgb + '0)');

      // Four corners of the bar
      ctx.beginPath();
      ctx.moveTo(x0 + px, y0 + py);
      ctx.lineTo(x1 + px, y1 + py);
      ctx.lineTo(x1 - px, y1 - py);
      ctx.lineTo(x0 - px, y0 - py);
      ctx.closePath();
      ctx.fillStyle = grad;
      ctx.fill();
    }
    requestAnimationFrame(draw);
  }
  requestAnimationFrame(draw);
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
|
|
@ -838,12 +1027,10 @@ app.layout = html.Div(
|
|||
"position": "relative",
|
||||
},
|
||||
children=[
|
||||
# ── Grid overlay ──
|
||||
html.Div(style=GRID_OVERLAY),
|
||||
# ── Hero ──
|
||||
html.Div(
|
||||
style={
|
||||
"background": f"linear-gradient(135deg, {BG} 0%, #1a0f0f 50%, {BG} 100%)",
|
||||
"background": f"linear-gradient(135deg, {BG} 0%, #131a24 50%, {BG} 100%)",
|
||||
"padding": "60px 24px 52px",
|
||||
"textAlign": "center",
|
||||
"borderBottom": f"1px solid {CARD_BORDER}",
|
||||
|
|
|
|||
73
reports/unstructured/talking-points-lawrence.md
Normal file
73
reports/unstructured/talking-points-lawrence.md
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# Talking Points: Lawrence Elitzer (Unstructured)
|
||||
|
||||
## Who is Lawrence
|
||||
|
||||
- **Lawrence Elitzer ("LoLo")** — sole security engineer at Unstructured since Nov 2025
|
||||
- **Background**: CACI (defense/intel cyber) -> LGS Innovations (Bell Labs network security) -> JumpCloud (EM/Director, Security) -> Unstructured
|
||||
- **GitHub**: `lawrence-u10d` — 100+ PRs since joining, extremely active
|
||||
- **Tools**: Uses Claude Code and Cursor daily. Already an AI-assisted security practitioner
|
||||
- **Style**: Deeply technical. Writes detailed PR descriptions with CVE tables, CVSS scores, version diffs, and test plans. Do NOT give him executive risk dashboards — he's the one writing the CVE tables
|
||||
|
||||
## What He's Actively Working On
|
||||
|
||||
These are his actual PRs from the last 6 weeks. Use them to demonstrate we understand his world:
|
||||
|
||||
1. **Supply chain lockdown** (his #1 priority)
|
||||
- Built custom OpenCV wheels from source with `WITH_FFMPEG=OFF` to eliminate 14 bundled ffmpeg CVEs (PR #4336)
|
||||
- SHA256-verified spaCy model downloads at runtime (PR #4258)
|
||||
- Deployed Renovate across all 8 repos for automated dep updates
|
||||
- Weekly CVE triage with structured dependency upgrade PRs across every repo
|
||||
|
||||
2. **Lockfile enforcement** — PR #1465 (`fix: make uv.lock authoritative for Docker builds`)
|
||||
- **This is SEC-001/SEC-002 from our audit.** He already found and fixed it for core-product
|
||||
- Key quote from his PR: "The lockfile is theater — uv.lock is committed and maintained but never consumed by the Docker build"
|
||||
- **Our angle**: "You fixed it in core-product, but `uv pip install` still shows up in 6+ CI steps and Makefiles across the org. Same pattern, different attack surface."
|
||||
|
||||
3. **Container security** — Chainguard/wolfi-base adoption, Anchore/Grype scanning
|
||||
- He **disabled Anchore fail-on-crit** (PR #4285) because it was blocking the pipeline
|
||||
- This suggests he's overwhelmed by CVE noise and is making pragmatic tradeoffs — he likely needs help triaging
|
||||
|
||||
4. **CI/CD observability** — Built full OpenTelemetry instrumentation for CI pipelines with Honeycomb (PR #339)
|
||||
- Also hardened GHA workflows: moved `${{ inputs.* }}` from `run:` to `env:` to prevent injection
|
||||
- He already understands the CI/CD injection problem — show him the ones he hasn't found yet
|
||||
|
||||
5. **NCC Group compliance** — validation builds across repos indicate third-party pen test cycles
|
||||
|
||||
## How to Open the Conversation
|
||||
|
||||
**DO**: Reference his actual work. "We saw you made `uv.lock` authoritative for Docker builds in core-product — that's exactly the pattern we flagged in SEC-001. The same bypass still exists in 6 CI workflows and the Makefile."
|
||||
|
||||
**DO**: Frame as bandwidth, not education. "You've already deployed Renovate, Anchore, and Chainguard. The gap isn't knowledge — it's coverage across 8 repos with one engineer."
|
||||
|
||||
**DO**: Emphasize automation. "All fixes delivered as PRs that integrate with your existing workflows. No new tools to adopt."
|
||||
|
||||
**DON'T**: Explain what a supply chain attack is. He literally builds custom wheels from source with SHA256 verification.
|
||||
|
||||
**DON'T**: Use executive framing. No "risk scores" or "security posture" abstractions. Show CVEs, affected files, and dependency chains.
|
||||
|
||||
**DON'T**: Pitch tools he already has. He already runs Renovate, Anchore, Chainguard, Socket.dev. We're offering engineering bandwidth, not another scanner.
|
||||
|
||||
## Findings That Will Hit Hardest (in order)
|
||||
|
||||
| Finding | Why It Resonates |
|
||||
|---|---|
|
||||
| SEC-001: Lockfile bypass | He fixed this himself in core-product. We show it still exists elsewhere — proves we're thorough |
|
||||
| SEC-009: Chinese PyPI mirror | Supply chain is his obsession. A `pip install` hitting a third-party mirror in a Docker build is nightmare fuel |
|
||||
| SEC-010: Creds in Actions cache | He's doing automated dep upgrades — if the cache leaks private PyPI creds, his Renovate pipeline is a vector |
|
||||
| SEC-021: 67% mutable Action pins | He SHA256-verifies everything (spaCy wheels, otel-cli). Seeing 67% of Actions on mutable tags will sting |
|
||||
| SEC-012: Docker images run as root | He's doing Chainguard/wolfi adoption. Root containers undermine all that work |
|
||||
| SEC-007: Claude Code Action injection | He uses Claude Code himself. A workflow triggered by any GitHub commenter is a personal-stakes finding |
|
||||
|
||||
## The Core Pitch
|
||||
|
||||
> "You're one engineer doing the security work of a team across 8 repos. You've already built the right infrastructure — Renovate for deps, Anchore for containers, OTel for CI visibility. But there are 33 findings across layers that one person can't get to fast enough. We have scoped PRs ready for all of them, built to plug into the workflows you already run. No new tools, just the bandwidth you don't have."
|
||||
|
||||
## Anticipated Objections
|
||||
|
||||
| Objection | Response |
|
||||
|---|---|
|
||||
| "We already have Renovate/Anchore for this" | "Renovate catches published CVEs in direct deps. 7 of our findings are in CI workflow injection, secrets handling, and container config — layers Renovate doesn't touch." |
|
||||
| "I disabled Anchore fail-on-crit because of noise" | "Exactly. You're triaging noise when the real issues are deeper — unpinned Actions, secrets in cache, root containers. We cut through the noise to what matters." |
|
||||
| "These are all fixable, just haven't gotten to them" | "That's the point. You know what needs to happen. We're offering engineering time to make it happen, delivered as PRs in your existing workflow." |
|
||||
| "How do I know the fixes are good?" | "Every fix is a reviewed PR. You've already seen our work in the uv workspace migration (PR #361). Same quality, same workflow." |
|
||||
| "We're about to do an NCC Group assessment" | "Perfect timing. Closing these 33 findings before the assessment means fewer findings in the report and less remediation pressure afterward." |
|
||||
|
|
@ -3,12 +3,12 @@
|
|||
# ── Colors (Codeflash dark - amber/zinc) ────────────────────────────────────
|
||||
# Each name is assigned exactly once; the superseded zinc values for CARD_BG,
# CARD_BORDER and BG were dead stores and have been dropped.
ACCENT = "#ffd227"
DARK = "#09090b"
CARD_BG = "rgba(16,20,28,0.7)"  # dark navy/70 — readable over animated bg
CARD_BORDER = "rgba(63,63,70,0.35)"  # zinc-700/35
SLATE = "#e4e4e7"
GRAY = "#a1a1aa"
LIGHT_GRAY = "#71717a"
BG = "#0d1117"  # dark navy (codeflash.ai style)
WHITE = "#fafafa"
GREEN = "#4ade80"
LIGHT_GREEN = "rgba(74,222,128,0.12)"
|
||||
|
|
|
|||
16
uv.lock
16
uv.lock
|
|
@ -22,6 +22,7 @@ members = [
|
|||
"codeflash-service",
|
||||
"codeflash-workspace",
|
||||
"unstructured-report",
|
||||
"unstructured-security-audit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3078,6 +3079,21 @@ requires-dist = [
|
|||
{ name = "plotly", specifier = ">=6.7" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unstructured-security-audit"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "reports/unstructured-security" }
|
||||
dependencies = [
|
||||
{ name = "dash", extra = ["cloud"] },
|
||||
{ name = "plotly" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "dash", extras = ["cloud"], specifier = ">=4.1" },
|
||||
{ name = "plotly", specifier = ">=6.7" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.6.3"
|
||||
|
|
|
|||
Loading…
Reference in a new issue