"""Pull GitHub PR data and standup notes into data.json for the Dash app."""

from __future__ import annotations

import functools
import json
import os
import re
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta, timezone
from pathlib import Path

import requests

REPOS = [
    "codeflash",
    "codeflash-internal",
    "codeflash-agent",
    "github-workflows",
]
ORG = "codeflash-ai"
NOTES_DIR = Path(__file__).parent / "notes"
DATA_FILE = Path(__file__).parent / "data.json"
REPO_ROOT = Path(__file__).parents[2]
CI_AUDIT_FILE = REPO_ROOT / "reports" / "codeflash-ci-audit" / "data.json"


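# Resolve a GitHub API token: prefer GITHUB_TOKEN / GH_TOKEN from the
# environment, then fall back to the `gh` CLI. lru_cache means the
# subprocess runs at most once per process.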
@functools.lru_cache(maxsize=1)
def gh_token() -> str:
    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN", "")
    if not token:
        import subprocess

        result = subprocess.run(
            ["gh", "auth", "token"],
            capture_output=True,
            text=True,
            check=False,
        )
        token = result.stdout.strip()
    return token


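# Auth headers for the GitHub REST API.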
def gh_headers() -> dict[str, str]:
    return {
        "Authorization": f"token {gh_token()}",
        "Accept": "application/vnd.github+json",
    }


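# Fetch the 30 most recently updated PRs for a single repo. A non-200
# response is treated as "no data" so one bad repo cannot break the whole
# refresh; `since` drops PRs whose last update predates the cutoff.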
def _fetch_repo_prs(
    repo: str, state: str, headers: dict, since: datetime | None = None
) -> list[dict]:
    url = f"https://api.github.com/repos/{ORG}/{repo}/pulls"
    params: dict[str, str] = {
        "state": state,
        "per_page": "30",
        "sort": "updated",
        "direction": "desc",
    }
    resp = requests.get(url, headers=headers, params=params, timeout=15)
    if resp.status_code != 200:
        return []
    prs = []
    for pr in resp.json():
        updated = datetime.fromisoformat(
            pr["updated_at"].replace("Z", "+00:00")
        )
        if since and updated < since:
            continue
        prs.append(
            {
                "repo": repo,
                "number": pr["number"],
                "title": pr["title"],
                "state": pr["state"],
                "author": pr["user"]["login"],
                "url": pr["html_url"],
                "created_at": pr["created_at"],
                "updated_at": pr["updated_at"],
                "merged_at": pr.get("merged_at"),
                "draft": pr.get("draft", False),
            }
        )
    return prs


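# Fan the per-repo fetches out across a thread pool, one worker per repo.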
def fetch_prs(state: str, since: datetime | None = None) -> list[dict]:
    headers = gh_headers()
    with ThreadPoolExecutor(max_workers=len(REPOS)) as pool:
        futures = [
            pool.submit(_fetch_repo_prs, repo, state, headers, since)
            for repo in REPOS
        ]
        prs = []
        for f in futures:
            prs.extend(f.result())
    return prs


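# Parse a standup note written in Markdown: the first `#` heading is the
# title, each `##` heading opens a section (lowercased key), bullets and
# loose lines become entries, and fenced code blocks are captured as single
# "code:..." entries.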
def parse_note(path: Path) -> dict:
    text = path.read_text(encoding="utf-8")
    sections: dict[str, list[str]] = {}
    current = None
    title = None
    in_code = False
    code_lines: list[str] = []
    for line in text.splitlines():
        if line.strip().startswith("```"):
            if in_code:
                if current:
                    sections[current].append("code:" + "\n".join(code_lines))
                code_lines = []
                in_code = False
            else:
                in_code = True
            continue
        if in_code:
            code_lines.append(line)
            continue
        h1 = re.match(r"^#\s+(.+)", line)
        if h1 and not title:
            title = h1.group(1).strip()
            continue
        heading = re.match(r"^##\s+(.+)", line)
        if heading:
            current = heading.group(1).strip().lower()
            sections[current] = []
        elif current and line.strip().startswith("- "):
            sections[current].append(line.strip().removeprefix("- "))
        elif current and line.strip():
            sections[current].append(line.strip())
    return {
        "date": path.stem,
        "title": title or path.stem,
        "sections": sections,
    }


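# Newest notes first; assumes date-stamped filenames (the stem doubles as the
# note's date), so a reverse lexical sort gives reverse-chronological order.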
def load_notes() -> list[dict]:
    if not NOTES_DIR.exists():
        return []
    notes = []
    for f in sorted(NOTES_DIR.glob("*.md"), reverse=True):
        notes.append(parse_note(f))
    return notes


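# Build data.json: open PRs, PRs merged in the last 7 days, standup notes,
# and the CI-audit report when its data file is present.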
def main():
    now = datetime.now(timezone.utc)
    week_ago = now - timedelta(days=7)

    open_prs = fetch_prs("open")
    closed_prs = fetch_prs("closed", since=week_ago)
    merged_prs = [pr for pr in closed_prs if pr["merged_at"]]
    notes = load_notes()

    ci_audit = None
    resolved = CI_AUDIT_FILE.resolve()
    if resolved.exists():
        ci_audit = json.loads(resolved.read_text(encoding="utf-8"))

    data = {
        "generated_at": now.isoformat(),
        "org": ORG,
        "repos": REPOS,
        "open_prs": open_prs,
        "merged_prs": merged_prs,
        "notes": notes,
        "summary": {
            "total_open": len(open_prs),
            "total_merged_7d": len(merged_prs),
            "draft_count": sum(1 for pr in open_prs if pr["draft"]),
            "repos_with_open_prs": len({pr["repo"] for pr in open_prs}),
        },
        "ci_audit": ci_audit,
    }

    DATA_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(
        f"Wrote {DATA_FILE} ({len(open_prs)} open, {len(merged_prs)} merged)"
    )


if __name__ == "__main__":
    main()