diff --git a/.codex/config.toml b/.codex/config.toml new file mode 100644 index 000000000..433432493 --- /dev/null +++ b/.codex/config.toml @@ -0,0 +1,4 @@ +[mcp_servers.tessl] +type = "stdio" +command = "tessl" +args = [ "mcp", "start" ] diff --git a/.codex/skills/tessl:add-api-endpoint b/.codex/skills/tessl:add-api-endpoint new file mode 120000 index 000000000..88fc7bd32 --- /dev/null +++ b/.codex/skills/tessl:add-api-endpoint @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/add-api-endpoint \ No newline at end of file diff --git a/.codex/skills/tessl:add-language-support b/.codex/skills/tessl:add-language-support new file mode 120000 index 000000000..7036d69fb --- /dev/null +++ b/.codex/skills/tessl:add-language-support @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/add-language-support \ No newline at end of file diff --git a/.codex/skills/tessl:debug-optimization-failure b/.codex/skills/tessl:debug-optimization-failure new file mode 120000 index 000000000..e4263a2b7 --- /dev/null +++ b/.codex/skills/tessl:debug-optimization-failure @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/debug-optimization-failure \ No newline at end of file diff --git a/.codex/skills/tessl:debug-test-generation b/.codex/skills/tessl:debug-test-generation new file mode 120000 index 000000000..e7ad3ee2b --- /dev/null +++ b/.codex/skills/tessl:debug-test-generation @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/debug-test-generation \ No newline at end of file diff --git a/.gemini/settings.json b/.gemini/settings.json new file mode 100644 index 000000000..ebfccaac7 --- /dev/null +++ b/.gemini/settings.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": [ + "mcp", + "start" + ] + } + } +} diff --git a/.gemini/skills/tessl:add-api-endpoint b/.gemini/skills/tessl:add-api-endpoint new file mode 120000 index 000000000..88fc7bd32 --- /dev/null +++ b/.gemini/skills/tessl:add-api-endpoint @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/add-api-endpoint \ No newline at end of file diff --git a/.gemini/skills/tessl:add-language-support b/.gemini/skills/tessl:add-language-support new file mode 120000 index 000000000..7036d69fb --- /dev/null +++ b/.gemini/skills/tessl:add-language-support @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/add-language-support \ No newline at end of file diff --git a/.gemini/skills/tessl:debug-optimization-failure b/.gemini/skills/tessl:debug-optimization-failure new file mode 120000 index 000000000..e4263a2b7 --- /dev/null +++ b/.gemini/skills/tessl:debug-optimization-failure @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/debug-optimization-failure \ No newline at end of file diff --git a/.gemini/skills/tessl:debug-test-generation b/.gemini/skills/tessl:debug-test-generation new file mode 120000 index 000000000..e7ad3ee2b --- /dev/null +++ b/.gemini/skills/tessl:debug-test-generation @@ -0,0 +1 @@ +../../.tessl/tiles/codeflash/codeflash-internal-skills/skills/debug-test-generation \ No newline at end of file diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 000000000..ebfccaac7 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "tessl": { + "type": "stdio", + "command": "tessl", + "args": [ + "mcp", + "start" + ] + } + } +} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..ea641555e 
--- /dev/null +++ b/AGENTS.md @@ -0,0 +1,5 @@ + + +# Agent Rules + +@.tessl/RULES.md follow the [instructions](.tessl/RULES.md) diff --git a/CLAUDE.md b/CLAUDE.md index 44a90139f..b1e920c18 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -3,3 +3,5 @@ Monorepo: `django/aiservice` (Python backend), `js/` (cf-api, cf-webapp, common, VSC-Extension), `cli/`, `deployment/`, `experiments/`. Pre-commit: `uv run prek run --all-files` from repo root. + +@AGENTS.md diff --git a/tessl.json b/tessl.json new file mode 100644 index 000000000..e08d23455 --- /dev/null +++ b/tessl.json @@ -0,0 +1,15 @@ +{ + "name": "my-project", + "mode": "vendored", + "dependencies": { + "codeflash/codeflash-internal-rules": { + "version": "0.1.0" + }, + "codeflash/codeflash-internal-docs": { + "version": "0.1.0" + }, + "codeflash/codeflash-internal-skills": { + "version": "0.1.0" + } + } +} diff --git a/tiles/codeflash-internal-docs/docs/aiservice-endpoints.md b/tiles/codeflash-internal-docs/docs/aiservice-endpoints.md new file mode 100644 index 000000000..303fa8ec1 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/aiservice-endpoints.md @@ -0,0 +1,29 @@ +# AIService Endpoints + +All Django-Ninja API endpoints registered in `aiservice/urls.py`. + +## Endpoint Map + +| Path | API | Module | Description | +|------|-----|--------|-------------| +| `/ai/optimize` | `optimize_api` | `core.shared.optimizer_router` | Main optimization — dispatches by language | +| `/ai/optimize-line-profiler` | `optimize_line_profiler_api` | `core.languages.python.optimizer.optimizer_line_profiler` | Line profiler optimization | +| `/ai/testgen` | `testgen_api` | `core.shared.testgen_router` | Test generation — dispatches by language | +| `/ai/log_features` | `features_api` | `core.log_features.log_features` | Feature logging | +| `/ai/refinement` | `refinement_api` | `core.languages.python.optimizer.refinement` | Candidate refinement | +| `/ai/explain` | `explanations_api` | `core.languages.python.explanations.explanations` | Code explanations | +| `/ai/rank` | `ranker_api` | `core.shared.ranker.ranker` | Function ranking | +| `/ai/optimization_review` | `optimization_review_api` | `core.languages.python.optimization_review.optimization_review` | Optimization review | +| `/ai/code_repair` | `code_repair_api` | `core.languages.python.code_repair.code_repair` | Code repair for failed candidates | +| `/ai/adaptive_optimize` | `adaptive_optimize_api` | `core.languages.python.adaptive_optimizer.adaptive_optimizer` | Adaptive optimization | +| `/ai/workflow-gen` | `workflow_gen_api` | `core.shared.workflow_gen.workflow_gen` | Workflow generation | +| `/ai/rewrite_jit` | `jit_rewrite_api` | `core.languages.python.jit_rewrite.jit_rewrite` | JIT rewrite | + +## Common Patterns + +- All endpoints are `async def` +- Authentication via `AuthenticatedRequest` (HttpBearer + django_auth) +- Request schemas use `ninja.Schema` (Pydantic under the hood) +- Response schemas define typed success and error responses: `response={200: SuccessSchema, 400: ErrorSchema, 500: ErrorSchema}` +- Multi-language endpoints (optimize, testgen) dispatch by `data.language` field +- Python-only endpoints import directly from `core.languages.python.*` diff --git a/tiles/codeflash-internal-docs/docs/cf-api-endpoints.md b/tiles/codeflash-internal-docs/docs/cf-api-endpoints.md new file mode 100644 index 000000000..38db2cd2d --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/cf-api-endpoints.md @@ -0,0 +1,58 @@ +# CF-API Endpoints + +Express routes in `js/cf-api/routes/`. 
The cf-api acts as middleware between clients (VSC-Extension, CLI) and the aiservice backend. + +## Route Registration Order (`routes/index.ts`) + +Registration order matters — webhook routes must be before the body parser: + +1. **Webhook routes** — before `express.json()` (raw body for signature verification) +2. **Body parser** — `express.json({ limit: JSON_BODY_LIMIT })` +3. **Public routes** — no authentication required +4. **Protected routes** — require API key (`checkForValidAPIKey` middleware) + +## Route Files + +### `webhook.routes.ts` + +- `POST /github/webhooks` — GitHub App webhook handler (Octokit signature verification) +- `POST /stripe/webhooks` — Stripe webhook handler +- Both need raw body access (before JSON parser) + +### `optimization.routes.ts` + +Protected optimization endpoints: +- `POST /suggest-pr-changes` — suggest PR changes +- `POST /create-pr` — create optimization PR +- `POST /verify-existing-optimizations` — check existing optimizations +- `POST /is-already-optimized` — check if code was already optimized +- `POST /add-code-hash` — add optimized code context hash +- `POST /mark-as-success` — mark optimization as successful +- `POST /create-staging` — create staging review +- `POST /get-staging-code` — get staged code +- `POST /commit-staging-code` — commit staged code +- `POST /test-repo` — add repository manually + +### `github.routes.ts` + +GitHub-related endpoints for repository management. + +### `subscription.routes.ts` + +Subscription management endpoints. + +### `user.routes.ts` + +User management endpoints. + +### `public.routes.ts` + +Public endpoints (no authentication): health checks, version info. + +## Middleware Stack + +- `checkForValidAPIKey` — API key authentication +- `trackEndpointCalls` — PostHog endpoint tracking +- `idLimiter` — rate limiting +- `logAuthEvent` / `logRequestBody` — enhanced logging (dev only) +- `trackUsage` — usage tracking for optimization endpoints diff --git a/tiles/codeflash-internal-docs/docs/configuration-thresholds.md b/tiles/codeflash-internal-docs/docs/configuration-thresholds.md new file mode 100644 index 000000000..d46741809 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/configuration-thresholds.md @@ -0,0 +1,49 @@ +# Configuration & Thresholds + +Key constants and configuration values used across the optimization pipeline. 
+ +## Model Distribution (`core/shared/optimizer_config.py`) + +- `MAX_OPTIMIZER_CALLS = 6` — maximum parallel optimization calls +- `MAX_OPTIMIZER_LP_CALLS = 7` — maximum line profiler optimization calls +- Distribution formula: `claude_calls = (total - 1) // 2`, `gpt_calls = total - claude_calls` +- Example: 6 total → 4 OpenAI + 2 Anthropic + +## Default Request Values + +- `OptimizeSchema.n_candidates = 5` — default optimization candidates +- `OptimizeSchemaLP.n_candidates = 6` — default line profiler candidates +- `OptimizeSchema.language = "python"` — default language + +## LLM Model Costs (USD per 1M tokens) + +| Model | Input | Cached Input | Output | +|-------|-------|-------------|--------| +| GPT-4.1 | $2.00 | $0.50 | $8.00 | +| GPT-5-mini | $0.25 | $0.03 | $2.00 | +| Claude Sonnet 4.5 | $3.00 | — | $15.00 | +| Claude Haiku 4.5 | $1.00 | — | $5.00 | + +## Model Assignments + +| Purpose | Primary | Fallback | +|---------|---------|----------| +| Optimization | GPT-5-mini | Claude Sonnet 4.5 | +| Explanation | GPT-5-mini | Claude Sonnet 4.5 | +| Planning | GPT-5-mini | Claude Sonnet 4.5 | +| Execution | GPT-5-mini | Claude Sonnet 4.5 | +| Ranking | GPT-5-mini | Claude Sonnet 4.5 | +| Refinement | Claude Sonnet 4.5 | GPT-5-mini | +| Code Repair | Claude Sonnet 4.5 | GPT-5-mini | +| Explanations | Claude Sonnet 4.5 | GPT-5-mini | +| Optimization Review | Claude Sonnet 4.5 | GPT-5-mini | +| Adaptive Optimize | Claude Sonnet 4.5 | GPT-5-mini | +| Cost-effective (testgen) | Claude Haiku 4.5 | GPT-5-mini | + +## Ruff Configuration (`pyproject.toml`) + +- `line-length = 120` +- `select = ["ALL"]` with specific ignores (see `tool.ruff.lint.ignore`) +- `fix = true`, `show-fixes = true` +- Runtime-evaluated base classes: `pydantic.BaseModel`, `ninja.Schema`, `typing.TypedDict` +- Excluded paths: `core/languages/python/testgen/tests`, `core/languages/python/testgen/sqlalchemy` diff --git a/tiles/codeflash-internal-docs/docs/context-extraction.md b/tiles/codeflash-internal-docs/docs/context-extraction.md new file mode 100644 index 000000000..d5773f315 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/context-extraction.md @@ -0,0 +1,49 @@ +# Context Extraction + +How code context is extracted and prepared for LLM optimization prompts. + +## Context Types + +### Single-File Context (`SingleOptimizerContext`) + +Used when the function to optimize lives in a single file: +- Extracts the function source code +- Collects helper functions and class definitions +- Formats as system prompt + user prompt + +### Multi-File Context (`MultiOptimizerContext`) + +Used when the function spans or depends on multiple files: +- Collects code from multiple source files +- Manages file-path-annotated code blocks + +## `BaseOptimizerContext` (`optimizer_context.py`) + +Abstract base class for all context types: + +### Factory Method + +`get_dynamic_context()` — dispatches to `SingleOptimizerContext` or `MultiOptimizerContext` based on the input. 
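+A minimal sketch of the dispatch, assuming the factory keys off how many files the context spans (the real classmethod in `optimizer_context.py` takes richer inputs, so treat `file_count` as a hypothetical stand-in):
+
+```python
+class BaseOptimizerContext:
+    @classmethod
+    def get_dynamic_context(cls, file_count: int) -> "BaseOptimizerContext":
+        # Hypothetical signal: a single file selects the single-file context
+        if file_count <= 1:
+            return SingleOptimizerContext()
+        return MultiOptimizerContext()
+
+class SingleOptimizerContext(BaseOptimizerContext): ...
+class MultiOptimizerContext(BaseOptimizerContext): ...
+```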
+### Prompt Construction + +- `get_system_prompt(python_version_str)` — builds system prompt with language version +- `get_user_prompt(dependency_code, line_profiler_results)` — builds user prompt with code and optional profiler data + +### LLM Response Parsing + +- `extract_code_and_explanation_from_llm_res(content)` — parses markdown code blocks from LLM output, extracts code and explanation text +- `parse_and_generate_candidate_schema()` — converts extracted code into `OptimizeResponseItemSchema` +- `is_valid_code()` — validates the extracted code is syntactically correct + +## Code Formatting + +LLM responses use markdown code blocks with file path annotations: + +```` +```python:path/to/file.py +# optimized code here +``` +```` + +The context extraction system both generates this format (for prompts) and parses it (from responses). diff --git a/tiles/codeflash-internal-docs/docs/domain-types.md b/tiles/codeflash-internal-docs/docs/domain-types.md new file mode 100644 index 000000000..d6aaaeecd --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/domain-types.md @@ -0,0 +1,59 @@ +# Domain Types + +Core schemas and models used across the codeflash-internal backend. + +## Request/Response Schemas (`core/shared/optimizer_models.py`) + +### `OptimizedCandidateSource` enum + +How a candidate was generated: `OPTIMIZE`, `OPTIMIZE_LP`, `REFINE`, `REPAIR`, `ADAPTIVE`, `JIT_REWRITE`. + +### `OptimizeSchema` (ninja.Schema) + +Main optimization request: +- `source_code: str` — code to optimize +- `dependency_code: str | None` — read-only dependency code +- `trace_id: str` — unique request identifier +- `language: str = "python"` — language identifier (python, javascript, typescript) +- `language_version: str | None` — e.g., "ES2022", "Node 20", or Python version +- `n_candidates: int = 5` — number of optimization candidates to generate +- `is_async: bool | None = False` — whether the function is async +- `python_version: str | None` — optional, for multi-language support +- `experiment_metadata`, `codeflash_version`, `current_username`, `repo_owner`, `repo_name` — tracking fields + +### `OptimizeSchemaLP` (ninja.Schema) + +Line profiler variant — same as `OptimizeSchema` plus: +- `line_profiler_results: str | None` — line profiler output +- `n_candidates: int = 6` — default higher for line profiler optimization + +## Domain Models (`core/log_features/models.py`) + +### `OptimizationFeatures` (Django Model) + +Stores per-optimization data: +- `trace_id` (PK) — unique optimization identifier +- `original_code` — original source code +- `optimizations_raw` / `optimizations_post` — raw and postprocessed candidate code +- `speedup_ratio` — best speedup achieved +- `test_results` — test execution results +- Approval workflow: `approval_status`, `approved_by`, `approved_at` + +### `OptimizationEvents` (Django Model) + +Event logging for optimization lifecycle: +- `pr_created`, `pr_merged`, `pr_closed` — PR events +- `llm_cost` — total LLM API cost +- `speedup_x` / `speedup_pct` — performance improvement metrics + +### `Repositories` (Django Model) + +GitHub repository metadata: +- `installation_id` — GitHub App installation ID +- `is_private` — whether the repo is private +- `optimizations_limit` / `optimizations_used` — usage tracking + +## Response Schemas (`core/shared/optimizer_schemas.py`) + +- `OptimizeResponseSchema` — successful optimization response with candidates +- `OptimizeErrorResponseSchema` — error response with message diff --git a/tiles/codeflash-internal-docs/docs/index.md
b/tiles/codeflash-internal-docs/docs/index.md new file mode 100644 index 000000000..b4d3dd779 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/index.md @@ -0,0 +1,21 @@ +# Codeflash Internal Documentation + +CodeFlash's AI-powered code optimization backend. The aiservice (Django-Ninja) receives optimization requests from cf-api, dispatches to language-specific handlers, calls LLMs, postprocesses results, and returns optimized candidates. + +## Service Flow + +``` +VSC-Extension / CLI → cf-api (Express, :3001) → aiservice (Django-Ninja, :8000) +cf-webapp (:3000) reads from the same PostgreSQL DB via Prisma +``` + +## Documentation Pages + +- [Domain Types](domain-types.md) — Core schemas and domain models +- [Optimization Pipeline](optimization-pipeline.md) — End-to-end optimization flow +- [Test Generation Pipeline](test-generation-pipeline.md) — Test generation flow +- [Context Extraction](context-extraction.md) — How code context is extracted for LLM prompts +- [AIService Endpoints](aiservice-endpoints.md) — All Django-Ninja endpoints +- [CF-API Endpoints](cf-api-endpoints.md) — All Express routes +- [Configuration & Thresholds](configuration-thresholds.md) — Model distribution, costs, constants +- [LLM Provider Abstraction](llm-provider-abstraction.md) — llm.py usage, client creation, cost tracking diff --git a/tiles/codeflash-internal-docs/docs/llm-provider-abstraction.md b/tiles/codeflash-internal-docs/docs/llm-provider-abstraction.md new file mode 100644 index 000000000..7d54962a4 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/llm-provider-abstraction.md @@ -0,0 +1,63 @@ +# LLM Provider Abstraction + +Unified LLM interface in `aiservice/llm.py` — all LLM calls go through this module. + +## Model Definition (`LLM` dataclass) + +```python +@pydantic_dataclass +class LLM: + name: str # deployment name (e.g., "gpt-5-mini") + max_tokens: int # max context window + model_type: Literal["openai", "anthropic", "google"] + input_cost: float # USD per 1M tokens + cached_input_cost: float # USD per 1M cached tokens + output_cost: float # USD per 1M tokens +``` + +Concrete models: `OpenAI_GPT_4_1`, `OpenAI_GPT_5_Mini`, `Anthropic_Claude_Sonnet_4_5`, `Anthropic_Claude_Haiku_4_5`. + +## Client Setup + +- `_create_openai_client()` — returns `AsyncAzureOpenAI` (reads `AZURE_OPENAI_*` env vars) +- `_create_anthropic_client()` — returns `AsyncAnthropicFoundry` (reads `ANTHROPIC_FOUNDRY_API_KEY` + `ANTHROPIC_FOUNDRY_BASE_URL`) +- `get_llm_client(model_type)` — creates a fresh client per request to avoid event loop issues with Django dev server + +## Calling LLMs + +```python +from aiservice.llm import call_llm, LLM + +response: LLMResponse = await call_llm( + llm=model, # LLM instance + messages=messages, # OpenAI-format messages + call_type="optimization", # tracking label + trace_id=trace_id, # request identifier + max_tokens=16384, # max output tokens + user_id=user_id, # optional tracking +) +# response.content: str +# response.usage: LLMUsage(input_tokens, output_tokens) +# response.raw_response: ChatCompletion | AnthropicMessage +``` + +### Provider Handling + +- **OpenAI (Azure)**: Uses `client.chat.completions.create()`. GPT-5-mini uses `max_completion_tokens`, older models use `max_tokens` +- **Anthropic (Foundry)**: Extracts system prompt from messages list, passes separately via `system=` kwarg. 
Concatenates text blocks from response + +### Observability + +- Every call is recorded to database via `record_llm_call()` in the `finally` block +- Includes: trace_id, call_type, model, messages, result, error, cost, latency + +## Cost Calculation + +`calculate_llm_cost(response, llm)` accounts for cached vs non-cached input tokens: +- **Anthropic**: `cache_read_input_tokens` and `cache_creation_input_tokens` are additive to `input_tokens` +- **OpenAI**: `cached_tokens` is a subset of `prompt_tokens` + +## Response Types + +- `LLMResponse` — wraps `content: str`, `usage: LLMUsage`, `raw_response` +- `LLMUsage` — `input_tokens: int`, `output_tokens: int` diff --git a/tiles/codeflash-internal-docs/docs/optimization-pipeline.md b/tiles/codeflash-internal-docs/docs/optimization-pipeline.md new file mode 100644 index 000000000..0e0adfd83 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/optimization-pipeline.md @@ -0,0 +1,70 @@ +# Optimization Pipeline + +End-to-end flow from optimization request to response. + +## 1. Request Entry (`core/shared/optimizer_router.py`) + +The `optimize_api` NinjaAPI router receives a POST request with `OptimizeSchema`. It dispatches by `data.language`: +- `"javascript"` / `"typescript"` → `core.languages.js_ts.optimizer.optimize_javascript` +- `"java"` → `core.languages.java.optimizer.optimize_java` +- Default → `core.languages.python.optimizer.optimizer.optimize_python` + +All imports are lazy (inside the function body) to avoid circular dependencies. + +## 2. Python Optimization (`core/languages/python/optimizer/optimizer.py`) + +### `optimize_python(request, data) → (status, response)` + +Entry point for Python optimization. Extracts user ID from request, builds context, and calls `optimize_python_code()`. + +### `optimize_python_code_single()` + +Single LLM optimization call: +1. Builds system + user prompts from `BaseOptimizerContext` +2. Calls `call_llm()` with the optimize model +3. Parses response via `ctx.extract_code_and_explanation_from_llm_res()` +4. Validates via `ctx.parse_and_generate_candidate_schema()` +5. Returns `(OptimizeResponseItemSchema, llm_cost, model_name)` or `(None, cost, model)` + +### `optimize_python_code()` + +Parallel optimization with multiple models: +1. Gets model distribution via `get_model_distribution(n_candidates, MAX_OPTIMIZER_CALLS)` +2. Runs parallel calls using `asyncio.TaskGroup` +3. Each call gets a `call_sequence` number for tracking +4. Collects results, costs, and model info from all parallel calls + +## 3. Context Extraction (`optimizer_context.py`) + +- `BaseOptimizerContext.get_dynamic_context()` — factory dispatching to `SingleOptimizerContext` or `MultiOptimizerContext` +- Handles prompt construction with `get_system_prompt()` / `get_user_prompt()` +- `extract_code_and_explanation_from_llm_res()` — parses markdown code blocks from LLM output +- `parse_and_generate_candidate_schema()` — converts extracted code to response schema + +## 4. Postprocessing (`postprocess.py`) + +- `deduplicate_optimizations()` — AST-based dedup using `ast.parse()` + `ast.dump()` +- `equality_check()` — filters out candidates identical to original code +- Uses `libcst` for all code transformations + +## 5. 
Model Distribution (`optimizer_config.py`) + +- `MAX_OPTIMIZER_CALLS = 6`, `MAX_OPTIMIZER_LP_CALLS = 7` +- `get_model_distribution()` splits between OpenAI and Anthropic +- Formula: `claude_calls = (total - 1) // 2`, `gpt_calls = total - claude_calls` +- Returns `[(OPENAI_MODEL, gpt_calls), (ANTHROPIC_MODEL, claude_calls)]` + +## 6. Response + +Returns `(200, OptimizeResponseSchema)` with list of `OptimizeResponseItemSchema` candidates, or `(400/500, OptimizeErrorResponseSchema)` on failure. + +## Key Files + +| File | Role | +|------|------| +| `core/shared/optimizer_router.py` | Language dispatch | +| `core/languages/python/optimizer/optimizer.py` | Python optimization flow | +| `core/languages/python/optimizer/context_utils/optimizer_context.py` | Context & prompt management | +| `core/languages/python/optimizer/postprocess.py` | Dedup & validation | +| `core/shared/optimizer_config.py` | Model distribution | +| `core/shared/optimizer_models.py` | Request/response schemas | diff --git a/tiles/codeflash-internal-docs/docs/test-generation-pipeline.md b/tiles/codeflash-internal-docs/docs/test-generation-pipeline.md new file mode 100644 index 000000000..710452c80 --- /dev/null +++ b/tiles/codeflash-internal-docs/docs/test-generation-pipeline.md @@ -0,0 +1,55 @@ +# Test Generation Pipeline + +Flow from test generation request to instrumented test output. + +## 1. Request Entry (`core/shared/testgen_router.py`) + +The `testgen_api` NinjaAPI router receives a POST request and dispatches by `data.language`: +- `"javascript"` / `"typescript"` → `core.languages.js_ts.testgen.generate_tests_javascript` +- `"java"` → `core.languages.java.testgen.generate_tests_java` +- Default → `core.languages.python.testgen.testgen.generate_tests_python` + +## 2. Python Testgen (`core/languages/python/testgen/testgen.py`) + +### `build_prompt()` + +Jinja2-based prompt builder for test generation: +- Constructs system and user prompts from context +- Handles async/sync function variants +- Includes source code, dependency code, and test context + +### `instrument_tests()` + +Applies instrumentation to generated tests: +- Behavior instrumentation (captures return values, stdout) +- Performance instrumentation (timing loops) + +### `LLMOutputParseError` + +Custom exception for LLM response parsing failures — includes raw output for debugging. + +## 3. Instrumentation (`testgen/instrumentation/instrument_new_tests.py`) + +### Framework Detection + +- `detect_frameworks_from_code()` — parses imports to identify ML frameworks (PyTorch, TensorFlow, JAX) and their aliases +- Used to add GPU sync calls in timing blocks + +### Device Sync + +- `_create_device_sync_precompute_statements()` — pre-computes device sync checks (CUDA, MPS) outside timing blocks +- Ensures accurate timing for GPU-accelerated code + +## 4. 
Postprocessing (`testgen/postprocessing/`) + +- Import management: `add_missing_imports.py` adds `from __future__ import annotations` +- Test validation and cleanup + +## Key Files + +| File | Role | +|------|------| +| `core/shared/testgen_router.py` | Language dispatch | +| `core/languages/python/testgen/testgen.py` | Testgen flow | +| `core/languages/python/testgen/instrumentation/instrument_new_tests.py` | Test instrumentation | +| `core/languages/python/testgen/postprocessing/` | Import management, cleanup | diff --git a/tiles/codeflash-internal-docs/tile.json b/tiles/codeflash-internal-docs/tile.json new file mode 100644 index 000000000..5f93afac8 --- /dev/null +++ b/tiles/codeflash-internal-docs/tile.json @@ -0,0 +1,7 @@ +{ + "name": "codeflash/codeflash-internal-docs", + "version": "0.1.0", + "summary": "Internal documentation for the codeflash-internal aiservice backend", + "private": true, + "docs": "docs/index.md" +} diff --git a/tiles/codeflash-internal-rules/rules/architecture.md b/tiles/codeflash-internal-rules/rules/architecture.md new file mode 100644 index 000000000..6b3630621 --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/architecture.md @@ -0,0 +1,55 @@ +# Architecture + +## Service Flow + +``` +VSC-Extension / CLI → cf-api (Express, :3001) → aiservice (Django-Ninja, :8000) +cf-webapp (:3000) reads from the same PostgreSQL DB via Prisma +``` + +## Monorepo Layout + +``` +codeflash-internal/ +├── django/aiservice/ # Python backend (Django-Ninja, ASGI) +│ ├── aiservice/ # Django project: settings, urls.py, llm.py +│ ├── authapp/ # Authentication (HttpBearer + django_auth) +│ ├── core/ # Business logic +│ │ ├── shared/ # Cross-language routers (optimizer_router, testgen_router, ranker) +│ │ ├── languages/ # Per-language handlers (python/, js_ts/, java/) +│ │ ├── protocols/ # Handler protocols (LanguageHandler, OptimizerProtocol, etc.) 
+│ │ ├── registry.py # Language handler registration +│ │ ├── dispatcher.py # Handler lookup by language + feature +│ │ └── log_features/ # Domain models (OptimizationFeatures, OptimizationEvents) +│ └── tests/ # pytest tests by feature: optimizer/, testgen/, integration/ +├── js/ +│ ├── cf-api/ # Express middleware layer (:3001) +│ ├── cf-webapp/ # Next.js dashboard (:3000) +│ ├── common/ # Shared Prisma schema + utilities (CommonJS) +│ └── VSC-Extension/ # VS Code extension +├── cli/ # CLI tools +└── deployment/ # Infrastructure configs +``` + +## Glossary + +- **Optimization Candidate** — LLM-generated code that may be faster +- **Read-Write Context** — code the LLM can modify +- **Read-Only Context** — code provided as info only (not modified) +- **Tracer** — collects input args for a Python function at runtime +- **Replay Test** — reruns traced inputs to verify behavior +- **Comparator** — compares two Python objects for equality + +## Key Entry Points + +| Task | Start here | +|------|------------| +| Optimization dispatch | `core/shared/optimizer_router.py` | +| Testgen dispatch | `core/shared/testgen_router.py` | +| Python optimization | `core/languages/python/optimizer/optimizer.py` | +| LLM provider abstraction | `aiservice/llm.py` | +| Endpoint registration | `aiservice/urls.py` | +| Domain models | `core/log_features/models.py` | +| Request/response schemas | `core/shared/optimizer_models.py` | +| Handler registration | `core/registry.py` + `core/dispatcher.py` | +| cf-api route registration | `js/cf-api/routes/index.ts` | diff --git a/tiles/codeflash-internal-rules/rules/code-style.md b/tiles/codeflash-internal-rules/rules/code-style.md new file mode 100644 index 000000000..45805fa5c --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/code-style.md @@ -0,0 +1,24 @@ +# Code Style + +## Python (aiservice) + +- **Python 3.12+** — use modern syntax (type unions `X | Y`, `match` statements) +- **Line length**: 120 characters +- **Tooling**: Ruff for linting/formatting (`ruff check .`, `ruff format .`), mypy strict mode, ty for type checking, prek for pre-commit (`uv run prek run --all-files`) +- **Package management**: Always use `uv` — run commands via `uv run` +- **Comments**: Minimal — only explain "why", not "what" +- **Docstrings**: Do not add unless explicitly requested +- **Source transforms**: Use `libcst` for code modification/transformation (preserves formatting); `ast` is acceptable for read-only analysis +- **Async**: All endpoints are `async def` — runs under ASGI via uvicorn. Use `asyncio.TaskGroup` for concurrent operations +- **Schemas**: Use Pydantic `BaseModel` or `ninja.Schema` for all request/response types +- **LLM calls**: Use `aiservice/llm.py` — never call provider APIs directly +- **Prompts**: Stored as `.md` files alongside their modules, rendered with Jinja2 +- **Lazy imports**: Use inside function bodies in routers to avoid circular dependencies (`# noqa: PLC0415`) + +## JavaScript/TypeScript (cf-api, cf-webapp, VSC-Extension) + +- All JS/TS packages use ESLint + Prettier. Run commands from each package directory +- **cf-api**: Express app. Webhook routes MUST be registered before body parser (raw body needed for signature verification). `instrument.ts` must be imported first in entry point (Sentry). Tests use dependency injection: `setXxxDependencies()` / `resetXxxDependencies()` +- **cf-webapp**: Next.js. Default to server components; `"use client"` only for interactivity. Server actions in `"use server"` files. 
Prisma queries in server components only. Path alias `@/*` → `./src/*` +- **VSC-Extension**: Different prettier config (80 width + semicolons vs 100/no-semi elsewhere). npm workspaces for local `@codeflash/*` packages. Sidebar is a separate Vite/React app embedded via webview postMessage +- **Prisma**: Schema in `common/prisma/schema.prisma`, shared by cf-api and cf-webapp. `common` is CommonJS — use `require`-style imports diff --git a/tiles/codeflash-internal-rules/rules/git-conventions.md b/tiles/codeflash-internal-rules/rules/git-conventions.md new file mode 100644 index 000000000..e02efd52d --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/git-conventions.md @@ -0,0 +1,10 @@ +# Git Conventions + +- **Always create a new branch from `main`** — never commit directly to `main` +- Use conventional commit format: `fix:`, `feat:`, `refactor:`, `docs:`, `test:`, `chore:` +- Keep commits atomic — one logical change per commit +- Commit message body should be concise (1-2 sentences max) +- PR titles should also use conventional format +- Branch naming: `cf-#-title` (lowercase, hyphenated) where `#` is the Linear issue number +- If related to a Linear issue, include `CF-#` in the PR body +- Pre-commit: `uv run prek run --all-files` from repo root diff --git a/tiles/codeflash-internal-rules/rules/multi-language-handlers.md b/tiles/codeflash-internal-rules/rules/multi-language-handlers.md new file mode 100644 index 000000000..7e1298b3f --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/multi-language-handlers.md @@ -0,0 +1,32 @@ +# Multi-Language Handlers + +## Registry Pattern + +- `core/registry.py` provides `LanguageRegistry` — a dict mapping `language_id → handler_class` +- Module-level singleton: `registry = LanguageRegistry()` +- Decorator registration: `@register_handler("python")` on handler classes +- Duplicate registration raises `ValueError` + +## Dispatcher + +- `core/dispatcher.py` provides `get_handler_for_language()` and `get_handler_for_feature()` +- `Feature` enum maps feature names to capability flags: `TESTGEN`, `OPTIMIZER`, `CODE_REPAIR`, `JIT_REWRITE`, `OPTIMIZATION_REVIEW`, `EXPLANATIONS` +- `get_handler_for_feature()` checks `supports_*` attribute before instantiation, raises `HandlerNotImplementedError` if unsupported + +## Protocols + +- `core/protocols/` defines runtime-checkable protocols: + - `LanguageHandler` (base): declares `language`, `supports_*` booleans + - `OptimizerProtocol`: `optimizer_optimize(request, data)` + - `TestGenProtocol`: `testgen_generate(request, data)` + - `CodeRepairProtocol`: `code_repair_repair(user_id, optimization_id, ctx)` + +## Adding a New Language + +1. Create `core/languages/<language>/` directory +2. Implement handler class with `@register_handler("<language>")` decorator +3. Set `supports_*` flags for implemented features +4. Implement protocol methods for each supported feature +5. Import the module in `core/languages/__init__.py` so the decorator fires +6. Add language routing in `core/shared/optimizer_router.py` and `core/shared/testgen_router.py` +7.
Add tests in `tests/` for the new handler diff --git a/tiles/codeflash-internal-rules/rules/optimization-patterns.md b/tiles/codeflash-internal-rules/rules/optimization-patterns.md new file mode 100644 index 000000000..97a5281aa --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/optimization-patterns.md @@ -0,0 +1,39 @@ +# Optimization Patterns + +## Router Dispatch + +- Shared routers in `core/shared/` dispatch by the `language` field on the request schema +- Lazy imports inside the endpoint body to avoid circular dependencies: + ```python + if data.language in ("javascript", "typescript"): + from core.languages.js_ts.optimizer import optimize_javascript # noqa: PLC0415 + return await optimize_javascript(request, data) + ``` +- Default language is Python + +## Context Extraction + +- `BaseOptimizerContext` in `optimizer_context.py` handles prompt management and code extraction +- Two context types: `SingleOptimizerContext` (single-file) and `MultiOptimizerContext` (multi-file) +- `extract_code_and_explanation_from_llm_res()` parses LLM markdown response into code blocks +- `parse_and_generate_candidate_schema()` converts extracted code to `OptimizeResponseItemSchema` + +## Model Distribution + +- `get_model_distribution()` in `optimizer_config.py` splits calls between OpenAI and Anthropic +- Formula: `claude_calls = (total - 1) // 2`, `gpt_calls = total - claude_calls` +- `MAX_OPTIMIZER_CALLS = 6`, `MAX_OPTIMIZER_LP_CALLS = 7` +- Claude gets fewer calls as it's more expensive + +## Postprocessing + +- `postprocess.py` handles deduplication and validation of optimization candidates +- Deduplication: normalize with `ast.parse()` + `ast.dump()`, skip duplicates with identical AST +- Equality check: compare optimized code to original to skip no-ops +- Uses `libcst` for all code transformations (preserves formatting) + +## Prompt Templates + +- Prompts stored as `.md` files alongside their modules +- Rendered with Jinja2 (e.g., `build_prompt()` in testgen) +- System and user prompts are constructed per-context type diff --git a/tiles/codeflash-internal-rules/rules/testing-rules.md b/tiles/codeflash-internal-rules/rules/testing-rules.md new file mode 100644 index 000000000..e7aa79195 --- /dev/null +++ b/tiles/codeflash-internal-rules/rules/testing-rules.md @@ -0,0 +1,20 @@ +# Testing Rules + +## Python (aiservice) + +- Tests in `tests/` by feature: `optimizer/`, `testgen/`, `testgen_postprocessing/`, `testgen_instrumentation/`, `integration/`, `validators/` +- `@pytest.mark.asyncio` for async tests +- `pytest.ini` sets `DJANGO_SETTINGS_MODULE = aiservice.settings` +- `conftest.py` provides `normalize_code()` helper (AST parse/unparse for quote normalization) +- Test factories: `create_optimizer_context()`, `create_refiner_context()` +- Run tests: `uv run pytest` (all), `uv run pytest tests/path/test_file.py::test_name -v` (single) + +## JS/TS (cf-api) + +- Tests use dependency injection: `setXxxDependencies()` / `resetXxxDependencies()` + +## PR Review + +- **Always comment on**: Logic errors, security vulnerabilities, test name typos, breaking changes without migration +- **Never comment on**: Code style/formatting (linters handle it), "consider" suggestions, performance without profiling data +- Limit to 5-7 high-signal comments per review diff --git a/tiles/codeflash-internal-rules/tile.json b/tiles/codeflash-internal-rules/tile.json new file mode 100644 index 000000000..d933a8687 --- /dev/null +++ b/tiles/codeflash-internal-rules/tile.json @@ -0,0 +1,26 @@ +{ + "name": 
"codeflash/codeflash-internal-rules", + "version": "0.1.0", + "summary": "Coding standards and conventions for the codeflash-internal monorepo", + "private": true, + "rules": { + "code-style": { + "rules": "rules/code-style.md" + }, + "architecture": { + "rules": "rules/architecture.md" + }, + "optimization-patterns": { + "rules": "rules/optimization-patterns.md" + }, + "git-conventions": { + "rules": "rules/git-conventions.md" + }, + "testing-rules": { + "rules": "rules/testing-rules.md" + }, + "multi-language-handlers": { + "rules": "rules/multi-language-handlers.md" + } + } +} diff --git a/tiles/codeflash-internal-skills/skills/add-api-endpoint/SKILL.md b/tiles/codeflash-internal-skills/skills/add-api-endpoint/SKILL.md new file mode 100644 index 000000000..38d204a7c --- /dev/null +++ b/tiles/codeflash-internal-skills/skills/add-api-endpoint/SKILL.md @@ -0,0 +1,170 @@ +--- +name: add-api-endpoint +description: > + Guide for adding a new API endpoint to the codeflash-internal system. + Use when adding a new endpoint, creating a route, or implementing a new API. + Covers both aiservice (Django-Ninja) and cf-api (Express) endpoints + including schemas, routers, authentication, URL registration, and tests. +--- + +# Add API Endpoint + +Use this workflow when adding a new endpoint to either the aiservice backend or the cf-api middleware. Follow the section that matches your target service. + +## Part A: AIService Endpoint (Django-Ninja) + +### Step 1: Define Schemas + +Create request and response schemas. + +1. Create or update schemas in the appropriate module (e.g., `core/shared/` for shared, `core/languages/python/` for Python-specific) +2. Use `ninja.Schema` for all request/response types: + ```python + from ninja import Schema + + class MyRequestSchema(Schema): + source_code: str + trace_id: str + language: str = "python" + + class MyResponseSchema(Schema): + results: list[str] + + class MyErrorResponseSchema(Schema): + message: str + ``` +3. Follow existing patterns in `core/shared/optimizer_models.py` + +**Checkpoint**: Schemas should use Pydantic validation. Test with `MyRequestSchema.model_validate(data)`. + +### Step 2: Create the Router + +Create a NinjaAPI router for the endpoint. + +1. Create a new module (e.g., `core/shared/my_feature.py` or `core/languages/python/my_feature/my_feature.py`) +2. Define the router and endpoint: + ```python + from ninja import NinjaAPI + from authapp.auth import AuthenticatedRequest + + my_feature_api = NinjaAPI(urls_namespace="my_feature") + + @my_feature_api.post( + "/", response={200: MyResponseSchema, 400: MyErrorResponseSchema, 500: MyErrorResponseSchema} + ) + async def my_endpoint( + request: AuthenticatedRequest, data: MyRequestSchema + ) -> tuple[int, MyResponseSchema | MyErrorResponseSchema]: + # Implementation here + return 200, MyResponseSchema(results=[]) + ``` +3. All endpoints must be `async def` +4. Use `AuthenticatedRequest` for authenticated endpoints +5. For multi-language endpoints, add dispatch by `data.language` with lazy imports + +**Checkpoint**: The endpoint should handle success and error cases with proper status codes. + +### Step 3: Register in urls.py + +Add the endpoint to `aiservice/urls.py`. + +1. Import the API object: + ```python + from core.shared.my_feature import my_feature_api + ``` +2. Add to `urlpatterns`: + ```python + path("ai/my-feature", my_feature_api.urls), + ``` +3. Follow the naming convention: `ai/` + +**Checkpoint**: The endpoint should be accessible at `/ai/my-feature/`. 
+ +### Step 4: Add Tests + +Write tests for the endpoint. + +1. Create test file in `tests/` matching the source structure +2. Test the handler function directly (not via HTTP): + ```python + @pytest.mark.asyncio + async def test_my_endpoint(): + # Mock request and data + result = await my_endpoint(mock_request, mock_data) + assert result[0] == 200 + ``` +3. Run: `uv run pytest tests/path/test_file.py -v` + +**Checkpoint**: Tests pass. Run `uv run prek run --all-files`. + +## Part B: CF-API Endpoint (Express) + +### Step 1: Create Endpoint Handler + +Create the endpoint handler in `js/cf-api/endpoints/`. + +1. Create `js/cf-api/endpoints/my-feature.ts`: + ```typescript + import { Request, Response } from "express" + + export async function myFeature(req: Request, res: Response) { + // Implementation + res.status(200).json({ results: [] }) + } + ``` +2. Follow existing patterns in `endpoints/` + +**Checkpoint**: Handler should return appropriate status codes and JSON responses. + +### Step 2: Create Route File or Add to Existing + +Add the route to the appropriate route file. + +1. If it's a new domain, create `js/cf-api/routes/my-feature.routes.ts`: + ```typescript + import { Router } from "express" + import { addAsync } from "@awaitjs/express" + import { myFeature } from "../endpoints/my-feature.js" + import { ROUTES } from "../constants/index.js" + + const router = addAsync(Router()) as any + router.postAsync(ROUTES.MY_FEATURE, myFeature) + export default router + ``` +2. If it belongs to an existing domain, add to the corresponding route file +3. Add the route constant to `constants/index.ts` + +**Checkpoint**: Route file exports a router with the endpoint registered. + +### Step 3: Register in Route Index + +Register the route in `js/cf-api/routes/index.ts`. + +1. Import the route module +2. Register in the correct section: + - Before body parser: webhook routes only + - Public routes: no auth required + - Protected routes: after `checkForValidAPIKey` middleware +3. Apply middleware as needed (`trackUsage`, `idLimiter`, etc.) + +**Checkpoint**: The endpoint should be accessible with proper authentication. + +### Step 4: Add Tests + +Write tests using the dependency injection pattern. + +1. Create test file in `js/cf-api/__tests__/` +2. Use `setXxxDependencies()` / `resetXxxDependencies()` for mocking +3. Follow existing test patterns + +**Checkpoint**: Tests pass with `npm test`. + +## Key Files Reference + +| File | What to modify | +|------|---------------| +| `core/shared/optimizer_models.py` | Schema patterns | +| `aiservice/urls.py` | AIService endpoint registration | +| `js/cf-api/routes/index.ts` | CF-API route registration | +| `js/cf-api/constants/index.ts` | Route constants | +| `authapp/auth.py` | Authentication patterns | diff --git a/tiles/codeflash-internal-skills/skills/add-language-support/SKILL.md b/tiles/codeflash-internal-skills/skills/add-language-support/SKILL.md new file mode 100644 index 000000000..f3c1b4260 --- /dev/null +++ b/tiles/codeflash-internal-skills/skills/add-language-support/SKILL.md @@ -0,0 +1,135 @@ +--- +name: add-language-support +description: > + Guide for adding a new programming language to the multi-language system. + Use when extending the aiservice to support a new language (e.g., Rust, Go). + Covers directory creation, handler implementation, registry registration, + protocol compliance, prompt templates, router updates, and tests. 
+--- + +# Add Language Support + +Use this workflow when adding a new programming language to the codeflash-internal optimization system. Follow steps in order — each builds on the previous. + +## Step 1: Create Language Directory + +Create the directory structure under `core/languages/`. + +1. Create `core/languages/<language>/` with `__init__.py` +2. Create subdirectories for each feature: + ``` + core/languages/<language>/ + ├── __init__.py # Handler class + @register_handler decorator + ├── optimizer/ # Optimization handler + │ ├── __init__.py + │ └── optimizer.py + └── testgen/ # Test generation handler (if supported) + ├── __init__.py + └── testgen.py + ``` +3. Follow existing patterns from `core/languages/python/` or `core/languages/js_ts/` + +**Checkpoint**: The directory structure should match existing language modules. Verify with `ls core/languages/`. + +## Step 2: Implement Handler Class + +Create the handler class in `core/languages/<language>/__init__.py`. + +1. Read `core/protocols/base.py` for the `LanguageHandler` protocol +2. Implement the handler: + ```python + from core.registry import register_handler + + @register_handler("<language>") + class <Language>Handler: + language = "<language>" + supports_testgen = False # Set True if implementing testgen + supports_optimizer = True # Set True if implementing optimizer + supports_code_repair = False + supports_jit_rewrite = False + supports_optimization_review = False + supports_explanations = False + ``` +3. Set `supports_*` flags for each feature you implement +4. The `@register_handler` decorator registers the class with the global registry + +**Checkpoint**: After this step, `registry.list_available()` should include your language ID. + +## Step 3: Register the Module + +Ensure the handler module is imported so the decorator fires. + +1. Read `core/languages/__init__.py` — check how existing languages are imported +2. Add your language import so `@register_handler` fires on startup +3. Import should be at module level or via lazy import pattern + +**Checkpoint**: Run `python -c "from core.languages import <language>"` to verify no import errors. + +## Step 4: Implement Protocol Methods + +Implement the protocol methods for each supported feature. + +1. Read `core/protocols/optimizer.py` — `OptimizerProtocol` requires `optimizer_optimize(request, data)` +2. Read `core/protocols/testgen.py` — `TestGenProtocol` requires `testgen_generate(request, data)` +3. Read `core/protocols/code_repair.py` — `CodeRepairProtocol` requires `code_repair_repair(user_id, optimization_id, ctx)` +4. Follow the pattern from Python/JS handlers: + ```python + async def optimizer_optimize(self, request, data): + # 1. Build context from data.source_code + # 2. Call call_llm() with language-specific prompts + # 3. Parse response + # 4. Return (status_code, response_schema) + ``` + +**Checkpoint**: Each method must be `async def` and return the expected response type. + +## Step 5: Create Prompt Templates + +Create language-specific prompt templates. + +1. Create `.md` files alongside the handler modules (e.g., `optimizer/system_prompt.md`) +2. Use Jinja2 templating for dynamic content +3. Prompts should include language-specific context (version, conventions, stdlib) +4. Follow the pattern from `core/languages/python/optimizer/context_utils/` + +**Checkpoint**: Prompts should produce valid, non-empty system and user messages. + +## Step 6: Update Routers + +Add language dispatch to the shared routers. + +1.
Edit `core/shared/optimizer_router.py` — add a branch for your language: + ```python + if data.language == "<language>": + from core.languages.<language>.optimizer import optimize_<language>  # noqa: PLC0415 + return await optimize_<language>(request, data) + ``` +2. Edit `core/shared/testgen_router.py` — add the same pattern if testgen is supported +3. Use lazy imports (inside the function body) to avoid circular dependencies + +**Checkpoint**: Both routers should dispatch correctly for the new language. + +## Step 7: Add Tests + +Write tests for the new language handler. + +1. Create `tests/<language>/` directory mirroring the source structure +2. Test handler registration: verify `registry.get_handler("<language>")` returns the correct class +3. Test feature dispatch: verify `get_handler_for_feature("<language>", "optimizer")` works +4. Test optimization flow end-to-end (mock LLM calls) +5. Use `@pytest.mark.asyncio` for async tests +6. Run: `uv run pytest tests/<language>/ -v` + +**Checkpoint**: All tests must pass. Run `uv run prek run --all-files` for formatting/lint. + +## Key Files Reference + +| File | What to modify | +|------|---------------| +| `core/languages/<language>/` | New handler directory | +| `core/registry.py` | No changes needed (decorator handles it) | +| `core/dispatcher.py` | No changes needed (dynamic lookup) | +| `core/protocols/` | Reference for protocol methods | +| `core/shared/optimizer_router.py` | Add language dispatch branch | +| `core/shared/testgen_router.py` | Add language dispatch branch (if testgen) | +| `core/languages/__init__.py` | Add import for decorator registration | diff --git a/tiles/codeflash-internal-skills/skills/debug-optimization-failure/SKILL.md b/tiles/codeflash-internal-skills/skills/debug-optimization-failure/SKILL.md new file mode 100644 index 000000000..97acda3ec --- /dev/null +++ b/tiles/codeflash-internal-skills/skills/debug-optimization-failure/SKILL.md @@ -0,0 +1,107 @@ +--- +name: debug-optimization-failure +description: > + Diagnose why an optimization produced no results or failed silently. + Use when an optimization request returns errors, empty results, or all + candidates are rejected. Walks through request validation, router dispatch, + context extraction, LLM calls, postprocessing, and logging stages. +--- + +# Debug Optimization Failure + +Use this workflow when an optimization request fails or produces no results. Work through the stages sequentially — stop at the first failure found. + +## Step 1: Validate the Request + +Check that the incoming `OptimizeSchema` is well-formed. + +1. Read `core/shared/optimizer_models.py` — verify the request matches `OptimizeSchema` fields +2. Check required fields: `source_code`, `trace_id` must be non-empty +3. Check `language` field — must be `"python"`, `"javascript"`, `"typescript"`, or `"java"` +4. Check `n_candidates` — default is 5, must be positive + +**Checkpoint**: If the request schema is invalid, the error comes from Pydantic validation. Check the 400 response for field-level errors. + +## Step 2: Check Router Dispatch + +Verify the correct language handler is invoked. + +1. Read `core/shared/optimizer_router.py` — the `optimize()` endpoint dispatches by `data.language` +2. Supported routes: + - `"javascript"` / `"typescript"` → `core.languages.js_ts.optimizer.optimize_javascript` + - `"java"` → `core.languages.java.optimizer.optimize_java` + - Default → `core.languages.python.optimizer.optimizer.optimize_python` +3.
Check for import errors — lazy imports inside the function body may fail if a language module is missing + +**Checkpoint**: If dispatch fails, you'll see an `ImportError`. Check that the language module exists under `core/languages/`. + +## Step 3: Check Context Extraction + +Verify the optimization context is built correctly. + +1. Read `core/languages/python/optimizer/context_utils/optimizer_context.py` +2. `BaseOptimizerContext.get_dynamic_context()` dispatches to Single or Multi context +3. Check `get_system_prompt()` and `get_user_prompt()` — they should produce non-empty prompts +4. Check `extract_code_and_explanation_from_llm_res()` — this parses markdown code blocks from the LLM response + +**Checkpoint**: If context extraction returns empty prompts, check that `source_code` in the request is valid Python/JS code. + +## Step 4: Check LLM Calls + +Verify the LLM is called correctly and returns valid responses. + +1. Read `aiservice/llm.py` — `call_llm()` is the universal call handler +2. Check `get_llm_client(model_type)` returns a valid client (not `None`) +3. Environment variables required: + - OpenAI: `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT`, `OPENAI_API_VERSION` + - Anthropic: `ANTHROPIC_FOUNDRY_API_KEY`, `ANTHROPIC_FOUNDRY_BASE_URL` +4. Check `optimizer_config.py` — `get_model_distribution()` determines how many calls per model +5. Look for exceptions: `"LLM client for model type '...' is not available"` + +**Checkpoint**: If LLM calls fail, check environment variables and API key validity. Network errors will raise exceptions. + +## Step 5: Check Postprocessing + +Verify candidates survive postprocessing. + +1. Read `core/languages/python/optimizer/postprocess.py` +2. `deduplicate_optimizations()` — removes candidates with identical AST (via `ast.parse()` + `ast.dump()`) +3. `equality_check()` — removes candidates identical to the original code +4. Check if ALL candidates were deduplicated or matched the original + +**Checkpoint**: If all candidates are removed by postprocessing, the LLM is generating identical or no-op code. Try increasing `n_candidates` or checking prompt quality. + +## Step 6: Check Response Construction + +Verify the response is properly constructed. + +1. Each successful candidate produces an `OptimizeResponseItemSchema` +2. `parse_and_generate_candidate_schema()` converts extracted code to the schema +3. `is_valid_code()` validates syntax — `cst.ParserSyntaxError` or `ValidationError` means malformed output +4. If parsing fails, the candidate is dropped and a Sentry message is captured + +**Checkpoint**: If candidates parse but the response is empty, check the validation step in the optimizer flow. + +## Step 7: Check Logging + +Verify the optimization was logged for debugging. + +1. Read `core/log_features/models.py` — `OptimizationFeatures` stores per-trace-id data +2. Check `optimizations_raw` (before postprocessing) vs `optimizations_post` (after) +3. LLM calls are recorded via `record_llm_call()` in the `finally` block of `call_llm()` +4. PostHog events track `aiservice-optimize-openai-usage` + +**Checkpoint**: If logging shows raw candidates but no post candidates, postprocessing removed them all. 
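+To check locally whether two candidates would collapse under the Step 5 dedup, a minimal sketch of the same `ast.parse()` + `ast.dump()` comparison (the real helper in `postprocess.py` may normalize further):
+
+```python
+import ast
+
+def same_ast(a: str, b: str) -> bool:
+    # Identical ASTs are deduplicated; comments and formatting don't survive parsing
+    return ast.dump(ast.parse(a)) == ast.dump(ast.parse(b))
+
+print(same_ast("x = 1\n", "x = 1  # faster\n"))  # True, so the second is dropped
+```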
+ +## Key Files Reference + +| File | What to check | +|------|---------------| +| `core/shared/optimizer_router.py` | Language dispatch | +| `core/shared/optimizer_models.py` | Request validation | +| `core/languages/python/optimizer/optimizer.py` | Optimization flow | +| `core/languages/python/optimizer/context_utils/optimizer_context.py` | Context extraction | +| `core/languages/python/optimizer/postprocess.py` | Dedup and validation | +| `aiservice/llm.py` | LLM calls and client setup | +| `core/shared/optimizer_config.py` | Model distribution | +| `core/log_features/models.py` | Logging and tracking | diff --git a/tiles/codeflash-internal-skills/skills/debug-test-generation/SKILL.md b/tiles/codeflash-internal-skills/skills/debug-test-generation/SKILL.md new file mode 100644 index 000000000..98a8d42ca --- /dev/null +++ b/tiles/codeflash-internal-skills/skills/debug-test-generation/SKILL.md @@ -0,0 +1,103 @@ +--- +name: debug-test-generation +description: > + Diagnose why test generation failed or produced invalid tests. + Use when testgen returns errors, empty results, or produces tests + that fail to compile or run. Walks through request validation, + router dispatch, context building, prompt construction, LLM calls, + postprocessing, instrumentation, and output validation. +--- + +# Debug Test Generation + +Use this workflow when test generation fails or produces invalid tests. Work through the stages sequentially — stop at the first failure found. + +## Step 1: Validate the Request + +Check that the incoming testgen request is well-formed. + +1. Read the testgen request schema in the relevant testgen module +2. Verify required fields: `source_code`, `trace_id` must be non-empty +3. Check `language` field — must match a supported language +4. Check for valid code — source code should parse without syntax errors + +**Checkpoint**: If the request schema is invalid, the error comes from Pydantic validation. Check the 400 response. + +## Step 2: Check Router Dispatch + +Verify the correct language handler is invoked. + +1. Read `core/shared/testgen_router.py` — the `testgen()` endpoint dispatches by `data.language` +2. Supported routes: + - `"javascript"` / `"typescript"` → `core.languages.js_ts.testgen.generate_tests_javascript` + - `"java"` → `core.languages.java.testgen.generate_tests_java` + - Default → `core.languages.python.testgen.testgen.generate_tests_python` +3. Check for `ImportError` — lazy imports may fail if a language module is broken + +**Checkpoint**: If dispatch fails, check that the language module exists and imports cleanly. + +## Step 3: Check Context Building + +Verify the testgen context is constructed correctly. + +1. Read `core/languages/python/testgen/testgen.py` — `build_prompt()` constructs the prompts +2. Check that source code and dependency code are passed correctly +3. Verify the Jinja2 template renders without errors +4. Check for async/sync variants — the prompt builder handles both + +**Checkpoint**: If context is empty or malformed, check the input `source_code` and `dependency_code`. + +## Step 4: Check Prompt Construction + +Verify the LLM prompts are well-formed. + +1. The `build_prompt()` function uses Jinja2 templates (`.md` files alongside the module) +2. System prompt sets the role and language context +3. User prompt includes the source code and test context +4. Check that prompts are non-empty and contain the function to test + +**Checkpoint**: If prompts are empty, check the template files and Jinja2 rendering. 
+ +## Step 5: Check LLM Response + +Verify the LLM returns valid test code. + +1. Read `aiservice/llm.py` — `call_llm()` handles the API call +2. Check for network errors or API key issues (same as optimization debugging) +3. Look for `LLMOutputParseError` — this means the LLM returned unparseable output +4. Check the raw response content — it should contain markdown code blocks with test code + +**Checkpoint**: If the LLM returns malformed output, check the prompt quality and model selection. + +## Step 6: Check Postprocessing + +Verify generated tests survive postprocessing. + +1. Read `core/languages/python/testgen/postprocessing/` directory +2. `add_missing_imports.py` — adds `from __future__ import annotations` if needed +3. Check for syntax errors in generated test code — `cst.ParserSyntaxError` means malformed code +4. Verify imports are resolved correctly + +**Checkpoint**: If postprocessing fails, the LLM generated syntactically invalid code. Check the raw output. + +## Step 7: Check Instrumentation + +Verify tests are properly instrumented. + +1. Read `core/languages/python/testgen/instrumentation/instrument_new_tests.py` +2. `instrument_tests()` applies behavior and performance instrumentation +3. `detect_frameworks_from_code()` identifies ML frameworks (PyTorch, TensorFlow, JAX) +4. `_create_device_sync_precompute_statements()` adds GPU sync calls for timing accuracy +5. Check that instrumented tests still compile — instrumentation may introduce syntax errors + +**Checkpoint**: If instrumented tests fail to compile, check the instrumentation transforms. The issue is usually in import handling or device sync injection. + +## Key Files Reference + +| File | What to check | +|------|---------------| +| `core/shared/testgen_router.py` | Language dispatch | +| `core/languages/python/testgen/testgen.py` | Testgen flow, prompt building | +| `core/languages/python/testgen/postprocessing/` | Import management, cleanup | +| `core/languages/python/testgen/instrumentation/instrument_new_tests.py` | Test instrumentation | +| `aiservice/llm.py` | LLM calls and client setup | diff --git a/tiles/codeflash-internal-skills/tile.json b/tiles/codeflash-internal-skills/tile.json new file mode 100644 index 000000000..aff7fe750 --- /dev/null +++ b/tiles/codeflash-internal-skills/tile.json @@ -0,0 +1,20 @@ +{ + "name": "codeflash/codeflash-internal-skills", + "version": "0.1.0", + "summary": "Procedural workflows for developing and debugging codeflash-internal", + "private": true, + "skills": { + "debug-optimization-failure": { + "path": "skills/debug-optimization-failure/SKILL.md" + }, + "add-language-support": { + "path": "skills/add-language-support/SKILL.md" + }, + "add-api-endpoint": { + "path": "skills/add-api-endpoint/SKILL.md" + }, + "debug-test-generation": { + "path": "skills/debug-test-generation/SKILL.md" + } + } +}