perf: rewrite PR data query to use UNION CTE for personal accounts

The getOptimizationPRs data query used the same 3-way OR pattern
(repository_id IN (...) OR user_id OR current_username) that causes
slow bitmap OR merges with 100+ repo UUIDs.

For personal accounts, use a CTE with UNION to identify candidate
event IDs via three independent index scans, then INNER JOIN those
candidates back to optimization_events for the data fields. This
matches the UNION approach already used by the count query in the
same function.
This commit is contained in:
Kevin Turcios 2026-04-11 03:46:01 -05:00
parent bc71512073
commit 2444d1b4ed

View file

@ -534,27 +534,9 @@ export async function getOptimizationPRs(
`
}
// Run data + count queries in parallel.
// Count uses UNION (personal) or flat WHERE (org) to avoid bitmap OR.
// Data query JOINs optimization_features only for the LIMIT'd rows.
const [events, countRows] = await Promise.all([
prisma.$queryRawUnsafe<
Array<{
id: string
event_type: string
pr_url: string | null
function_name: string | null
file_path: string | null
speedup_x: number | null
speedup_pct: number | null
created_at: Date
repository_id: string | null
repo_name: string | null
repo_full_name: string | null
}>
>(
`
SELECT
// Build data query — for personal accounts, use UNION CTE to avoid bitmap
// OR merge, same approach as the count query.
const selectFields = `
oe.id,
oe.event_type,
COALESCE(
@ -608,15 +590,73 @@ export async function getOptimizationPRs(
oe.created_at,
oe.repository_id,
r.name AS repo_name,
r.full_name AS repo_full_name
r.full_name AS repo_full_name`
let dataSql: string
if ("orgId" in payload) {
dataSql = `
SELECT ${selectFields}
FROM optimization_events oe
LEFT JOIN optimization_features of ON oe.trace_id = of.trace_id
LEFT JOIN repositories r ON oe.repository_id = r.id
WHERE ${dataWhereClause}
ORDER BY oe.created_at DESC
LIMIT ${pageSize} OFFSET ${offset}
`,
),
`
} else {
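// Personal: CTE with UNION to identify candidate event IDs via index
// scans, then JOIN back for the data fields. The pr_url / pull_request
// filter, ORDER BY and LIMIT are applied after the join, since the filter
// reads optimization_features.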
const uid = payload.userId.replace(/'/g, "''")
const uname = payload.username.replace(/'/g, "''")
const eventFilter =
eventTypeFilter && eventTypeFilter !== "all"
? `event_type = '${String(eventTypeFilter).replace(/'/g, "''")}'`
: `event_type IN ('pr_created','pr_merged','pr_closed')`
const repoFilter = repositoryId
? `AND repository_id = '${String(repositoryId).replace(/'/g, "''")}'`
: ""
const branchFilters = `AND ${eventFilter} AND is_optimization_found = true ${repoFilter}`
dataSql = `
WITH candidate_ids AS (
SELECT id FROM optimization_events
WHERE repository_id IN (${repoIdsString}) ${branchFilters}
UNION
SELECT id FROM optimization_events
WHERE user_id = '${uid}' ${branchFilters}
UNION
SELECT id FROM optimization_events
WHERE current_username = '${uname}' ${branchFilters}
)
SELECT ${selectFields}
FROM optimization_events oe
INNER JOIN candidate_ids ci ON ci.id = oe.id
LEFT JOIN optimization_features of ON oe.trace_id = of.trace_id
LEFT JOIN repositories r ON oe.repository_id = r.id
WHERE (oe.pr_url IS NOT NULL OR of.pull_request IS NOT NULL)
ORDER BY oe.created_at DESC
LIMIT ${pageSize} OFFSET ${offset}
`
}
// Run data + count queries in parallel.
// Both use UNION (personal) or flat WHERE (org) to avoid bitmap OR.
const [events, countRows] = await Promise.all([
prisma.$queryRawUnsafe<
Array<{
id: string
event_type: string
pr_url: string | null
function_name: string | null
file_path: string | null
speedup_x: number | null
speedup_pct: number | null
created_at: Date
repository_id: string | null
repo_name: string | null
repo_full_name: string | null
}>
>(dataSql),
prisma.$queryRawUnsafe<Array<{ count: bigint }>>(countSql),
])