perf: restructure getOptimizationPRs to limit before joining
The data query was LEFT JOINing optimization_features and repositories across ALL matching candidate events before sorting and applying LIMIT. For accounts with thousands of events, this meant joining and sorting far more rows than needed.

Restructure both the org and personal paths to use a two-phase CTE:

1. page_ids CTE: identify the page of event IDs, using EXISTS for the PR filter (no full JOIN to optimization_features), then sort and apply LIMIT.
2. Outer query: JOIN only the ~10 result IDs with optimization_features and repositories for the display fields.

Also remove the now-unused dataWhereClause variable.
This commit is contained in:
parent
d6cab273bc
commit
ee535ae9bc
1 changed file with 46 additions and 22 deletions
|
|
@ -523,17 +523,6 @@ export async function getOptimizationPRs(
|
|||
${prCondition}
|
||||
`
|
||||
|
||||
const dataWhereClause = `
|
||||
${accountCondition}
|
||||
${eventTypeCondition}
|
||||
${repositoryCondition}
|
||||
AND oe.is_optimization_found = true
|
||||
AND (
|
||||
oe.pr_url IS NOT NULL
|
||||
OR of.pull_request IS NOT NULL
|
||||
)
|
||||
`
|
||||
|
||||
const safePageSize = Math.trunc(pageSize)
|
||||
const offset = Math.trunc((page - 1) * safePageSize)
|
||||
|
||||
|
|
@ -640,18 +629,43 @@ export async function getOptimizationPRs(
|
|||
|
||||
let dataSql: string
|
||||
if ("orgId" in payload) {
|
||||
// Two-phase: first identify the page of event IDs (cheap — no JOINs
|
||||
// to optimization_features for display data), then JOIN only those IDs.
|
||||
dataSql = `
|
||||
WITH page_ids AS (
|
||||
SELECT oe.id
|
||||
FROM optimization_events oe
|
||||
WHERE ${accountCondition}
|
||||
${eventTypeCondition}
|
||||
${repositoryCondition}
|
||||
AND oe.is_optimization_found = true
|
||||
AND (
|
||||
oe.pr_url IS NOT NULL
|
||||
OR EXISTS (
|
||||
SELECT 1 FROM optimization_features of2
|
||||
WHERE of2.trace_id = oe.trace_id
|
||||
AND of2.pull_request IS NOT NULL
|
||||
)
|
||||
)
|
||||
ORDER BY oe.created_at DESC
|
||||
LIMIT ${safePageSize} OFFSET ${offset}
|
||||
)
|
||||
SELECT ${selectFields}
|
||||
FROM optimization_events oe
|
||||
INNER JOIN page_ids pi ON pi.id = oe.id
|
||||
LEFT JOIN optimization_features of ON oe.trace_id = of.trace_id
|
||||
LEFT JOIN repositories r ON oe.repository_id = r.id
|
||||
WHERE ${dataWhereClause}
|
||||
ORDER BY oe.created_at DESC
|
||||
LIMIT ${safePageSize} OFFSET ${offset}
|
||||
`
|
||||
} else {
|
||||
// Personal: CTE with UNION to identify candidate event IDs via index
|
||||
// scans, then JOIN for the data fields (only for the LIMIT'd set).
|
||||
// Personal: two-phase CTE approach to avoid joining large tables
|
||||
// before sorting and limiting.
|
||||
//
|
||||
// Phase 1 (candidates): UNION for index-backed scans, carrying
|
||||
// id + created_at + pr_url + trace_id for filtering and sorting.
|
||||
// Phase 2 (page_ids): Filter for PR presence (pr_url OR optimization_features),
|
||||
// sort by created_at DESC, and LIMIT — so the expensive JOINs only
|
||||
// happen for the final page of results.
|
||||
const uid = sqlUserId(payload.userId)
|
||||
const uname = sqlUsername(payload.username)
|
||||
const eventFilter =
|
||||
|
|
@ -662,24 +676,34 @@ export async function getOptimizationPRs(
|
|||
const branchFilters = `AND ${eventFilter} AND is_optimization_found = true ${repoFilter}`
|
||||
|
||||
dataSql = `
|
||||
WITH candidate_ids AS (
|
||||
SELECT id FROM optimization_events
|
||||
WITH candidates AS (
|
||||
SELECT id, created_at, pr_url, trace_id FROM optimization_events
|
||||
WHERE repository_id IN (${repoIdsString}) ${branchFilters}
|
||||
UNION
|
||||
SELECT id FROM optimization_events
|
||||
SELECT id, created_at, pr_url, trace_id FROM optimization_events
|
||||
WHERE user_id = '${uid}' ${branchFilters}
|
||||
UNION
|
||||
SELECT id FROM optimization_events
|
||||
SELECT id, created_at, pr_url, trace_id FROM optimization_events
|
||||
WHERE current_username = '${uname}' ${branchFilters}
|
||||
),
|
||||
page_ids AS (
|
||||
SELECT id
|
||||
FROM candidates c
|
||||
WHERE c.pr_url IS NOT NULL
|
||||
OR EXISTS (
|
||||
SELECT 1 FROM optimization_features of2
|
||||
WHERE of2.trace_id = c.trace_id
|
||||
AND of2.pull_request IS NOT NULL
|
||||
)
|
||||
ORDER BY c.created_at DESC
|
||||
LIMIT ${safePageSize} OFFSET ${offset}
|
||||
)
|
||||
SELECT ${selectFields}
|
||||
FROM optimization_events oe
|
||||
INNER JOIN candidate_ids ci ON ci.id = oe.id
|
||||
INNER JOIN page_ids pi ON pi.id = oe.id
|
||||
LEFT JOIN optimization_features of ON oe.trace_id = of.trace_id
|
||||
LEFT JOIN repositories r ON oe.repository_id = r.id
|
||||
WHERE (oe.pr_url IS NOT NULL OR of.pull_request IS NOT NULL)
|
||||
ORDER BY oe.created_at DESC
|
||||
LIMIT ${safePageSize} OFFSET ${offset}
|
||||
`
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue