rank only, change formula

This commit is contained in:
Kevin Turcios 2025-07-02 18:14:15 -07:00
parent 9addd95f56
commit 70cecaf8c4
4 changed files with 45 additions and 31 deletions

View file

@ -14,19 +14,17 @@ if TYPE_CHECKING:
class FunctionRanker:
"""Ranks and filters functions for optimization based on profiling trace data using the ttX scoring method.
"""Ranks and filters functions based on a ttX score derived from profiling data.
The FunctionRanker analyzes function-level timing statistics from a trace file and assigns a ttX score to each function:
The ttX score is calculated as:
ttX = own_time + (time_spent_in_callees / call_count)
ttX = own_time + (time_spent_in_callees x call_count)
This score prioritizes functions that are computationally heavy themselves (high `own_time`)
or that make expensive calls to other functions (high average `time_spent_in_callees`).
This scoring prioritizes functions that:
1. Consume significant time themselves (own_time)
2. Are called frequently and have expensive subcalls (time_spent_in_callees x call_count)
first, filters out functions whose own_time is less than a specified percentage (importance_threshold = minimum fraction of total runtime a function must account for to be considered important) of the total runtime, considering them unimportant for optimization.
The remaining functions are then ranked in descending order by their ttX score, prioritizing those most likely to yield performance improvements if optimized.
Functions are first filtered by an importance threshold based on their `own_time` as a
fraction of the total runtime. The remaining functions are then ranked by their ttX score
to identify the best candidates for optimization.
"""
def __init__(self, trace_file_path: Path) -> None:
@ -59,7 +57,7 @@ class FunctionRanker:
time_in_callees_ns = cumulative_time_ns - total_time_ns
# Calculate ttX score
ttx_score = own_time_ns + (time_in_callees_ns * call_count)
ttx_score = own_time_ns + (time_in_callees_ns / call_count)
function_key = f"{filename}:{qualified_name}"
self._function_stats[function_key] = {
@ -99,11 +97,27 @@ class FunctionRanker:
return stats["ttx_score"] if stats else 0.0
def rank_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
return sorted(functions_to_optimize, key=self.get_function_ttx_score, reverse=True)
ranked = sorted(functions_to_optimize, key=self.get_function_ttx_score, reverse=True)
logger.info(
f"Function ranking order: {[f'{func.function_name} (ttX={self.get_function_ttx_score(func):.2f})' for func in ranked]}"
)
return ranked
def get_function_stats_summary(self, function_to_optimize: FunctionToOptimize) -> dict | None:
return self._get_function_stats(function_to_optimize)
def rerank_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
"""Ranks functions based on their ttX score.
This method calculates the ttX score for each function and returns
the functions sorted in descending order of their ttX score.
"""
if not self._function_stats:
logger.warning("No function stats available to rank functions.")
return []
return self.rank_functions(functions_to_optimize)
def rerank_and_filter_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
"""Reranks and filters functions based on their impact on total runtime.

View file

@ -218,7 +218,7 @@ def get_functions_to_optimize(
all_functions.extend(file_functions)
if all_functions:
ranked_functions = ranker.rerank_and_filter_functions(all_functions)
ranked_functions = ranker.rank_functions(all_functions)
functions_count = len(ranked_functions)
ranked_dict = {}

View file

@ -58,8 +58,8 @@ def test_load_function_stats(function_ranker):
# Verify funcA specific values
assert func_a_stats["function_name"] == "funcA"
assert func_a_stats["call_count"] == 1
assert func_a_stats["own_time_ns"] == 153000
assert func_a_stats["cumulative_time_ns"] == 5960000
assert func_a_stats["own_time_ns"] == 63000
assert func_a_stats["cumulative_time_ns"] == 5443000
def test_get_function_ttx_score(function_ranker, workload_functions):
@ -72,9 +72,9 @@ def test_get_function_ttx_score(function_ranker, workload_functions):
assert func_a is not None
ttx_score = function_ranker.get_function_ttx_score(func_a)
# Expected ttX score: own_time + (time_in_callees * call_count)
# = 153000 + ((5960000 - 153000) * 1) = 5960000
assert ttx_score == 5960000
# Expected ttX score: own_time + (time_in_callees / call_count)
# = 63000 + ((5443000 - 63000) / 1) = 5443000
assert ttx_score == 5443000
def test_rank_functions(function_ranker, workload_functions):
@ -112,9 +112,9 @@ def test_get_function_stats_summary(function_ranker, workload_functions):
assert stats is not None
assert stats["function_name"] == "funcA"
assert stats["own_time_ns"] == 153000
assert stats["cumulative_time_ns"] == 5960000
assert stats["ttx_score"] == 5960000
assert stats["own_time_ns"] == 63000
assert stats["cumulative_time_ns"] == 5443000
assert stats["ttx_score"] == 5443000
@ -134,8 +134,8 @@ def test_importance_calculation(function_ranker):
assert func_a_stats is not None
importance = func_a_stats["own_time_ns"] / total_program_time
# funcA importance should be approximately 1.0% (153000/15281000)
assert abs(importance - 0.01001) < 0.001
# funcA importance should be approximately 0.57% (63000/10968000)
assert abs(importance - 0.0057) < 0.001
def test_simple_model_predict_stats(function_ranker, workload_functions):
@ -152,15 +152,15 @@ def test_simple_model_predict_stats(function_ranker, workload_functions):
assert stats is not None
assert stats["function_name"] == "predict"
assert stats["call_count"] == 1
assert stats["own_time_ns"] == 2368000
assert stats["cumulative_time_ns"] == 4103000
assert stats["ttx_score"] == 4103000
assert stats["own_time_ns"] == 2289000
assert stats["cumulative_time_ns"] == 4017000
assert stats["ttx_score"] == 4017000
# Test ttX score calculation
ttx_score = function_ranker.get_function_ttx_score(predict_func)
# Expected ttX score: own_time + (time_in_callees * call_count)
# = 2368000 + ((4103000 - 2368000) * 1) = 4103000
assert ttx_score == 4103000
# Expected ttX score: own_time + (time_in_callees / call_count)
# = 2289000 + ((4017000 - 2289000) / 1) = 4017000
assert ttx_score == 4017000
# Test importance calculation for predict function
total_program_time = sum(
@ -168,5 +168,5 @@ def test_simple_model_predict_stats(function_ranker, workload_functions):
if s.get("own_time_ns", 0) > 0
)
importance = stats["own_time_ns"] / total_program_time
# predict importance should be approximately 15.5% (2368000/15281000)
assert abs(importance - 0.155) < 0.01
# predict importance should be approximately 20.9% (2289000/10968000)
assert abs(importance - 0.209) < 0.01