mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
rank only, change formula
This commit is contained in:
parent
9addd95f56
commit
70cecaf8c4
4 changed files with 45 additions and 31 deletions
Binary file not shown.
|
|
@ -14,19 +14,17 @@ if TYPE_CHECKING:
|
|||
|
||||
|
||||
class FunctionRanker:
|
||||
"""Ranks and filters functions for optimization based on profiling trace data using the ttX scoring method.
|
||||
"""Ranks and filters functions based on a ttX score derived from profiling data.
|
||||
|
||||
The FunctionRanker analyzes function-level timing statistics from a trace file and assigns a ttX score to each function:
|
||||
The ttX score is calculated as:
|
||||
ttX = own_time + (time_spent_in_callees / call_count)
|
||||
|
||||
ttX = own_time + (time_spent_in_callees x call_count)
|
||||
This score prioritizes functions that are computationally heavy themselves (high `own_time`)
|
||||
or that make expensive calls to other functions (high average `time_spent_in_callees`).
|
||||
|
||||
This scoring prioritizes functions that:
|
||||
1. Consume significant time themselves (own_time)
|
||||
2. Are called frequently and have expensive subcalls (time_spent_in_callees x call_count)
|
||||
|
||||
It first filters out functions whose own_time is less than a specified fraction of the total runtime (importance_threshold: the minimum fraction of total runtime a function must account for to be considered important), treating them as unimportant for optimization.
|
||||
|
||||
The remaining functions are then ranked in descending order by their ttX score, prioritizing those most likely to yield performance improvements if optimized.
|
||||
Functions are first filtered by an importance threshold based on their `own_time` as a
|
||||
fraction of the total runtime. The remaining functions are then ranked by their ttX score
|
||||
to identify the best candidates for optimization.
|
||||
"""
|
||||
|
||||
def __init__(self, trace_file_path: Path) -> None:
|
||||
|
|
@ -59,7 +57,7 @@ class FunctionRanker:
|
|||
time_in_callees_ns = cumulative_time_ns - total_time_ns
|
||||
|
||||
# Calculate ttX score
|
||||
ttx_score = own_time_ns + (time_in_callees_ns * call_count)
|
||||
ttx_score = own_time_ns + (time_in_callees_ns / call_count)
|
||||
|
||||
function_key = f"{filename}:{qualified_name}"
|
||||
self._function_stats[function_key] = {
|
||||
|
|
@ -99,11 +97,27 @@ class FunctionRanker:
|
|||
return stats["ttx_score"] if stats else 0.0
|
||||
|
||||
def rank_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
|
||||
return sorted(functions_to_optimize, key=self.get_function_ttx_score, reverse=True)
|
||||
ranked = sorted(functions_to_optimize, key=self.get_function_ttx_score, reverse=True)
|
||||
logger.info(
|
||||
f"Function ranking order: {[f'{func.function_name} (ttX={self.get_function_ttx_score(func):.2f})' for func in ranked]}"
|
||||
)
|
||||
return ranked
|
||||
|
||||
def get_function_stats_summary(self, function_to_optimize: FunctionToOptimize) -> dict | None:
|
||||
return self._get_function_stats(function_to_optimize)
|
||||
|
||||
def rerank_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
|
||||
"""Ranks functions based on their ttX score.
|
||||
|
||||
This method calculates the ttX score for each function and returns
|
||||
the functions sorted in descending order of their ttX score.
|
||||
"""
|
||||
if not self._function_stats:
|
||||
logger.warning("No function stats available to rank functions.")
|
||||
return []
|
||||
|
||||
return self.rank_functions(functions_to_optimize)
|
||||
|
||||
def rerank_and_filter_functions(self, functions_to_optimize: list[FunctionToOptimize]) -> list[FunctionToOptimize]:
|
||||
"""Reranks and filters functions based on their impact on total runtime.
|
||||
|
||||
|
|
|
|||
|
|
@ -218,7 +218,7 @@ def get_functions_to_optimize(
|
|||
all_functions.extend(file_functions)
|
||||
|
||||
if all_functions:
|
||||
ranked_functions = ranker.rerank_and_filter_functions(all_functions)
|
||||
ranked_functions = ranker.rank_functions(all_functions)
|
||||
functions_count = len(ranked_functions)
|
||||
|
||||
ranked_dict = {}
|
||||
|
|
|
|||
|
|
@ -58,8 +58,8 @@ def test_load_function_stats(function_ranker):
|
|||
# Verify funcA specific values
|
||||
assert func_a_stats["function_name"] == "funcA"
|
||||
assert func_a_stats["call_count"] == 1
|
||||
assert func_a_stats["own_time_ns"] == 153000
|
||||
assert func_a_stats["cumulative_time_ns"] == 5960000
|
||||
assert func_a_stats["own_time_ns"] == 63000
|
||||
assert func_a_stats["cumulative_time_ns"] == 5443000
|
||||
|
||||
|
||||
def test_get_function_ttx_score(function_ranker, workload_functions):
|
||||
|
|
@ -72,9 +72,9 @@ def test_get_function_ttx_score(function_ranker, workload_functions):
|
|||
assert func_a is not None
|
||||
ttx_score = function_ranker.get_function_ttx_score(func_a)
|
||||
|
||||
# Expected ttX score: own_time + (time_in_callees * call_count)
|
||||
# = 153000 + ((5960000 - 153000) * 1) = 5960000
|
||||
assert ttx_score == 5960000
|
||||
# Expected ttX score: own_time + (time_in_callees / call_count)
|
||||
# = 63000 + ((5443000 - 63000) / 1) = 5443000
|
||||
assert ttx_score == 5443000
|
||||
|
||||
|
||||
def test_rank_functions(function_ranker, workload_functions):
|
||||
|
|
@ -112,9 +112,9 @@ def test_get_function_stats_summary(function_ranker, workload_functions):
|
|||
|
||||
assert stats is not None
|
||||
assert stats["function_name"] == "funcA"
|
||||
assert stats["own_time_ns"] == 153000
|
||||
assert stats["cumulative_time_ns"] == 5960000
|
||||
assert stats["ttx_score"] == 5960000
|
||||
assert stats["own_time_ns"] == 63000
|
||||
assert stats["cumulative_time_ns"] == 5443000
|
||||
assert stats["ttx_score"] == 5443000
|
||||
|
||||
|
||||
|
||||
|
|
@ -134,8 +134,8 @@ def test_importance_calculation(function_ranker):
|
|||
assert func_a_stats is not None
|
||||
importance = func_a_stats["own_time_ns"] / total_program_time
|
||||
|
||||
# funcA importance should be approximately 1.0% (153000/15281000)
|
||||
assert abs(importance - 0.01001) < 0.001
|
||||
# funcA importance should be approximately 0.57% (63000/10968000)
|
||||
assert abs(importance - 0.0057) < 0.001
|
||||
|
||||
|
||||
def test_simple_model_predict_stats(function_ranker, workload_functions):
|
||||
|
|
@ -152,15 +152,15 @@ def test_simple_model_predict_stats(function_ranker, workload_functions):
|
|||
assert stats is not None
|
||||
assert stats["function_name"] == "predict"
|
||||
assert stats["call_count"] == 1
|
||||
assert stats["own_time_ns"] == 2368000
|
||||
assert stats["cumulative_time_ns"] == 4103000
|
||||
assert stats["ttx_score"] == 4103000
|
||||
assert stats["own_time_ns"] == 2289000
|
||||
assert stats["cumulative_time_ns"] == 4017000
|
||||
assert stats["ttx_score"] == 4017000
|
||||
|
||||
# Test ttX score calculation
|
||||
ttx_score = function_ranker.get_function_ttx_score(predict_func)
|
||||
# Expected ttX score: own_time + (time_in_callees * call_count)
|
||||
# = 2368000 + ((4103000 - 2368000) * 1) = 4103000
|
||||
assert ttx_score == 4103000
|
||||
# Expected ttX score: own_time + (time_in_callees / call_count)
|
||||
# = 2289000 + ((4017000 - 2289000) / 1) = 4017000
|
||||
assert ttx_score == 4017000
|
||||
|
||||
# Test importance calculation for predict function
|
||||
total_program_time = sum(
|
||||
|
|
@ -168,5 +168,5 @@ def test_simple_model_predict_stats(function_ranker, workload_functions):
|
|||
if s.get("own_time_ns", 0) > 0
|
||||
)
|
||||
importance = stats["own_time_ns"] / total_program_time
|
||||
# predict importance should be approximately 15.5% (2368000/15281000)
|
||||
assert abs(importance - 0.155) < 0.01
|
||||
# predict importance should be approximately 20.9% (2289000/10968000)
|
||||
assert abs(importance - 0.209) < 0.01
|
||||
|
|
|
|||
Loading…
Reference in a new issue