codeflash-internal/django/aiservice/tests/optimizer/test_code_repair.py
Aseem Saxena 1192df12a6
feedback loop for unmatched test results (#2059)
fixes CF-932

# Pull Request Checklist

## Description
- [ ] **Description of PR**: Clear and concise description of what this
PR accomplishes
- [ ] **Breaking Changes**: Document any breaking changes (if
applicable)
- [ ] **Related Issues**: Link to any related issues or tickets

## Testing
- [ ] **Test cases Attached**: All relevant test cases have been
added/updated
- [ ] **Manual Testing**: Manual testing completed for the changes

## Monitoring & Debugging
- [ ] **Logging in place**: Appropriate logging has been added for
debugging user issues
- [ ] **Sentry will be able to catch errors**: Error handling ensures
Sentry can capture and report errors
- [ ] **Avoid dev-based/Prisma logging**: No development-only or
Prisma-specific logging in production code

## Configuration
- [ ] **Env variables newly added**: Any new environment variables are
documented in .env.example file or mentioned in description
---

## Additional Notes
<!-- Add any additional context, screenshots, or notes for reviewers
here -->

---------

Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com>
Co-authored-by: ali <mohammed18200118@gmail.com>
Co-authored-by: Kevin Turcios <106575910+KRRT7@users.noreply.github.com>
2025-12-17 08:14:32 +05:30

389 lines
No EOL
14 KiB
Python

from code_repair.code_repair_context import CodeRepairContext, CodeRepairContextData
from optimizer.diff_patches_utils.patches_v2 import apply_patches
def test_code_repair_single_file():
    """Feed a canned LLM repair reply through ``CodeRepairContext``.

    The fixtures are a reference implementation, a deliberately altered
    "optimized" version of it, and an LLM reply made of ``<replace_in_file>``
    SEARCH/REPLACE blocks targeting ``demo.py``. The test extracts the patches
    from the reply, applies them to the optimized code, and asserts that the
    context reports the result as valid.
    """
    # Fixture: reference implementation, wrapped in a ```python:demo.py fence.
    baseline_source = """```python:demo.py
import math
from typing import List, Tuple, Optional
def calculate_portfolio_metrics(
investments: List[Tuple[str, float, float]],
risk_free_rate: float = 0.02
) -> dict:
if not investments:
raise ValueError("Investments list cannot be empty")
if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
raise ValueError("Portfolio weights must sum to 1.0")
# Calculate weighted return
weighted_return = sum(weight * ret for _, weight, ret in investments)
# Calculate portfolio volatility (simplified)
volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
# Calculate Sharpe ratio
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
# Find best and worst performing assets
best_asset = max(investments, key=lambda x: x[2])
worst_asset = min(investments, key=lambda x: x[2])
return {
'weighted_return': round(weighted_return, 6),
'volatility': round(volatility, 6),
'sharpe_ratio': round(sharpe_ratio, 6),
'best_performing': (best_asset[0], round(best_asset[2], 6)),
'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
'total_assets': len(investments)
}
```
"""
    # Fixture: the altered candidate that the repair reply is meant to patch.
    candidate_source = """```python:demo.py
import math
from typing import List, Tuple, Optional
def calculate_portfolio_metrics(
investments: List[Tuple[str, float, float]],
risk_free_rate: float = 0.02
) -> dict:
if not investments:
raise ValueError("Investments list cannot be empty")
total_weight = sum(w for _, w, _ in investments)
if total_weight != 1.0: # Should use tolerance check
raise ValueError("Portfolio weights must sum to 1.0")
weighted_return = 1.0
for _, weight, ret in investments:
weighted_return *= (1 + ret) ** weight
weighted_return = weighted_return - 1.0 # Convert back from geometric
returns = [r for _, _, r in investments]
mean_return = sum(returns) / len(returns)
volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
# BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
def risk_adjusted_return(return_val, weight):
return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
return {
"weighted_return": round(weighted_return, 6),
"volatility": 2,
"sharpe_ratio": round(sharpe_ratio, 6),
"best_performing": (best_asset[0], round(best_asset[2], 6)),
"worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
"total_assets": len(investments),
}
```
"""
    # Fixture: a full LLM reply (prose plus five <replace_in_file> blocks).
    repair_reply = """Looking at the test failure and comparing the original and modified code, I can see several issues in the modified implementation:
1. **Hardcoded volatility**: The volatility is hardcoded to `2` instead of being calculated
2. **Incorrect weight validation**: Missing tolerance check for weight sum validation
3. **Wrong volatility calculation**: Using standard deviation of returns instead of weighted portfolio volatility
4. **Wrong weighted return calculation**: Using geometric mean instead of arithmetic weighted return
5. **Wrong asset selection criteria**: Using risk-adjusted return instead of simple return
Let me fix these issues:
<replace_in_file>
<path>demo.py</path>
<diff>
<<<<<<< SEARCH
total_weight = sum(w for _, w, _ in investments)
if total_weight != 1.0: # Should use tolerance check
raise ValueError("Portfolio weights must sum to 1.0")
=======
if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
raise ValueError("Portfolio weights must sum to 1.0")
>>>>>>> REPLACE
</diff>
</replace_in_file>
<replace_in_file>
<path>demo.py</path>
<diff>
<<<<<<< SEARCH
weighted_return = 1.0
for _, weight, ret in investments:
weighted_return *= (1 + ret) ** weight
weighted_return = weighted_return - 1.0 # Convert back from geometric
=======
# Calculate weighted return
weighted_return = sum(weight * ret for _, weight, ret in investments)
>>>>>>> REPLACE
</diff>
</replace_in_file>
<replace_in_file>
<path>demo.py</path>
<diff>
<<<<<<< SEARCH
returns = [r for _, _, r in investments]
mean_return = sum(returns) / len(returns)
volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
=======
# Calculate portfolio volatility (simplified)
volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
>>>>>>> REPLACE
</diff>
</replace_in_file>
<replace_in_file>
<path>demo.py</path>
<diff>
<<<<<<< SEARCH
# BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
def risk_adjusted_return(return_val, weight):
return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
=======
# Calculate Sharpe ratio
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
# Find best and worst performing assets
best_asset = max(investments, key=lambda x: x[2])
worst_asset = min(investments, key=lambda x: x[2])
>>>>>>> REPLACE
</diff>
</replace_in_file>
<replace_in_file>
<path>demo.py</path>
<diff>
<<<<<<< SEARCH
return {
"weighted_return": round(weighted_return, 6),
"volatility": 2,
"sharpe_ratio": round(sharpe_ratio, 6),
"best_performing": (best_asset[0], round(best_asset[2], 6)),
"worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
"total_assets": len(investments),
}
=======
return {
'weighted_return': round(weighted_return, 6),
'volatility': round(volatility, 6),
'sharpe_ratio': round(sharpe_ratio, 6),
'best_performing': (best_asset[0], round(best_asset[2], 6)),
'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
'total_assets': len(investments)
}
>>>>>>> REPLACE
</diff>
</replace_in_file>
The key changes I made:
1. **Fixed weight validation**: Restored the tolerance-based check using `abs(sum(...) - 1.0) > 1e-10` instead of exact equality
2. **Fixed weighted return calculation**: Changed from geometric mean back to simple weighted arithmetic mean
3. **Fixed volatility calculation**: Restored the original simplified portfolio volatility formula using weighted returns
4. **Fixed volatility return value**: Removed the hardcoded `2` and properly calculated and rounded the volatility
5. **Fixed asset selection**: Restored the original logic to find best/worst assets based on simple returns rather than risk-adjusted returns
6. **Fixed dictionary formatting**: Changed from double quotes to single quotes to match original formatting
These changes align the modified code with the original implementation's behavior, ensuring that the test for zero volatility passes (when cash investment has 0% return, the volatility should indeed be 0.0)."""
    # Build the context from the two code fixtures; every remaining
    # constructor argument is passed as an empty string.
    context_data = CodeRepairContextData(baseline_source, candidate_source, "")
    repair_ctx = CodeRepairContext(context_data, "", "")

    # Pull the patches out of the reply, then apply them to the candidate.
    extracted_patches = repair_ctx.extract_diff_patches_from_llm_res(repair_reply)
    repaired_source = repair_ctx.apply_patches_to_optimized_code(extracted_patches)
    print(repaired_source)

    # The only check performed: the context must accept the patched result.
    assert repair_ctx.is_valid(repaired_source)

    # NOTE(review): the bare string literal below is never assigned or used.
    # Its text matches the candidate fixture above (preceded by a newline).
    # It is kept unchanged; confirm whether it can be dropped or should
    # become an expected-output assertion.
    """
```python:demo.py
import math
from typing import List, Tuple, Optional
def calculate_portfolio_metrics(
investments: List[Tuple[str, float, float]],
risk_free_rate: float = 0.02
) -> dict:
if not investments:
raise ValueError("Investments list cannot be empty")
total_weight = sum(w for _, w, _ in investments)
if total_weight != 1.0: # Should use tolerance check
raise ValueError("Portfolio weights must sum to 1.0")
weighted_return = 1.0
for _, weight, ret in investments:
weighted_return *= (1 + ret) ** weight
weighted_return = weighted_return - 1.0 # Convert back from geometric
returns = [r for _, _, r in investments]
mean_return = sum(returns) / len(returns)
volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
# BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
def risk_adjusted_return(return_val, weight):
return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
return {
"weighted_return": round(weighted_return, 6),
"volatility": 2,
"sharpe_ratio": round(sharpe_ratio, 6),
"best_performing": (best_asset[0], round(best_asset[2], 6)),
"worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
"total_assets": len(investments),
}
```
"""
def test_patch_apply():
    """Call ``apply_patches`` with five SEARCH/REPLACE blocks and print the result.

    The patch text concatenates five ``<<<<<<< SEARCH`` / ``=======`` /
    ``>>>>>>> REPLACE`` blocks; the target is the unfenced source of the
    altered ``calculate_portfolio_metrics`` function.
    """
    # Fixture: five consecutive SEARCH/REPLACE blocks.
    search_replace_blocks = """<<<<<<< SEARCH
total_weight = sum(w for _, w, _ in investments)
if total_weight != 1.0: # Should use tolerance check
raise ValueError("Portfolio weights must sum to 1.0")
=======
if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
raise ValueError("Portfolio weights must sum to 1.0")
>>>>>>> REPLACE
<<<<<<< SEARCH
weighted_return = 1.0
for _, weight, ret in investments:
weighted_return *= (1 + ret) ** weight
weighted_return = weighted_return - 1.0 # Convert back from geometric
=======
# Calculate weighted return
weighted_return = sum(weight * ret for _, weight, ret in investments)
>>>>>>> REPLACE
<<<<<<< SEARCH
returns = [r for _, _, r in investments]
mean_return = sum(returns) / len(returns)
volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
=======
# Calculate portfolio volatility (simplified)
volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
>>>>>>> REPLACE
<<<<<<< SEARCH
# BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
def risk_adjusted_return(return_val, weight):
return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
=======
# Calculate Sharpe ratio
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
# Find best and worst performing assets
best_asset = max(investments, key=lambda x: x[2])
worst_asset = min(investments, key=lambda x: x[2])
>>>>>>> REPLACE
<<<<<<< SEARCH
return {
"weighted_return": round(weighted_return, 6),
"volatility": 2,
"sharpe_ratio": round(sharpe_ratio, 6),
"best_performing": (best_asset[0], round(best_asset[2], 6)),
"worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
"total_assets": len(investments),
}
=======
return {
'weighted_return': round(weighted_return, 6),
'volatility': round(volatility, 6),
'sharpe_ratio': round(sharpe_ratio, 6),
'best_performing': (best_asset[0], round(best_asset[2], 6)),
'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
'total_assets': len(investments)
}
>>>>>>> REPLACE
"""
    # Fixture: the source text the blocks above are applied to.
    source_before = """import math
from typing import List, Tuple, Optional
def calculate_portfolio_metrics(
investments: List[Tuple[str, float, float]],
risk_free_rate: float = 0.02
) -> dict:
if not investments:
raise ValueError("Investments list cannot be empty")
total_weight = sum(w for _, w, _ in investments)
if total_weight != 1.0: # Should use tolerance check
raise ValueError("Portfolio weights must sum to 1.0")
weighted_return = 1.0
for _, weight, ret in investments:
weighted_return *= (1 + ret) ** weight
weighted_return = weighted_return - 1.0 # Convert back from geometric
returns = [r for _, _, r in investments]
mean_return = sum(returns) / len(returns)
volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
# BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
if volatility == 0:
sharpe_ratio = 0.0
else:
sharpe_ratio = (weighted_return - risk_free_rate) / volatility
def risk_adjusted_return(return_val, weight):
return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
return {
"weighted_return": round(weighted_return, 6),
"volatility": 2,
"sharpe_ratio": round(sharpe_ratio, 6),
"best_performing": (best_asset[0], round(best_asset[2], 6)),
"worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
"total_assets": len(investments),
}
"""
    # NOTE(review): the result is only printed; nothing is asserted about it,
    # so this test can fail only if apply_patches raises. Consider asserting
    # on the patched text once the expected output is confirmed.
    patched_source = apply_patches(search_replace_blocks, source_before)
    print(patched_source)