File size: 13,447 Bytes
ec4aa90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 |
"""
Code Transformer - Generates modernized code using AI with RAG.
Supports multiple AI providers (Gemini, Nebius, OpenAI).
"""
import os
import json
import logging
from typing import Dict, List, Optional
from src.config import AIManager
logger = logging.getLogger(__name__)
class CodeTransformer:
    """
    Transforms legacy code to modern equivalents using the configured AI provider.

    Every model call goes through the shared AIManager, so the provider and
    model are whatever that manager is configured with. An optional MCP manager
    supplies previously stored transformation examples and an optional search
    engine supplies snippets of similar code; both are best-effort, and a
    failure in either is logged and ignored.
    """

    # Info strings that may follow an opening markdown fence (compared lower-cased).
    _FENCE_LANGUAGE_TAGS = frozenset({
        'python', 'py', 'python3',
        'java',
        'javascript', 'js', 'jsx',
        'typescript', 'ts', 'tsx',
        'cpp', 'c++', 'c', 'go',
    })

    def __init__(self, mcp_manager=None, search_engine=None):
        """
        Initialize Code Transformer.

        Args:
            mcp_manager: Optional MCPManager instance
            search_engine: Optional CodeSearchEngine instance
        """
        self.mcp_manager = mcp_manager
        self.search_engine = search_engine

        # Use centralized AI manager
        self.ai_manager = AIManager()

        logger.info(
            f"CodeTransformer initialized with provider: {self.ai_manager.provider_name}, "
            f"model: {self.ai_manager.model_name}"
        )

    async def transform_code(self, file_path: str, original_code: str,
                             transformation_plan: Dict) -> str:
        """
        Transform legacy code using the configured AI model.

        Args:
            file_path: Path to the file being transformed
            original_code: Original code content
            transformation_plan: Plan from analyzer with steps and recommendations

        Returns:
            Modernized code as string. The original code is returned unchanged
            when the model call fails or yields no code.
        """
        logger.info(f"Transforming code: {file_path}")

        # Both lookups are best-effort: each returns "" when unavailable.
        examples_text = await self._get_examples_text(transformation_plan)
        context_text = self._get_context_text(file_path)

        prompt = self._build_transformation_prompt(
            file_path, original_code, transformation_plan, examples_text, context_text
        )

        try:
            # NOTE(review): generate_content is not awaited, so it presumably runs
            # synchronously and blocks the event loop for the call - confirm.
            modernized_code = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_MEDIUM,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            ).strip()

            # Extract code from markdown if present
            modernized_code = self._extract_code(modernized_code)

            # An empty result is never a valid transformation; treat it like an
            # error instead of overwriting the file with nothing.
            if not modernized_code:
                logger.warning(
                    f"AI returned no code for {file_path}; keeping original code"
                )
                return original_code

            self._warn_if_truncated(file_path, modernized_code)
            await self._store_example(file_path, original_code, modernized_code,
                                      transformation_plan)

            logger.info(f"Transformation complete for {file_path}")
            return modernized_code

        except Exception as e:
            logger.error(f"Error during transformation: {e}")
            return original_code  # Return original on error

    async def _get_examples_text(self, transformation_plan: Dict) -> str:
        """Return a prompt section with stored transformation examples, or ""."""
        if not self.mcp_manager:
            return ""

        try:
            from src.mcp.memory_client import MemoryMCPClient
            memory_client = MemoryMCPClient(self.mcp_manager)
            pattern_type = transformation_plan.get('pattern', '')
            examples = await memory_client.get_transformation_examples(
                pattern_type,
                limit=3
            )
            if not examples:
                return ""

            # Build the whole section before returning it, so a malformed
            # example cannot leave a half-written section in the prompt.
            pieces = ["\n\nSUCCESSFUL TRANSFORMATION EXAMPLES:\n"]
            for i, ex in enumerate(examples, 1):
                pieces.append(f"\nExample {i}:\n")
                pieces.append(f"Before: {ex.get('before', '')[:200]}...\n")
                pieces.append(f"After: {ex.get('after', '')[:200]}...\n")
            return "".join(pieces)
        except Exception as e:
            logger.warning(f"Could not retrieve transformation examples: {e}")
            return ""

    def _get_context_text(self, file_path: str) -> str:
        """Return a prompt section with similar code from the search engine, or ""."""
        if not self.search_engine:
            return ""

        try:
            similar_files = self.search_engine.find_similar_patterns(
                f"Modern code similar to {file_path}",
                top_k=3
            )
            if not similar_files:
                return ""

            pieces = ["\n\nSIMILAR MODERN CODE EXAMPLES:\n"]
            # Only the first two hits are included in the prompt.
            for f in similar_files[:2]:
                pieces.append(f"- {f['file_path']}: {f['text_snippet']}\n")
            return "".join(pieces)
        except Exception as e:
            logger.warning(f"Could not get similar code context: {e}")
            return ""

    def _build_transformation_prompt(self, file_path: str, original_code: str,
                                     transformation_plan: Dict,
                                     examples_text: str, context_text: str) -> str:
        """Assemble the full transformation prompt sent to the model."""
        return f"""You are an expert code modernization assistant. Transform this legacy code to modern best practices.
FILE: {file_path}
TRANSFORMATION PLAN:
{json.dumps(transformation_plan, indent=2)}
{examples_text}
{context_text}
ORIGINAL CODE:
```
{original_code}
```
SANDBOX EXECUTION CONTEXT (for reference when generating imports):
- This code will be tested in Modal Sandbox at /workspace/
- Python: Tests will be combined with source in test_<module>.py
- Java: Source in <Module>.java (package: com.modernizer), tests in <Module>Test.java
- JavaScript: Source in <module>.js (ES modules with Jest), tests in <module>.test.js
- TypeScript: Source in <module>.ts (CommonJS for Jest/ts-jest), tests in <module>.test.ts
- All files in same /workspace/ directory
- Use relative imports and ensure all external dependencies are available
CRITICAL MODULE SYSTEM RULES:
- TypeScript: Use CommonJS-compatible code (NO import.meta, NO top-level await)
- TypeScript: Jest uses ts-jest with module: "commonjs" - avoid ES module-only features
- JavaScript: Can use ES modules but avoid Node.js-specific ES module features
- Do NOT add CLI execution code (if __name__ == "__main__", import.meta.url checks, etc.)
- Focus on library/module code that can be imported and tested
REQUIREMENTS:
1. Apply the transformation plan exactly
2. Maintain behavioral equivalence (same inputs → same outputs)
3. Add type hints for all functions (Python) or appropriate types
4. Include docstrings for public functions
5. Follow language-specific style guides (PEP 8 for Python, Java conventions, etc.)
6. Add error handling where missing
7. Use environment variables for secrets/credentials
8. Add comments explaining complex logic
9. Ensure all imports are at the top
10. Remove unused imports and variables
11. Use correct relative paths for local imports (same directory imports)
12. Include necessary package declarations (Java) or module exports
13. CRITICAL: Export ALL types, interfaces, enums, and classes that might be used in tests
- TypeScript: Use 'export' keyword for all public types, interfaces, enums, classes
- JavaScript: Include all functions/classes in module.exports or export statements
- Python: All public functions/classes should be importable
- Java: Use public access modifiers for classes/methods that will be tested
IMPORTANT:
- Return ONLY the transformed code, no explanations or markdown formatting
- Do NOT include markdown code fences in the response
- Ensure imports work in sandbox environment where all files are in /workspace/
"""

    def _warn_if_truncated(self, file_path: str, modernized_code: str) -> None:
        """Log a warning when the generated code looks cut off; never raises."""
        # Check for common truncation indicators in the last few lines
        last_lines = modernized_code.split('\n')[-5:]
        last_text = '\n'.join(last_lines)

        ends_cleanly = modernized_code.rstrip().endswith((')', '}', ']', '"', "'"))
        has_closing_keyword = any(
            keyword in last_text for keyword in ['if __name__', 'main()', 'return']
        )

        # Heuristic only: long output that neither ends on a closing token nor
        # shows a typical final statement is reported, not rejected.
        if not ends_cleanly and len(modernized_code) > 1000 and not has_closing_keyword:
            logger.warning(f"Code for {file_path} may be truncated (length: {len(modernized_code)} chars)")
            logger.warning(f"Last few lines: {last_text[:200]}")

    async def _store_example(self, file_path: str, original_code: str,
                             modernized_code: str, transformation_plan: Dict) -> None:
        """Store a successful transformation as a future example (best-effort)."""
        if not self.mcp_manager:
            return

        try:
            import hashlib
            from src.mcp.memory_client import MemoryMCPClient

            memory_client = MemoryMCPClient(self.mcp_manager)
            example = {
                "pattern": transformation_plan.get('pattern', ''),
                "before": original_code[:500],
                "after": modernized_code[:500],
                "file_path": file_path
            }
            # Built-in hash() of a str is salted per process, so it would give a
            # different id for the same file on every run. A content digest is
            # stable, letting a re-run replace the earlier example.
            path_digest = hashlib.sha256(file_path.encode('utf-8')).hexdigest()[:16]
            example_id = f"{transformation_plan.get('pattern', 'unknown')}_{path_digest}"
            await memory_client.store_transformation_example(example_id, example)
        except Exception as e:
            logger.warning(f"Could not store transformation example: {e}")

    def _strip_language_tag(self, code_block: str) -> str:
        """Drop a leading fence info line (e.g. ``python``) from a code block."""
        first_line, newline, remainder = code_block.partition('\n')
        if newline and first_line.strip().lower() in self._FENCE_LANGUAGE_TAGS:
            return remainder
        return code_block

    def _extract_code(self, text: str) -> str:
        """
        Extract code from markdown code blocks if present.

        Handles complete blocks, a lone trailing fence, and a lone opening
        fence whose closing fence is missing (e.g. a truncated response).

        Args:
            text: Text that may contain markdown code blocks

        Returns:
            Extracted code, stripped of surrounding whitespace; "" for empty input
        """
        if not text:
            return ""

        if "```" not in text:
            return text.strip()

        parts = text.split("```")

        if len(parts) >= 3:
            # At least one complete block: take the first one.
            return self._strip_language_tag(parts[1]).strip()

        # Exactly one fence in the text.
        before, after = parts
        if before.strip():
            # Text precedes the fence - treat it as a trailing fence and keep
            # everything before it.
            return before.strip()

        # Nothing precedes the fence, so it is an opening fence that was never
        # closed; the code is what follows it.
        return self._strip_language_tag(after).strip()

    async def bulk_transform(self, files: Dict[str, str],
                             transformation_plan: Dict) -> Dict[str, str]:
        """
        Transform multiple files with the same pattern.

        Files are processed one at a time, in the dictionary's iteration order.

        Args:
            files: Dictionary mapping file paths to their contents
            transformation_plan: Transformation plan to apply

        Returns:
            Dictionary mapping file paths to transformed code. A file whose
            transformation raised maps to its original content.
        """
        logger.info(f"Bulk transforming {len(files)} files")

        results: Dict[str, str] = {}
        succeeded = 0

        for file_path, original_code in files.items():
            try:
                results[file_path] = await self.transform_code(
                    file_path,
                    original_code,
                    transformation_plan
                )
                succeeded += 1
                logger.info(f"✓ Transformed {file_path}")
            except Exception as e:
                logger.error(f"✗ Failed to transform {file_path}: {e}")
                results[file_path] = original_code

        # Count only calls that did not raise. transform_code handles its own
        # model errors by returning the original code, so those still count here.
        logger.info(f"Bulk transformation complete: {succeeded}/{len(files)} successful")
        return results

    async def add_type_hints(self, file_path: str, code: str) -> str:
        """
        Add type hints to Python code.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Code with type hints added, or the input code unchanged when the
            model call fails or yields no code
        """
        logger.info(f"Adding type hints to {file_path}")

        prompt = f"""Add comprehensive type hints to this Python code.
FILE: {file_path}
CODE:
```python
{code}
```
REQUIREMENTS:
1. Add type hints to all function parameters and return types
2. Use typing module for complex types (List, Dict, Optional, etc.)
3. Add type hints to class attributes
4. Maintain all existing functionality
5. Follow PEP 484 type hinting standards
Return ONLY the code with type hints added, no explanations.
"""

        try:
            typed_code = self._extract_code(
                self.ai_manager.generate_content(
                    prompt=prompt,
                    temperature=AIManager.TEMPERATURE_PRECISE,
                    max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM
                )
            )
        except Exception as e:
            logger.error(f"Error adding type hints: {e}")
            return code

        # Never hand back an empty file in place of real code.
        if not typed_code:
            logger.warning(f"AI returned no code for {file_path}; keeping original code")
            return code
        return typed_code

    async def add_docstrings(self, file_path: str, code: str) -> str:
        """
        Add docstrings to code.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Code with docstrings added, or the input code unchanged when the
            model call fails or yields no code
        """
        logger.info(f"Adding docstrings to {file_path}")

        prompt = f"""Add comprehensive docstrings to this code.
FILE: {file_path}
CODE:
```
{code}
```
REQUIREMENTS:
1. Add docstrings to all functions and classes
2. Use Google-style or NumPy-style docstrings
3. Include parameter descriptions, return values, and exceptions
4. Add module-level docstring if missing
5. Maintain all existing functionality
Return ONLY the code with docstrings added, no explanations.
"""

        try:
            documented_code = self._extract_code(
                self.ai_manager.generate_content(
                    prompt=prompt,
                    temperature=AIManager.TEMPERATURE_PRECISE,
                    max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM
                )
            )
        except Exception as e:
            logger.error(f"Error adding docstrings: {e}")
            return code

        # Never hand back an empty file in place of real code.
        if not documented_code:
            logger.warning(f"AI returned no code for {file_path}; keeping original code")
            return code
        return documented_code
|