#!/usr/bin/env python3
"""
Test script for AbMelt structure generation functionality.
Tests both sequence-based structure generation and PDB-based processing.
"""

import os
import sys
import logging
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse

# Add src to path for imports
sys.path.append(str(Path(__file__).parent / "src"))

try:
    from structure_prep import (
        prepare_structure,
        generate_structure_from_sequences,
        prepare_pdb_for_analysis,
        validate_structure,
        get_chain_sequences
    )
    from Bio.PDB import PDBParser
    from Bio.SeqUtils import seq1
except ImportError as e:
    print(f"Failed to import required modules: {e}")
    print("Please ensure you're in the correct environment with required dependencies installed.")
    sys.exit(1)

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)


class StructureGenerationTester:
    """Test class for structure generation functionality.

    Each ``test_*`` method returns a dict mapping a check name to ``True``
    (passed) or ``False`` (failed); ``run_all_tests`` merges them and
    ``print_summary`` reports the totals.
    """

    def __init__(self, test_dir: Optional[str] = None):
        """Initialize tester with optional test directory.

        Args:
            test_dir: Directory to write test artifacts into. When omitted
                (or empty), a fresh temporary directory with the prefix
                ``abmelt_test_`` is created instead.
        """
        self.test_dir = Path(test_dir) if test_dir else Path(tempfile.mkdtemp(prefix="abmelt_test_"))
        self.test_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Test directory: {self.test_dir}")

        # Test antibody sequences (example sequences)
        self.test_sequences = {
            "alemtuzumab": {
                "heavy": "QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYWMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATITADESTSTTAYMELSSLRSEDTAVYYCARGGYSSGYYFDYWGQGTLVTVSS",
                "light": "DIQMTQSPSSLSASVGDRVTITCRASQDISNYLNWFQQKPGKAPKLLIYYATSLADGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQGNTFPWTFGQGTKVEIKR"
            },
            "nivolumab": {
                "heavy": "QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYWINWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATITADESTSTTAYMELSSLRSEDTAVYYCARGGYSSGYYFDYWGQGTLVTVSS",
                "light": "DIQMTQSPSSLSASVGDRVTITCRASQDISNYLNWFQQKPGKAPKLLIYYATSLADGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQGNTFPWTFGQGTKVEIKR"
            }
        }

        # Test configuration
        self.config = {
            "paths": {
                "temp_dir": str(self.test_dir),
                "output_dir": str(self.test_dir / "output"),
                "log_dir": str(self.test_dir / "logs")
            },
            "structure": {
                "validate_structure": True,
                "extract_sequences": True,
                "create_work_dir": True
            }
        }

        # Create output directories
        for path in self.config["paths"].values():
            Path(path).mkdir(parents=True, exist_ok=True)

    def test_sequence_based_generation(self) -> Dict[str, bool]:
        """Test structure generation from sequences.

        For every entry in ``self.test_sequences`` this calls
        ``generate_structure_from_sequences`` directly and then goes through
        ``prepare_structure`` with ``type == "sequences"``.

        Returns:
            Results keyed ``<name>_direct`` and ``<name>_prepare``; if either
            step raises, a ``<name>_error`` entry set to ``False`` is recorded
            and the loop moves on to the next antibody.
        """
        logger.info("=" * 60)
        logger.info("TESTING SEQUENCE-BASED STRUCTURE GENERATION")
        logger.info("=" * 60)

        results = {}

        for antibody_name, sequences in self.test_sequences.items():
            logger.info(f"\nTesting {antibody_name}...")

            try:
                # Test direct sequence generation
                output_file = self.test_dir / f"{antibody_name}_direct.pdb"
                logger.info("Testing direct sequence generation...")

                generated_file = generate_structure_from_sequences(
                    heavy_chain=sequences["heavy"],
                    light_chain=sequences["light"],
                    output_file=str(output_file)
                )

                # Verify file was created
                if Path(generated_file).exists():
                    logger.info(f"✓ Direct generation successful: {generated_file}")
                    results[f"{antibody_name}_direct"] = True
                else:
                    logger.error(f"✗ Direct generation failed: {generated_file}")
                    results[f"{antibody_name}_direct"] = False

                # Test through prepare_structure function
                logger.info("Testing through prepare_structure...")
                antibody = {
                    "name": antibody_name,
                    "heavy_chain": sequences["heavy"],
                    "light_chain": sequences["light"],
                    "type": "sequences"
                }

                structure_files = prepare_structure(antibody, self.config)

                # Verify structure files
                if self._verify_structure_files(structure_files, antibody_name):
                    logger.info(f"✓ prepare_structure successful for {antibody_name}")
                    results[f"{antibody_name}_prepare"] = True
                else:
                    logger.error(f"✗ prepare_structure failed for {antibody_name}")
                    results[f"{antibody_name}_prepare"] = False

            except Exception as e:
                logger.error(f"✗ Error testing {antibody_name}: {e}")
                results[f"{antibody_name}_error"] = False

        return results

    def test_pdb_based_processing(self) -> Dict[str, bool]:
        """Test PDB-based structure processing.

        Input PDB files are first generated from ``self.test_sequences``;
        antibodies whose generation fails are logged and skipped. Each
        generated file is then passed to ``prepare_pdb_for_analysis`` and to
        ``prepare_structure`` with ``type == "pdb"``.

        Returns:
            Results keyed ``<name>_pdb_analysis`` and ``<name>_pdb_prepare``;
            if either step raises, a ``<name>_pdb_error`` entry set to
            ``False`` is recorded.
        """
        logger.info("=" * 60)
        logger.info("TESTING PDB-BASED STRUCTURE PROCESSING")
        logger.info("=" * 60)

        results = {}

        # First generate some test PDBs
        test_pdbs = {}
        for antibody_name, sequences in self.test_sequences.items():
            try:
                pdb_file = self.test_dir / f"{antibody_name}_test.pdb"
                generate_structure_from_sequences(
                    heavy_chain=sequences["heavy"],
                    light_chain=sequences["light"],
                    output_file=str(pdb_file)
                )
                test_pdbs[antibody_name] = str(pdb_file)
                logger.info(f"Generated test PDB: {pdb_file}")
            except Exception as e:
                logger.error(f"Failed to generate test PDB for {antibody_name}: {e}")
                continue

        # Test PDB processing
        for antibody_name, pdb_file in test_pdbs.items():
            logger.info(f"\nTesting PDB processing for {antibody_name}...")

            try:
                # Test prepare_pdb_for_analysis
                logger.info("Testing prepare_pdb_for_analysis...")
                structure_files = prepare_pdb_for_analysis(
                    pdb_file=pdb_file,
                    output_dir=str(self.test_dir / "pdb_analysis")
                )

                if self._verify_structure_files(structure_files, antibody_name):
                    logger.info(f"✓ prepare_pdb_for_analysis successful for {antibody_name}")
                    results[f"{antibody_name}_pdb_analysis"] = True
                else:
                    logger.error(f"✗ prepare_pdb_for_analysis failed for {antibody_name}")
                    results[f"{antibody_name}_pdb_analysis"] = False

                # Test through prepare_structure with PDB type
                logger.info("Testing prepare_structure with PDB type...")
                antibody = {
                    "name": f"{antibody_name}_pdb",
                    "pdb_file": pdb_file,
                    "type": "pdb"
                }

                structure_files = prepare_structure(antibody, self.config)

                if self._verify_structure_files(structure_files, f"{antibody_name}_pdb"):
                    logger.info(f"✓ prepare_structure (PDB) successful for {antibody_name}")
                    results[f"{antibody_name}_pdb_prepare"] = True
                else:
                    logger.error(f"✗ prepare_structure (PDB) failed for {antibody_name}")
                    results[f"{antibody_name}_pdb_prepare"] = False

            except Exception as e:
                logger.error(f"✗ Error processing PDB for {antibody_name}: {e}")
                results[f"{antibody_name}_pdb_error"] = False

        return results

    def test_structure_validation(self) -> Dict[str, bool]:
        """Test structure validation functionality.

        Generated structures are run through ``validate_structure`` and
        ``get_chain_sequences``; afterwards a hand-written two-atom,
        single-chain file is checked to make sure validation rejects it.

        Returns:
            Results keyed ``<name>_validation`` and ``<name>_sequences`` (or
            ``<name>_validation_error`` when a step raises), plus
            ``invalid_structure`` (or ``invalid_structure_error``) for the
            negative case.
        """
        logger.info("=" * 60)
        logger.info("TESTING STRUCTURE VALIDATION")
        logger.info("=" * 60)

        results = {}

        # Test with valid structures
        for antibody_name, sequences in self.test_sequences.items():
            try:
                pdb_file = self.test_dir / f"{antibody_name}_validation.pdb"
                generate_structure_from_sequences(
                    heavy_chain=sequences["heavy"],
                    light_chain=sequences["light"],
                    output_file=str(pdb_file)
                )

                # Test validation
                is_valid = validate_structure(str(pdb_file))
                if is_valid:
                    logger.info(f"✓ Structure validation passed for {antibody_name}")
                    results[f"{antibody_name}_validation"] = True
                else:
                    logger.warning(f"⚠ Structure validation failed for {antibody_name}")
                    results[f"{antibody_name}_validation"] = False

                # Test sequence extraction
                chains = get_chain_sequences(str(pdb_file))
                if chains:
                    logger.info(f"✓ Chain sequences extracted for {antibody_name}: {list(chains.keys())}")
                    results[f"{antibody_name}_sequences"] = True
                else:
                    logger.error(f"✗ Failed to extract chain sequences for {antibody_name}")
                    results[f"{antibody_name}_sequences"] = False

            except Exception as e:
                logger.error(f"✗ Error in validation test for {antibody_name}: {e}")
                results[f"{antibody_name}_validation_error"] = False

        # Test with invalid file
        try:
            invalid_file = self.test_dir / "invalid.pdb"
            # NOTE(review): these records are space-separated rather than laid
            # out in fixed PDB columns - confirm the validator rejects the file
            # for the intended reason (single chain) and not a parse quirk.
            with open(invalid_file, 'w', encoding="utf-8") as f:
                f.write("ATOM 1 N ALA A 1 20.154 16.967 23.862 1.00 11.18 N\n")
                f.write("ATOM 2 CA ALA A 1 19.030 16.067 23.862 1.00 11.18 C\n")
                # Incomplete structure - only one chain

            is_valid = validate_structure(str(invalid_file))
            if not is_valid:
                logger.info("✓ Correctly identified invalid structure (single chain)")
                results["invalid_structure"] = True
            else:
                logger.warning("⚠ Failed to identify invalid structure")
                results["invalid_structure"] = False

        except Exception as e:
            logger.error(f"✗ Error testing invalid structure: {e}")
            results["invalid_structure_error"] = False

        return results

    def test_error_handling(self) -> Dict[str, bool]:
        """Test error handling for various edge cases.

        Each case passes only when ``prepare_structure`` raises the specific
        exception type expected for it; returning normally or raising any
        other exception counts as a failure.

        Returns:
            Results keyed ``invalid_type`` (expects ``ValueError``),
            ``missing_light`` (expects ``KeyError``) and ``non_existent_pdb``
            (expects ``FileNotFoundError``).
        """
        logger.info("=" * 60)
        logger.info("TESTING ERROR HANDLING")
        logger.info("=" * 60)

        results = {}

        # Test with invalid antibody type
        try:
            invalid_antibody = {
                "name": "test",
                "type": "invalid_type"
            }
            prepare_structure(invalid_antibody, self.config)
            logger.error("✗ Should have raised error for invalid antibody type")
            results["invalid_type"] = False
        except ValueError as e:
            logger.info(f"✓ Correctly raised error for invalid type: {e}")
            results["invalid_type"] = True
        except Exception as e:
            logger.error(f"✗ Unexpected error for invalid type: {e}")
            results["invalid_type"] = False

        # Test with missing sequences
        try:
            incomplete_antibody = {
                "name": "test",
                "heavy_chain": "QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYWMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATITADESTSTTAYMELSSLRSEDTAVYYCARGGYSSGYYFDYWGQGTLVTVSS",
                "type": "sequences"
                # Missing light_chain
            }
            prepare_structure(incomplete_antibody, self.config)
            logger.error("✗ Should have raised error for missing light chain")
            results["missing_light"] = False
        except KeyError as e:
            logger.info(f"✓ Correctly raised error for missing light chain: {e}")
            results["missing_light"] = True
        except Exception as e:
            logger.error(f"✗ Unexpected error for missing light chain: {e}")
            results["missing_light"] = False

        # Test with non-existent PDB file
        try:
            non_existent_antibody = {
                "name": "test",
                "pdb_file": "/non/existent/file.pdb",
                "type": "pdb"
            }
            prepare_structure(non_existent_antibody, self.config)
            logger.error("✗ Should have raised error for non-existent PDB")
            results["non_existent_pdb"] = False
        except FileNotFoundError as e:
            logger.info(f"✓ Correctly raised error for non-existent PDB: {e}")
            results["non_existent_pdb"] = True
        except Exception as e:
            logger.error(f"✗ Unexpected error for non-existent PDB: {e}")
            results["non_existent_pdb"] = False

        return results

    def _verify_structure_files(self, structure_files: Dict[str, str], antibody_name: str) -> bool:
        """Verify that structure files were created correctly.

        Args:
            structure_files: Mapping that must contain ``"pdb_file"`` and
                ``"work_dir"`` entries pointing at existing paths.
            antibody_name: Name used only in the success log message.

        Returns:
            ``True`` when both keys are present, both paths exist, and the
            PDB file parses with at least two chains; ``False`` otherwise
            (the reason is logged).
        """
        required_keys = ["pdb_file", "work_dir"]

        # Check required keys
        for key in required_keys:
            if key not in structure_files:
                logger.error(f"Missing required key: {key}")
                return False

        # Check if files exist
        pdb_file = Path(structure_files["pdb_file"])
        work_dir = Path(structure_files["work_dir"])

        if not pdb_file.exists():
            logger.error(f"PDB file does not exist: {pdb_file}")
            return False

        if not work_dir.exists():
            logger.error(f"Work directory does not exist: {work_dir}")
            return False

        # Check if PDB file is valid
        try:
            parser = PDBParser(QUIET=True)
            structure = parser.get_structure("test", str(pdb_file))
            chains = list(structure.get_chains())

            if len(chains) < 2:
                logger.error(f"PDB file has insufficient chains: {len(chains)}")
                return False

        except Exception as e:
            logger.error(f"PDB file is not valid: {e}")
            return False

        logger.info(f"✓ Structure files verified for {antibody_name}")
        return True

    def run_all_tests(self) -> Dict[str, bool]:
        """Run all tests and return combined results.

        Returns:
            The per-category result dicts merged into one, in the order
            sequence generation, PDB processing, validation, error handling.
        """
        logger.info("Starting comprehensive structure generation tests...")

        all_results = {}

        # Run all test categories
        all_results.update(self.test_sequence_based_generation())
        all_results.update(self.test_pdb_based_processing())
        all_results.update(self.test_structure_validation())
        all_results.update(self.test_error_handling())

        return all_results

    def print_summary(self, results: Dict[str, bool]):
        """Print test summary.

        Logs the total, passed and failed counts, the success rate, and the
        name of every failed check.

        Args:
            results: Mapping of check name to pass/fail. May be empty, in
                which case the success rate is reported as 0.0%.
        """
        logger.info("=" * 60)
        logger.info("TEST SUMMARY")
        logger.info("=" * 60)

        total_tests = len(results)
        passed_tests = sum(1 for result in results.values() if result)
        failed_tests = total_tests - passed_tests

        # An empty result set would otherwise divide by zero.
        success_rate = passed_tests / total_tests * 100 if total_tests else 0.0

        logger.info(f"Total tests: {total_tests}")
        logger.info(f"Passed: {passed_tests}")
        logger.info(f"Failed: {failed_tests}")
        logger.info(f"Success rate: {success_rate:.1f}%")

        if failed_tests > 0:
            logger.info("\nFailed tests:")
            for test_name, result in results.items():
                if not result:
                    logger.info(f" ✗ {test_name}")

        logger.info(f"\nTest directory: {self.test_dir}")
        logger.info("You can inspect the generated files in the test directory.")

    def cleanup(self):
        """Clean up test directory.

        Removes ``self.test_dir`` and everything beneath it if it exists;
        this applies equally to a caller-supplied directory.
        """
        if self.test_dir.exists():
            shutil.rmtree(self.test_dir)
            logger.info(f"Cleaned up test directory: {self.test_dir}")


def main():
    """Main function to run structure generation tests.

    Parses ``--test-dir``, ``--keep-files`` and ``--verbose``, runs every
    test category, prints the summary, and exits with status 0 when no check
    failed and 1 otherwise. On Ctrl-C or an unexpected exception the test
    directory is removed (regardless of ``--keep-files``) and the exit status
    is 1.
    """
    parser = argparse.ArgumentParser(description='Test AbMelt structure generation')
    parser.add_argument('--test-dir', type=str, help='Test directory (default: temporary)')
    parser.add_argument('--keep-files', action='store_true', help='Keep test files after completion')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose logging')

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Create tester
    tester = StructureGenerationTester(test_dir=args.test_dir)

    try:
        # Run all tests
        results = tester.run_all_tests()

        # Print summary
        tester.print_summary(results)

        # Cleanup unless keeping files
        if not args.keep_files:
            tester.cleanup()
        else:
            logger.info(f"Test files kept in: {tester.test_dir}")

        # Exit with appropriate code
        # (SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.)
        failed_tests = sum(1 for result in results.values() if not result)
        sys.exit(0 if failed_tests == 0 else 1)

    except KeyboardInterrupt:
        logger.info("\nTest interrupted by user")
        tester.cleanup()
        sys.exit(1)
    except Exception as e:
        logger.error(f"Test failed with error: {e}")
        tester.cleanup()
        sys.exit(1)


if __name__ == "__main__":
    main()