Spaces:

hugging-science
/

abmelt-benchmark

Build error

App Files Files Community

abmelt-benchmark / tests /test_structure_generation.py

ZijianGuan

Upload folder using huggingface_hub

8ef403e verified 4 days ago

raw

history blame contribute delete

18.9 kB

	#!/usr/bin/env python3

	"""
	Test script for AbMelt structure generation functionality.
	Tests both sequence-based structure generation and PDB-based processing.
	"""

	import os
	import sys
	import logging
	import tempfile
	import shutil
	from pathlib import Path
	from typing import Dict, List, Tuple
	import argparse

	# Make the project's ``src`` directory importable before the imports below.
	# The original entry (a ``src`` folder next to this file) is kept unchanged;
	# the ``src`` folder one level up is appended as well so the script also
	# resolves ``structure_prep`` when it is run from a ``tests/`` subfolder.
	# NOTE(review): assumes ``structure_prep`` lives in one of these two folders
	# - confirm against the repository layout.
	sys.path.append(str(Path(__file__).parent / "src"))
	sys.path.append(str(Path(__file__).resolve().parent.parent / "src"))

	try:
	    from structure_prep import (
	        prepare_structure,
	        generate_structure_from_sequences,
	        prepare_pdb_for_analysis,
	        validate_structure,
	        get_chain_sequences
	    )
	    from Bio.PDB import PDBParser
	    from Bio.SeqUtils import seq1
	except ImportError as e:
	    # Without these modules nothing below can run, so stop immediately with
	    # a non-zero exit status instead of failing later with a NameError.
	    print(f"Failed to import required modules: {e}")
	    print("Please ensure you're in the correct environment with required dependencies installed.")
	    sys.exit(1)

	# Setup logging: INFO level, timestamped records written to stdout.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    handlers=[
	        logging.StreamHandler(sys.stdout)
	    ]
	)
	logger = logging.getLogger(__name__)


	class StructureGenerationTester:
	    """Exercise the AbMelt structure-preparation helpers end to end.

	    Every ``test_*`` method returns a mapping of check name to pass/fail flag
	    instead of raising, so one failing check does not stop the others. The
	    mappings are merged by :meth:`run_all_tests` and reported by
	    :meth:`print_summary`.
	    """

	    def __init__(self, test_dir: str = None):
	        """Set up the scratch directory, sample sequences and configuration.

	        Args:
	            test_dir: Directory to write generated files into. When omitted
	                (or empty), a fresh temporary directory with the prefix
	                ``abmelt_test_`` is created.
	        """
	        if test_dir:
	            self.test_dir = Path(test_dir)
	            # A directory that already exists belongs to the caller and may
	            # hold unrelated data, so cleanup() must not delete it.
	            self._owns_test_dir = not self.test_dir.exists()
	        else:
	            self.test_dir = Path(tempfile.mkdtemp(prefix="abmelt_test_"))
	            self._owns_test_dir = True
	        self.test_dir.mkdir(parents=True, exist_ok=True)
	        logger.info(f"Test directory: {self.test_dir}")

	        # Test antibody sequences (example sequences)
	        self.test_sequences = {
	            "alemtuzumab": {
	                "heavy": "QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYWMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATITADESTSTTAYMELSSLRSEDTAVYYCARGGYSSGYYFDYWGQGTLVTVSS",
	                "light": "DIQMTQSPSSLSASVGDRVTITCRASQDISNYLNWFQQKPGKAPKLLIYYATSLADGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQGNTFPWTFGQGTKVEIKR",
	            },
	            "nivolumab": {
	                "heavy": "QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYWINWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATITADESTSTTAYMELSSLRSEDTAVYYCARGGYSSGYYFDYWGQGTLVTVSS",
	                "light": "DIQMTQSPSSLSASVGDRVTITCRASQDISNYLNWFQQKPGKAPKLLIYYATSLADGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQGNTFPWTFGQGTKVEIKR",
	            },
	        }

	        # Configuration handed to prepare_structure(); all paths live under
	        # the test directory.
	        self.config = {
	            "paths": {
	                "temp_dir": str(self.test_dir),
	                "output_dir": str(self.test_dir / "output"),
	                "log_dir": str(self.test_dir / "logs"),
	            },
	            "structure": {
	                "validate_structure": True,
	                "extract_sequences": True,
	                "create_work_dir": True,
	            },
	        }

	        # Create output directories
	        for path in self.config["paths"].values():
	            Path(path).mkdir(parents=True, exist_ok=True)

	    @staticmethod
	    def _log_banner(title: str) -> None:
	        """Log *title* framed by two 60-character ``=`` rules."""
	        rule = "=" * 60
	        logger.info(rule)
	        logger.info(title)
	        logger.info(rule)

	    @staticmethod
	    def _record(results, key, passed, ok_msg, fail_msg, fail_level=logging.ERROR):
	        """Store the outcome of one check and log the matching message.

	        Args:
	            results: Mapping that receives ``results[key] = True/False``.
	            key: Name of the check.
	            passed: Truthy when the check succeeded.
	            ok_msg: Message logged at INFO level on success.
	            fail_msg: Message logged at ``fail_level`` on failure.
	            fail_level: Logging level used for ``fail_msg``.
	        """
	        if passed:
	            logger.info(ok_msg)
	        else:
	            logger.log(fail_level, fail_msg)
	        results[key] = bool(passed)

	    def test_sequence_based_generation(self) -> Dict[str, bool]:
	        """Generate structures from the sample sequences.

	        For each sample antibody this calls generate_structure_from_sequences()
	        directly and then goes through prepare_structure() with
	        ``type == "sequences"``.

	        Returns:
	            Mapping with ``<name>_direct`` and ``<name>_prepare`` flags, or a
	            ``<name>_error`` entry set to ``False`` when an exception occurred.
	        """
	        self._log_banner("TESTING SEQUENCE-BASED STRUCTURE GENERATION")

	        results = {}

	        for antibody_name, sequences in self.test_sequences.items():
	            logger.info(f"\nTesting {antibody_name}...")

	            try:
	                # Test direct sequence generation
	                output_file = self.test_dir / f"{antibody_name}_direct.pdb"
	                logger.info("Testing direct sequence generation...")

	                generated_file = generate_structure_from_sequences(
	                    heavy_chain=sequences["heavy"],
	                    light_chain=sequences["light"],
	                    output_file=str(output_file),
	                )

	                # The check passes when the returned path exists on disk.
	                self._record(
	                    results,
	                    f"{antibody_name}_direct",
	                    Path(generated_file).exists(),
	                    f"✓ Direct generation successful: {generated_file}",
	                    f"✗ Direct generation failed: {generated_file}",
	                )

	                # Test through prepare_structure function
	                logger.info("Testing through prepare_structure...")
	                antibody = {
	                    "name": antibody_name,
	                    "heavy_chain": sequences["heavy"],
	                    "light_chain": sequences["light"],
	                    "type": "sequences",
	                }

	                structure_files = prepare_structure(antibody, self.config)

	                self._record(
	                    results,
	                    f"{antibody_name}_prepare",
	                    self._verify_structure_files(structure_files, antibody_name),
	                    f"✓ prepare_structure successful for {antibody_name}",
	                    f"✗ prepare_structure failed for {antibody_name}",
	                )

	            except Exception as e:
	                # Any exception fails this antibody but not the whole run.
	                logger.error(f"✗ Error testing {antibody_name}: {e}")
	                results[f"{antibody_name}_error"] = False

	        return results

	    def test_pdb_based_processing(self) -> Dict[str, bool]:
	        """Process PDB files generated from the sample sequences.

	        A PDB is first generated per antibody; antibodies whose generation
	        fails are logged and skipped. Each generated file is then passed to
	        prepare_pdb_for_analysis() and to prepare_structure() with
	        ``type == "pdb"``.

	        Returns:
	            Mapping with ``<name>_pdb_analysis`` and ``<name>_pdb_prepare``
	            flags, or ``<name>_pdb_error`` set to ``False`` on an exception.
	        """
	        self._log_banner("TESTING PDB-BASED STRUCTURE PROCESSING")

	        results = {}

	        # First generate some test PDBs
	        test_pdbs = {}
	        for antibody_name, sequences in self.test_sequences.items():
	            try:
	                pdb_file = self.test_dir / f"{antibody_name}_test.pdb"
	                generate_structure_from_sequences(
	                    heavy_chain=sequences["heavy"],
	                    light_chain=sequences["light"],
	                    output_file=str(pdb_file),
	                )
	                test_pdbs[antibody_name] = str(pdb_file)
	                logger.info(f"Generated test PDB: {pdb_file}")
	            except Exception as e:
	                logger.error(f"Failed to generate test PDB for {antibody_name}: {e}")
	                continue

	        # Test PDB processing
	        for antibody_name, pdb_file in test_pdbs.items():
	            logger.info(f"\nTesting PDB processing for {antibody_name}...")

	            try:
	                # Test prepare_pdb_for_analysis
	                logger.info("Testing prepare_pdb_for_analysis...")
	                structure_files = prepare_pdb_for_analysis(
	                    pdb_file=pdb_file,
	                    output_dir=str(self.test_dir / "pdb_analysis"),
	                )

	                self._record(
	                    results,
	                    f"{antibody_name}_pdb_analysis",
	                    self._verify_structure_files(structure_files, antibody_name),
	                    f"✓ prepare_pdb_for_analysis successful for {antibody_name}",
	                    f"✗ prepare_pdb_for_analysis failed for {antibody_name}",
	                )

	                # Test through prepare_structure with PDB type
	                logger.info("Testing prepare_structure with PDB type...")
	                antibody = {
	                    "name": f"{antibody_name}_pdb",
	                    "pdb_file": pdb_file,
	                    "type": "pdb",
	                }

	                structure_files = prepare_structure(antibody, self.config)

	                self._record(
	                    results,
	                    f"{antibody_name}_pdb_prepare",
	                    self._verify_structure_files(structure_files, f"{antibody_name}_pdb"),
	                    f"✓ prepare_structure (PDB) successful for {antibody_name}",
	                    f"✗ prepare_structure (PDB) failed for {antibody_name}",
	                )

	            except Exception as e:
	                logger.error(f"✗ Error processing PDB for {antibody_name}: {e}")
	                results[f"{antibody_name}_pdb_error"] = False

	        return results

	    def test_structure_validation(self) -> Dict[str, bool]:
	        """Check validate_structure() and get_chain_sequences().

	        Generated structures are expected to validate and to yield chain
	        sequences; a hand-written two-atom file is expected to be rejected.

	        Returns:
	            Mapping with ``<name>_validation``, ``<name>_sequences`` and
	            ``invalid_structure`` flags; ``*_error`` entries are set to
	            ``False`` when an exception occurred.
	        """
	        self._log_banner("TESTING STRUCTURE VALIDATION")

	        results = {}

	        # Test with valid structures
	        for antibody_name, sequences in self.test_sequences.items():
	            try:
	                pdb_file = self.test_dir / f"{antibody_name}_validation.pdb"
	                generate_structure_from_sequences(
	                    heavy_chain=sequences["heavy"],
	                    light_chain=sequences["light"],
	                    output_file=str(pdb_file),
	                )

	                # Test validation (a failure is only logged as a warning)
	                self._record(
	                    results,
	                    f"{antibody_name}_validation",
	                    validate_structure(str(pdb_file)),
	                    f"✓ Structure validation passed for {antibody_name}",
	                    f"⚠ Structure validation failed for {antibody_name}",
	                    fail_level=logging.WARNING,
	                )

	                # Test sequence extraction
	                chains = get_chain_sequences(str(pdb_file))
	                # Only read the keys when something was returned, so an
	                # empty/None result is reported instead of raising.
	                chain_ids = list(chains.keys()) if chains else []
	                self._record(
	                    results,
	                    f"{antibody_name}_sequences",
	                    chains,
	                    f"✓ Chain sequences extracted for {antibody_name}: {chain_ids}",
	                    f"✗ Failed to extract chain sequences for {antibody_name}",
	                )

	            except Exception as e:
	                logger.error(f"✗ Error in validation test for {antibody_name}: {e}")
	                results[f"{antibody_name}_validation_error"] = False

	        # Test with invalid file
	        try:
	            invalid_file = self.test_dir / "invalid.pdb"
	            with open(invalid_file, 'w') as f:
	                f.write("ATOM 1 N ALA A 1 20.154 16.967 23.862 1.00 11.18 N\n")
	                f.write("ATOM 2 CA ALA A 1 19.030 16.067 23.862 1.00 11.18 C\n")
	                # Incomplete structure - only one chain

	            # The check passes when validation rejects the file.
	            self._record(
	                results,
	                "invalid_structure",
	                not validate_structure(str(invalid_file)),
	                "✓ Correctly identified invalid structure (single chain)",
	                "⚠ Failed to identify invalid structure",
	                fail_level=logging.WARNING,
	            )

	        except Exception as e:
	            logger.error(f"✗ Error testing invalid structure: {e}")
	            results["invalid_structure_error"] = False

	        return results

	    def test_error_handling(self) -> Dict[str, bool]:
	        """Check that prepare_structure() rejects malformed antibody specs.

	        Each case passes only when the expected exception type is raised;
	        no exception, or an exception of another type, counts as a failure.

	        Returns:
	            Mapping with ``invalid_type``, ``missing_light`` and
	            ``non_existent_pdb`` flags.
	        """
	        self._log_banner("TESTING ERROR HANDLING")

	        results = {}

	        # (result key, antibody spec, expected exception,
	        #  label used in the "should have raised" message,
	        #  label used in the "raised" messages)
	        cases = [
	            (
	                "invalid_type",
	                {"name": "test", "type": "invalid_type"},
	                ValueError,
	                "invalid antibody type",
	                "invalid type",
	            ),
	            (
	                "missing_light",
	                {
	                    "name": "test",
	                    "heavy_chain": self.test_sequences["alemtuzumab"]["heavy"],
	                    "type": "sequences",
	                    # Missing light_chain
	                },
	                KeyError,
	                "missing light chain",
	                "missing light chain",
	            ),
	            (
	                "non_existent_pdb",
	                {"name": "test", "pdb_file": "/non/existent/file.pdb", "type": "pdb"},
	                FileNotFoundError,
	                "non-existent PDB",
	                "non-existent PDB",
	            ),
	        ]

	        for key, antibody, expected_exc, not_raised_label, label in cases:
	            try:
	                prepare_structure(antibody, self.config)
	            except expected_exc as e:
	                logger.info(f"✓ Correctly raised error for {label}: {e}")
	                results[key] = True
	            except Exception as e:
	                logger.error(f"✗ Unexpected error for {label}: {e}")
	                results[key] = False
	            else:
	                logger.error(f"✗ Should have raised error for {not_raised_label}")
	                results[key] = False

	        return results

	    def _verify_structure_files(self, structure_files: Dict[str, str], antibody_name: str) -> bool:
	        """Verify that structure files were created correctly.

	        Args:
	            structure_files: Mapping expected to contain ``"pdb_file"`` and
	                ``"work_dir"`` paths.
	            antibody_name: Name used in the success log message.

	        Returns:
	            ``True`` when both keys are present, both paths exist and the PDB
	            parses with at least two chains; ``False`` otherwise.
	        """
	        if structure_files is None:
	            # Report a missing result as a failed check rather than letting
	            # the membership test below raise TypeError.
	            logger.error("No structure files were returned")
	            return False

	        # Check required keys
	        for key in ("pdb_file", "work_dir"):
	            if key not in structure_files:
	                logger.error(f"Missing required key: {key}")
	                return False

	        # Check if files exist
	        pdb_file = Path(structure_files["pdb_file"])
	        work_dir = Path(structure_files["work_dir"])

	        if not pdb_file.exists():
	            logger.error(f"PDB file does not exist: {pdb_file}")
	            return False

	        if not work_dir.exists():
	            logger.error(f"Work directory does not exist: {work_dir}")
	            return False

	        # Check if PDB file is valid
	        try:
	            parser = PDBParser(QUIET=True)
	            structure = parser.get_structure("test", str(pdb_file))
	            chains = list(structure.get_chains())
	            if len(chains) < 2:
	                logger.error(f"PDB file has insufficient chains: {len(chains)}")
	                return False
	        except Exception as e:
	            logger.error(f"PDB file is not valid: {e}")
	            return False

	        logger.info(f"✓ Structure files verified for {antibody_name}")
	        return True

	    def run_all_tests(self) -> Dict[str, bool]:
	        """Run every test category and return the merged results."""
	        logger.info("Starting comprehensive structure generation tests...")

	        all_results = {}

	        # Run all test categories, in a fixed order.
	        for run_category in (
	            self.test_sequence_based_generation,
	            self.test_pdb_based_processing,
	            self.test_structure_validation,
	            self.test_error_handling,
	        ):
	            all_results.update(run_category())

	        return all_results

	    def print_summary(self, results: Dict[str, bool]):
	        """Log totals, success rate and the names of failed checks.

	        Args:
	            results: Mapping of check name to pass/fail flag. An empty
	                mapping is reported with a 0.0% success rate.
	        """
	        self._log_banner("TEST SUMMARY")

	        total_tests = len(results)
	        passed_tests = sum(1 for passed in results.values() if passed)
	        failed_tests = total_tests - passed_tests
	        # Guard the division: with no recorded checks there is no rate.
	        success_rate = passed_tests / total_tests * 100 if total_tests else 0.0

	        logger.info(f"Total tests: {total_tests}")
	        logger.info(f"Passed: {passed_tests}")
	        logger.info(f"Failed: {failed_tests}")
	        logger.info(f"Success rate: {success_rate:.1f}%")

	        if failed_tests > 0:
	            logger.info("\nFailed tests:")
	            for test_name, passed in results.items():
	                if not passed:
	                    logger.info(f" ✗ {test_name}")

	        logger.info(f"\nTest directory: {self.test_dir}")
	        logger.info("You can inspect the generated files in the test directory.")

	    def cleanup(self):
	        """Remove the test directory if this tester created it.

	        A directory that already existed when the tester was constructed is
	        left in place, because deleting it recursively could destroy files
	        that have nothing to do with this test run.
	        """
	        if not getattr(self, "_owns_test_dir", True):
	            logger.info(f"Leaving pre-existing test directory in place: {self.test_dir}")
	            return

	        if self.test_dir.exists():
	            shutil.rmtree(self.test_dir)
	            logger.info(f"Cleaned up test directory: {self.test_dir}")


	def main():
	    """Run the structure generation tests from the command line.

	    Options:
	        --test-dir: directory for generated files (default: temporary).
	        --keep-files: keep the generated files after the run.
	        --verbose / -v: switch the root logger to DEBUG.

	    Exits with status 0 when every recorded check passed, and with status 1
	    when any check failed, the run was interrupted, or an unexpected error
	    occurred.
	    """
	    parser = argparse.ArgumentParser(description='Test AbMelt structure generation')
	    parser.add_argument('--test-dir', type=str, help='Test directory (default: temporary)')
	    parser.add_argument('--keep-files', action='store_true', help='Keep test files after completion')
	    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose logging')

	    args = parser.parse_args()

	    if args.verbose:
	        logging.getLogger().setLevel(logging.DEBUG)

	    # Create tester
	    tester = StructureGenerationTester(test_dir=args.test_dir)

	    try:
	        # Run all tests
	        results = tester.run_all_tests()

	        # Print summary
	        tester.print_summary(results)
	    except KeyboardInterrupt:
	        logger.info("\nTest interrupted by user")
	        exit_code = 1
	    except Exception as e:
	        logger.error(f"Test failed with error: {e}")
	        exit_code = 1
	    else:
	        # Exit with appropriate code
	        exit_code = 0 if all(results.values()) else 1
	    finally:
	        # Honour --keep-files on every exit path: the generated files are
	        # most useful for inspection precisely when the run went wrong.
	        if args.keep_files:
	            logger.info(f"Test files kept in: {tester.test_dir}")
	        else:
	            tester.cleanup()

	    sys.exit(exit_code)


	if __name__ == "__main__":
	    main()