#!/usr/bin/env python3
"""
Comprehensive Testing Suite for clients.py
Tests all API clients, embedding functions, and generation capabilities.
"""

import os
import sys
import time
import json
import traceback
import threading
import concurrent.futures
from typing import Any, Callable, Dict, List, Optional, Tuple
from datetime import datetime

# Add the project directory to path if needed
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import the functions we want to test
try:
    from clients import (
        init_weaviate_client,
        get_weaviate_client,
        close_weaviate_client,
        gemini_client,
        groq_client,
        embed_texts_groq,
        groq_qwen_generate_content,
        siliconflow_qwen_generate_content,
        deepinfra_embedding,
        deepinfra_rerank,
        qwen_generate
    )
    from configs import load_yaml_config
    from logger.custom_logger import CustomLoggerTracker

    # Initialize logger
    custom_log = CustomLoggerTracker()
    logger = custom_log.get_logger("clients_test")
except ImportError as e:
    print(f"Import error: {e}")
    print("Make sure all required modules are available")
    sys.exit(1)

# Load configuration
try:
    config = load_yaml_config("config.yaml")
except Exception as e:
    # Best effort: the configuration test reports the missing keys later on.
    print(f"Config loading error: {e}")
    config = {}


def _print_banner(title: str, width: int = 60) -> None:
    """Print a blank line, then *title* framed by two rules of ``=`` characters."""
    rule = "=" * width
    print("\n" + rule)
    print(title)
    print(rule)


class ClientTester:
    """Comprehensive testing class for all client functions.

    Every ``test_*`` method prints its progress, records its outcome through
    :meth:`log_test_result` and returns ``True`` on success, ``False``
    otherwise. The methods catch their own exceptions, so a failing service
    is reported instead of aborting the run.
    """

    # Environment variables that test_environment_variables expects to be set.
    REQUIRED_ENV_VARS = (
        "GROQ_URL",
        "GROQ_API_TOKEN",
        "DEEPINFRA_API_KEY",
        "DEEPINFRA_URL",
        "DEEPINFRA_EMBEDDING_URL",
        "DEEPINFRA_RERANK_URL",
        "WEAVIATE_URL",
        "WEAVIATE_API_KEY",
        "GEMINI_API_KEY",
        "SILICONFLOW_URL",
        "SILICONFLOW_API_KEY",
    )

    def __init__(self):
        # Outcome of each test, keyed by result name (see log_test_result).
        self.test_results: Dict[str, Dict[str, Any]] = {}
        # Running counters plus the wall-clock start/end of the session.
        self.test_summary: Dict[str, Any] = {
            "total_tests": 0,
            "passed": 0,
            "failed": 0,
            "warnings": 0,
            "start_time": datetime.now(),
            "end_time": None,
        }

    def log_test_result(self, test_name: str, success: bool, message: str,
                        duration: float = 0, warning: bool = False):
        """Log test result with details.

        Args:
            test_name: Key under which the result is stored; a later result
                with the same key overwrites the earlier one.
            success: Whether the test succeeded.
            message: Human-readable outcome shown in the report.
            duration: Elapsed seconds (0 when not measured).
            warning: When set on a successful test, the result is counted
                under "warnings" instead of "passed".
        """
        self.test_results[test_name] = {
            "success": success,
            "message": message,
            "duration": duration,
            "warning": warning,
            "timestamp": datetime.now().strftime("%H:%M:%S"),
        }
        self.test_summary["total_tests"] += 1
        if not success:
            self.test_summary["failed"] += 1
        elif warning:
            self.test_summary["warnings"] += 1
        else:
            self.test_summary["passed"] += 1

    def _finish(self, test_name: str, success: bool, message: str,
                start_time: float) -> bool:
        """Record a result with the time elapsed since *start_time*; return *success*."""
        self.log_test_result(test_name, success, message, time.time() - start_time)
        return success

    def available_tests(self) -> List[Tuple[str, Callable[[], bool]]]:
        """Return ``(display name, bound test method)`` pairs in execution order."""
        return [
            ("Environment Variables", self.test_environment_variables),
            ("Configuration Loading", self.test_configuration_loading),
            ("Weaviate Connection", self.test_weaviate_connection),
            ("Gemini Client", self.test_gemini_client),
            ("Groq Client", self.test_groq_client),
            ("Groq Embeddings", self.test_groq_embeddings),
            ("Groq Generation", self.test_groq_generation),
            ("SiliconFlow Generation", self.test_siliconflow_generation),
            ("DeepInfra Embeddings", self.test_deepinfra_embeddings),
            ("DeepInfra Reranking", self.test_deepinfra_rerank),
            ("Unified Qwen Generation", self.test_unified_qwen_generate),
        ]

    def test_environment_variables(self) -> bool:
        """Test if all required environment variables are present.

        Prints one line per variable; set values are shown masked (at most
        the first 8 characters). Returns ``True`` only if every variable in
        ``REQUIRED_ENV_VARS`` is set to a non-empty value.
        """
        _print_banner("Testing Environment Variables")

        all_present = True
        for var_name in self.REQUIRED_ENV_VARS:
            var_value = os.getenv(var_name)
            if var_value:
                status = "PASS - SET"
                # Short values are hidden entirely so they are never printed whole.
                masked_value = var_value[:8] + "..." if len(var_value) > 8 else "***"
                print(f" {status:<12} {var_name}: {masked_value}")
            else:
                status = "FAIL - MISSING"
                print(f" {status:<12} {var_name}")
                all_present = False

        self.log_test_result(
            "environment_variables",
            all_present,
            f"Environment variables: {'All present' if all_present else 'Some missing'}"
        )
        return all_present

    def test_weaviate_connection(self) -> bool:
        """Test Weaviate client connection.

        Initializes the client, checks that it can be retrieved, lists the
        collections (a listing failure is only a printed warning) and closes
        the connection. Returns ``False`` if initialization yields ``None``
        or any step outside the listing raises.
        """
        _print_banner("Testing Weaviate Connection")
        start_time = time.time()
        needs_close = False

        try:
            print(" Testing Weaviate initialization...")
            client = init_weaviate_client()
            if client is None:
                return self._finish(
                    "weaviate_init", False,
                    "Weaviate client initialization failed", start_time
                )
            needs_close = True
            print(" PASS - Weaviate client initialized")

            print(" Testing client retrieval...")
            if get_weaviate_client():
                print(" PASS - Client retrieval successful")
            else:
                print(" FAIL - Client retrieval failed")

            try:
                # The v4 client returns a mapping keyed by collection name,
                # which cannot be sliced; list() yields the names for both a
                # mapping and a plain sequence.
                collection_names = list(client.collections.list_all())
                print(f" PASS - Collections accessible: {len(collection_names)} found")
                for collection in collection_names[:3]:  # Show first 3
                    print(f" - {collection}")
                if len(collection_names) > 3:
                    print(f" ... and {len(collection_names) - 3} more")
            except Exception as e:
                print(f" WARN - Could not list collections: {e}")

            print(" Testing connection closure...")
            needs_close = False  # closing is attempted exactly once
            close_weaviate_client()
            print(" PASS - Connection closed")

            return self._finish(
                "weaviate_connection", True,
                "Weaviate connection test passed", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - Weaviate connection: {e}")
            self.log_test_result(
                "weaviate_connection", False,
                f"Weaviate connection failed: {str(e)}", duration
            )
            return False
        finally:
            # Do not leave the connection open when a step above raised.
            if needs_close:
                try:
                    close_weaviate_client()
                except Exception as close_error:
                    print(f" WARN - Could not close Weaviate connection: {close_error}")

    def _run_client_init_test(self, test_name: str, label: str,
                              factory: Callable[[], Any]) -> bool:
        """Call *factory* and pass if it returns a truthy client object."""
        start_time = time.time()
        try:
            print(f" Testing {label} client initialization...")
            if factory():
                print(f" PASS - {label} client initialized")
                return self._finish(
                    test_name, True,
                    f"{label} client initialization successful", start_time
                )
            print(f" FAIL - {label} client initialization returned None")
            return self._finish(
                test_name, False,
                f"{label} client initialization returned None", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - {label} client: {e}")
            self.log_test_result(test_name, False, f"{label} client error: {str(e)}", duration)
            return False

    def test_gemini_client(self) -> bool:
        """Test Gemini client initialization"""
        _print_banner("Testing Gemini Client")
        return self._run_client_init_test("gemini_client", "Gemini", gemini_client)

    def test_groq_client(self) -> bool:
        """Test Groq client initialization"""
        _print_banner("Testing Groq Client")
        return self._run_client_init_test("groq_client", "Groq", groq_client)

    def _evaluate_embeddings(self, test_name: str, label: str, embeddings: Any,
                             expected_count: int, shortfall_line: str,
                             start_time: float) -> bool:
        """Validate an embedding result, print the verdict and record it.

        Passes when at least *expected_count* entries came back and at least
        one of them is non-empty. *shortfall_line* is printed when too few
        entries were returned.
        """
        if not (embeddings and len(embeddings) >= expected_count):
            print(shortfall_line)
            return self._finish(
                test_name, False,
                f"{label} embeddings: insufficient embeddings returned", start_time
            )

        valid_embeddings = [emb for emb in embeddings if emb and len(emb) > 0]
        if not valid_embeddings:
            print(" FAIL - No valid embeddings generated")
            return self._finish(
                test_name, False, f"{label} embeddings: no valid embeddings", start_time
            )

        print(f" PASS - Generated {len(valid_embeddings)} valid embeddings")
        print(f" - Embedding dimension: {len(valid_embeddings[0])}")
        print(f" - Sample values: {valid_embeddings[0][:5]}")
        return self._finish(
            test_name, True,
            f"{label} embeddings: {len(valid_embeddings)} embeddings generated", start_time
        )

    def test_groq_embeddings(self) -> bool:
        """Test Groq embedding functionality"""
        _print_banner("Testing Groq Embeddings")
        start_time = time.time()
        test_texts = [
            "What is autism?",
            "Autism spectrum disorders are neurodevelopmental conditions.",
            "Early intervention is important for autism.",
        ]

        try:
            print(f" Testing embedding generation for {len(test_texts)} texts...")
            embeddings = embed_texts_groq(test_texts, batch_size=10)
            return self._evaluate_embeddings(
                "groq_embeddings", "Groq", embeddings, len(test_texts),
                f" FAIL - Expected {len(test_texts)}, got {len(embeddings) if embeddings else 0}",
                start_time,
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - Groq embeddings: {e}")
            self.log_test_result(
                "groq_embeddings", False, f"Groq embeddings error: {str(e)}", duration
            )
            return False

    def _run_generation_test(self, test_name: str, label: str,
                             generate: Callable[[str], Any], prompt: str,
                             intro: str) -> bool:
        """Send *prompt* to *generate*; pass if the stripped reply exceeds 20 characters."""
        start_time = time.time()
        try:
            print(intro)
            print(f" Prompt: '{prompt}'")
            response = generate(prompt)

            if response and len(response.strip()) > 20:
                print(f" PASS - Generated response ({len(response)} characters)")
                print(f" Preview: {response[:100]}...")
                return self._finish(
                    test_name, True,
                    f"{label} generation: {len(response)} characters generated", start_time
                )
            print(f" FAIL - Response too short or empty: '{response}'")
            return self._finish(
                test_name, False,
                f"{label} generation: response too short or empty", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - {label} generation: {e}")
            self.log_test_result(
                test_name, False, f"{label} generation error: {str(e)}", duration
            )
            return False

    def test_groq_generation(self) -> bool:
        """Test Groq content generation"""
        _print_banner("Testing Groq Content Generation")
        return self._run_generation_test(
            "groq_generation", "Groq", groq_qwen_generate_content,
            "Explain what autism is in 2-3 sentences.",
            " Testing content generation...",
        )

    def test_siliconflow_generation(self) -> bool:
        """Test SiliconFlow content generation"""
        _print_banner("Testing SiliconFlow Content Generation")
        return self._run_generation_test(
            "siliconflow_generation", "SiliconFlow", siliconflow_qwen_generate_content,
            "What are the main characteristics of autism? Keep it brief.",
            " Testing SiliconFlow generation...",
        )

    def test_deepinfra_embeddings(self) -> bool:
        """Test DeepInfra embedding functionality"""
        _print_banner("Testing DeepInfra Embeddings")
        start_time = time.time()
        test_texts = [
            "Autism spectrum disorder characteristics",
            "Social communication challenges in autism",
            "Sensory processing in autistic individuals",
        ]

        try:
            print(f" Testing DeepInfra embedding generation for {len(test_texts)} texts...")
            embeddings = deepinfra_embedding(test_texts, batch_size=10)
            return self._evaluate_embeddings(
                "deepinfra_embeddings", "DeepInfra", embeddings, len(test_texts),
                " FAIL - Embedding generation failed",
                start_time,
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - DeepInfra embeddings: {e}")
            self.log_test_result(
                "deepinfra_embeddings", False,
                f"DeepInfra embeddings error: {str(e)}", duration
            )
            return False

    def test_deepinfra_rerank(self) -> bool:
        """Test DeepInfra reranking functionality.

        Passes when ``deepinfra_rerank`` returns any truthy result for one
        query and four candidate passages.
        """
        _print_banner("Testing DeepInfra Reranking")
        start_time = time.time()
        query_batch = ["What is autism spectrum disorder?"]
        items_to_rerank = [
            "Autism is a neurodevelopmental condition affecting communication and behavior.",
            "The weather today is sunny with a chance of rain.",
            "Social challenges are common in autism spectrum disorders.",
            "Cooking recipes for healthy meals and nutrition.",
        ]

        try:
            print(f" Testing reranking for {len(items_to_rerank)} items...")
            reranked_result = deepinfra_rerank(query_batch, items_to_rerank)

            if reranked_result:
                print(" PASS - Reranking completed")
                print(f" - Original first: {items_to_rerank[0][:50]}...")
                # The result type is not visible from here; going through
                # str() keeps the preview to 50 characters for any type
                # instead of raising on a non-sliceable result.
                print(f" - Reranked result: {str(reranked_result)[:50]}...")
                return self._finish(
                    "deepinfra_rerank", True, "DeepInfra reranking successful", start_time
                )
            print(" FAIL - Reranking: no result returned")
            return self._finish(
                "deepinfra_rerank", False,
                "DeepInfra reranking: no result returned", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - DeepInfra reranking: {e}")
            self.log_test_result(
                "deepinfra_rerank", False, f"DeepInfra reranking error: {str(e)}", duration
            )
            return False

    def test_unified_qwen_generate(self) -> bool:
        """Test the unified qwen_generate function"""
        _print_banner("Testing Unified Qwen Generation")
        start_time = time.time()
        test_prompt = "Briefly explain autism spectrum disorders."

        try:
            config_num = config.get("apis_models", {}).get("num", "unknown")
            print(f" Testing unified generation (config num={config_num})...")
            response = qwen_generate(test_prompt)

            if response and len(response.strip()) > 20:
                print(f" PASS - Unified generation successful ({len(response)} characters)")
                print(f" Preview: {response[:100]}...")
                return self._finish(
                    "unified_qwen_generate", True,
                    f"Unified Qwen generation: {len(response)} characters generated",
                    start_time
                )
            print(f" FAIL - Unified generation: response too short: '{response}'")
            return self._finish(
                "unified_qwen_generate", False,
                "Unified Qwen generation: response too short or empty", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - Unified generation: {e}")
            self.log_test_result(
                "unified_qwen_generate", False,
                f"Unified Qwen generation error: {str(e)}", duration
            )
            return False

    def test_configuration_loading(self) -> bool:
        """Test configuration loading.

        Checks the module-level ``config`` for the required top-level keys
        and prints which provider sections exist under ``apis_models``.
        A missing provider section is only a printed warning.
        """
        _print_banner("Testing Configuration Loading")
        start_time = time.time()

        try:
            print(" Testing configuration structure...")
            required_config_keys = ["apis_models", "chunking", "rag", "audio"]

            missing_keys = []
            for key in required_config_keys:
                if key not in config:
                    missing_keys.append(key)
                else:
                    print(f" PASS - Config key '{key}' present")

            if missing_keys:
                print(f" FAIL - Missing config keys: {missing_keys}")
                return self._finish(
                    "configuration_loading", False,
                    f"Configuration missing keys: {missing_keys}", start_time
                )

            # Test API models configuration ("apis_models" is present here,
            # otherwise the missing-keys branch above would have returned).
            api_models = config["apis_models"]
            if "num" in api_models:
                print(f" PASS - API models num: {api_models['num']}")

            for provider in ["groq", "deepinfra"]:
                if provider in api_models:
                    print(f" PASS - {provider.capitalize()} config present")
                else:
                    print(f" WARN - {provider.capitalize()} config missing")

            return self._finish(
                "configuration_loading", True, "Configuration loading successful", start_time
            )
        except Exception as e:
            duration = time.time() - start_time
            print(f" FAIL - Configuration: {e}")
            self.log_test_result(
                "configuration_loading", False, f"Configuration error: {str(e)}", duration
            )
            return False

    def run_all_tests(self):
        """Run all tests and provide comprehensive report"""
        _print_banner("COMPREHENSIVE CLIENTS.PY TESTING SUITE")
        print(f"Started at: {self.test_summary['start_time'].strftime('%Y-%m-%d %H:%M:%S')}")

        # Run all tests
        for test_name, test_func in self.available_tests():
            try:
                print(f"\nRunning {test_name}...")
                test_func()
            except Exception as e:
                # Safety net: the test methods normally handle their own errors.
                print(f"FAIL - {test_name} failed with exception: {e}")
                traceback.print_exc()
                self.log_test_result(
                    test_name.lower().replace(" ", "_"),
                    False,
                    f"Test failed with exception: {str(e)}"
                )

        self.test_summary["end_time"] = datetime.now()
        self.print_comprehensive_report()

    def print_comprehensive_report(self):
        """Print comprehensive test report.

        Covers summary counters, per-test details, recommendations for
        failed or warned tests, and the slowest/fastest test. If no end time
        has been recorded yet, the current time is stored and used.
        """
        _print_banner("COMPREHENSIVE TEST REPORT")

        summary = self.test_summary
        if summary["end_time"] is None:
            # Allows calling the report without going through run_all_tests.
            summary["end_time"] = datetime.now()

        # Summary statistics
        total_duration = (summary["end_time"] - summary["start_time"]).total_seconds()
        total_tests = summary["total_tests"]
        success_rate = summary["passed"] / total_tests * 100 if total_tests > 0 else 0

        print("\nSUMMARY:")
        print(f" Total Tests: {total_tests}")
        print(f" Passed: {summary['passed']}")
        print(f" Failed: {summary['failed']}")
        print(f" Warnings: {summary['warnings']}")
        print(f" Total Duration: {total_duration:.2f}s")
        print(f" Success Rate: {success_rate:.1f}%")

        # Detailed results
        print("\nDETAILED RESULTS:")
        for test_name, result in self.test_results.items():
            status = "PASS" if result["success"] else "FAIL"
            if result["warning"]:
                status = "WARN"
            duration_str = f"({result['duration']:.2f}s)" if result["duration"] > 0 else ""
            print(f" {status:<5} {test_name.replace('_', ' ').title()} {duration_str}")
            print(f" {result['message']}")

        # Recommendations
        print("\nRECOMMENDATIONS:")
        if summary["failed"] == 0:
            print(" All tests passed! Your client setup is working correctly.")
        else:
            print(" Fix the failed tests to ensure full functionality:")
            for test_name, result in self.test_results.items():
                if not result["success"]:
                    print(f" - Fix {test_name.replace('_', ' ')}: {result['message']}")

        if summary["warnings"] > 0:
            print(" Address warnings for optimal performance:")
            for test_name, result in self.test_results.items():
                if result["warning"]:
                    print(f" - {test_name.replace('_', ' ')}: {result['message']}")

        # Performance insights
        if self.test_results:
            slowest_test = max(self.test_results.items(), key=lambda x: x[1]["duration"])
            fastest_test = min(self.test_results.items(), key=lambda x: x[1]["duration"])
            print("\nPERFORMANCE INSIGHTS:")
            print(f" Slowest: {slowest_test[0].replace('_', ' ').title()} ({slowest_test[1]['duration']:.2f}s)")
            print(f" Fastest: {fastest_test[0].replace('_', ' ').title()} ({fastest_test[1]['duration']:.2f}s)")

        print(f"\nCompleted at: {summary['end_time'].strftime('%Y-%m-%d %H:%M:%S')}")
        print("Testing suite completed!")


def run_performance_benchmarks():
    """Run performance benchmarks for different API providers.

    Each provider answers the same prompt three times. A provider whose call
    raises (or returns something without a length) is recorded as failed and
    does not stop the other benchmarks.

    Returns:
        Dict keyed by provider name. Successful entries hold ``avg_time``,
        ``times`` and ``status == "success"``; failed entries hold
        ``status == "failed"`` and ``error``.
    """
    _print_banner("API PERFORMANCE BENCHMARKS")
    test_prompt = "What is autism spectrum disorder?"
    providers = (
        ("Groq", groq_qwen_generate_content),
        ("SiliconFlow", siliconflow_qwen_generate_content),
    )
    benchmark_results = {}

    for provider_name, generate in providers:
        try:
            print(f"\nBenchmarking {provider_name} generation...")
            times = []
            for i in range(3):  # Run 3 times for average
                # perf_counter is monotonic, so clock adjustments cannot skew a run.
                started = time.perf_counter()
                response = generate(test_prompt)
                times.append(time.perf_counter() - started)
                print(f" Run {i+1}: {times[-1]:.2f}s ({len(response)} chars)")

            avg_time = sum(times) / len(times)
            benchmark_results[provider_name] = {
                "avg_time": avg_time,
                "times": times,
                "status": "success",
            }
            print(f" PASS - {provider_name} average: {avg_time:.2f}s")
        except Exception as e:
            benchmark_results[provider_name] = {"status": "failed", "error": str(e)}
            print(f" FAIL - {provider_name} benchmark: {e}")

    # Print benchmark summary
    print("\nBENCHMARK SUMMARY:")
    successful_benchmarks = {
        name: data for name, data in benchmark_results.items() if data["status"] == "success"
    }
    if successful_benchmarks:
        fastest = min(successful_benchmarks.items(), key=lambda x: x[1]["avg_time"])
        slowest = max(successful_benchmarks.items(), key=lambda x: x[1]["avg_time"])
        print(f" Fastest: {fastest[0]} ({fastest[1]['avg_time']:.2f}s)")
        print(f" Slowest: {slowest[0]} ({slowest[1]['avg_time']:.2f}s)")
        # The ratio is undefined for a zero average, so it is skipped then.
        if len(successful_benchmarks) > 1 and fastest[1]["avg_time"] > 0:
            speed_diff = slowest[1]["avg_time"] / fastest[1]["avg_time"]
            print(f" Speed difference: {speed_diff:.1f}x")
    else:
        print(" No successful benchmarks to compare")

    return benchmark_results


def run_integration_tests():
    """Run integration tests combining multiple services.

    Generates Groq embeddings for three short texts, then asks the unified
    generator to explain the first one.

    Returns:
        ``True`` if both steps succeed, ``False`` otherwise (including when
        any call raises).
    """
    _print_banner("INTEGRATION TESTS")

    # Test 1: Embedding + Generation workflow
    print("\nTesting Embedding + Generation workflow...")
    try:
        # Generate embeddings
        test_texts = ["autism symptoms", "communication challenges", "sensory processing"]
        print(" Step 1: Generating embeddings...")
        embeddings = embed_texts_groq(test_texts)
        if not (embeddings and len(embeddings) >= len(test_texts)):
            print(" FAIL - Step 1: Embedding generation failed")
            return False
        print(" PASS - Step 1: Embeddings generated successfully")

        # Generate content based on first text
        print(" Step 2: Generating content...")
        response = qwen_generate(f"Explain about {test_texts[0]} in autism spectrum disorders")
        if not (response and len(response.strip()) > 20):
            print(" FAIL - Step 2: Content generation failed")
            return False
        print(" PASS - Step 2: Content generation successful")
        print(" PASS - Integration test completed successfully")
        return True
    except Exception as e:
        print(f" FAIL - Integration test: {e}")
        return False


def run_stress_tests():
    """Run stress tests with multiple concurrent requests.

    Submits five generation requests to a thread pool at once and prints
    success counts, total time and throughput.

    Returns:
        ``True`` if at least 80% of the requests succeeded.
    """
    _print_banner("STRESS TESTS")

    def stress_test_generation(prompt_id):
        """Stress test function for concurrent generation"""
        try:
            prompt = f"Test prompt {prompt_id}: What are autism characteristics?"
            response = qwen_generate(prompt)
            # len() raises on a None response, which lands in the failure branch.
            return {"id": prompt_id, "success": True, "length": len(response)}
        except Exception as e:
            return {"id": prompt_id, "success": False, "error": str(e)}

    print("Running concurrent generation tests...")

    # Test with 5 concurrent requests
    num_concurrent = 5
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_concurrent) as executor:
        start_time = time.time()
        futures = [executor.submit(stress_test_generation, i) for i in range(num_concurrent)]
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
        end_time = time.time()

    # Analyze results
    successful = [r for r in results if r["success"]]
    failed = [r for r in results if not r["success"]]
    total_time = end_time - start_time
    # Guard against a zero elapsed time on coarse clocks.
    requests_per_second = num_concurrent / total_time if total_time > 0 else float("inf")

    print("Stress test results:")
    print(f" Successful: {len(successful)}/{num_concurrent}")
    print(f" Failed: {len(failed)}/{num_concurrent}")
    print(f" Total time: {total_time:.2f}s")
    print(f" Requests/second: {requests_per_second:.2f}")

    if successful:
        avg_length = sum(r["length"] for r in successful) / len(successful)
        print(f" Average response length: {avg_length:.0f} characters")

    return len(successful) >= num_concurrent * 0.8  # 80% success rate


def _check_health(health_results: Dict[str, Dict[str, Any]], key: str, intro: str,
                  subject: str, probe: Callable[[], Any], empty_error: str,
                  empty_line: str) -> None:
    """Run *probe*, time it and store a healthy/unhealthy entry under *key*.

    A truthy probe result means healthy; a falsy result or an exception
    means unhealthy.
    """
    try:
        print(intro)
        start_time = time.time()
        outcome = probe()
        duration = time.time() - start_time
        if outcome:
            health_results[key] = {"status": "healthy", "response_time": duration}
            print(f" PASS - {subject} healthy ({duration:.2f}s)")
        else:
            health_results[key] = {"status": "unhealthy", "error": empty_error}
            print(empty_line)
    except Exception as e:
        health_results[key] = {"status": "unhealthy", "error": str(e)}
        print(f" FAIL - {subject} unhealthy: {e}")


def run_api_health_check():
    """Run a quick health check of all APIs.

    Probes Groq generation, SiliconFlow generation and Groq embeddings with
    minimal inputs.

    Returns:
        Dict keyed by ``"Groq"``, ``"SiliconFlow"`` and ``"Groq_Embeddings"``;
        each entry has ``status`` plus ``response_time`` (healthy) or
        ``error`` (unhealthy).
    """
    _print_banner("API HEALTH CHECK")
    health_results = {}

    def probe_embeddings():
        """Return the first embedding for a one-word input (falsy if absent)."""
        embeddings = embed_texts_groq(["test"])
        return embeddings and embeddings[0]

    # Check Groq
    _check_health(
        health_results, "Groq", "\nChecking Groq API health...", "Groq API",
        lambda: groq_qwen_generate_content("Hello"),
        "Empty response", " FAIL - Groq API returned empty response",
    )
    # Check SiliconFlow
    _check_health(
        health_results, "SiliconFlow", "\nChecking SiliconFlow API health...",
        "SiliconFlow API",
        lambda: siliconflow_qwen_generate_content("Hello"),
        "Empty response", " FAIL - SiliconFlow API returned empty response",
    )
    # Check embeddings
    _check_health(
        health_results, "Groq_Embeddings", "\nChecking embedding services...",
        "Groq embeddings", probe_embeddings,
        "No embeddings", " FAIL - Groq embeddings returned no data",
    )

    # Summary
    healthy_apis = sum(1 for api in health_results.values() if api["status"] == "healthy")
    total_apis = len(health_results)
    health_percent = (healthy_apis / total_apis) * 100 if total_apis else 0.0
    print("\nHEALTH CHECK SUMMARY:")
    print(f" Healthy APIs: {healthy_apis}/{total_apis}")
    print(f" System Health: {health_percent:.1f}%")

    return health_results


def save_test_report(test_results: dict, filename: Optional[str] = None):
    """Save test results to a JSON file.

    Args:
        test_results: Mapping of result name to value. ``datetime`` values
            one level inside a nested dict are written as ISO-8601 strings.
        filename: Target path; defaults to ``test_report_<timestamp>.json``
            in the current directory.

    Returns:
        The filename written, or ``None`` if saving failed.
    """
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"test_report_{timestamp}.json"

    try:
        # Convert datetime objects to strings for JSON serialization
        serializable_results = {}
        for key, value in test_results.items():
            if isinstance(value, dict):
                serializable_results[key] = {
                    k: v.isoformat() if isinstance(v, datetime) else v
                    for k, v in value.items()
                }
            else:
                serializable_results[key] = value

        # Explicit encoding keeps the report identical across platforms.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(serializable_results, f, indent=2)

        print(f"\nTest report saved to: {filename}")
        return filename
    except (OSError, TypeError, ValueError) as e:
        # OSError: file cannot be written; TypeError/ValueError: not serializable.
        print(f"Failed to save test report: {e}")
        return None


def _run_command(tester: ClientTester, command: str) -> None:
    """Run the single test or suite named by a command-line argument."""
    # Map command line arguments to test functions
    test_map = {
        "env": tester.test_environment_variables,
        "config": tester.test_configuration_loading,
        "weaviate": tester.test_weaviate_connection,
        "gemini": tester.test_gemini_client,
        "groq": tester.test_groq_client,
        "groq-embed": tester.test_groq_embeddings,
        "groq-gen": tester.test_groq_generation,
        "silicon": tester.test_siliconflow_generation,
        "deepinfra-embed": tester.test_deepinfra_embeddings,
        "deepinfra-rerank": tester.test_deepinfra_rerank,
        "unified": tester.test_unified_qwen_generate,
        "all": tester.run_all_tests,
        "benchmark": run_performance_benchmarks,
        "integration": run_integration_tests,
        "stress": run_stress_tests,
        "health": run_api_health_check,
    }

    handler = test_map.get(command)
    if handler is None:
        print(f"Unknown command: {command}")
        print(f"Available commands: {', '.join(test_map.keys())}")
        return

    handler()
    # Save results if it's the full test suite
    if command == "all":
        save_test_report(tester.test_results)


def _run_individual_test(tester: ClientTester) -> None:
    """Show the numbered list of tests and run the one the user picks."""
    _print_banner("SELECT INDIVIDUAL TEST:", width=40)
    individual_tests = tester.available_tests()
    for i, (test_name, _) in enumerate(individual_tests, 1):
        print(f"{i:2d}. {test_name}")

    # Only the number parsing is guarded, so an error raised by a test is
    # never misreported as invalid input.
    try:
        test_choice = int(input(f"\nSelect test (1-{len(individual_tests)}): "))
    except ValueError:
        print("Invalid input. Please enter a number.")
        return

    if 1 <= test_choice <= len(individual_tests):
        test_name, test_func = individual_tests[test_choice - 1]
        print(f"\nRunning {test_name}...")
        test_func()
    else:
        print("Invalid selection.")


def _run_interactive_menu(tester: ClientTester) -> None:
    """Loop over the interactive menu until the user selects Exit."""
    menu_lines = (
        "1. Run All Tests",
        "2. Performance Benchmarks",
        "3. Integration Tests",
        "4. Stress Tests",
        "5. API Health Check",
        "6. Environment Check Only",
        "7. LLM Generation Tests Only",
        "8. Embedding Tests Only",
        "9. Individual Test Selection",
        "10. Save Last Results",
        "0. Exit",
    )

    while True:
        _print_banner("CLIENTS.PY TESTING MENU", width=50)
        for line in menu_lines:
            print(line)

        choice = input("\nSelect option (0-10): ").strip()

        if choice == "1":
            tester = ClientTester()
            tester.run_all_tests()
            save_test_report(tester.test_results)
        elif choice == "2":
            run_performance_benchmarks()
        elif choice == "3":
            run_integration_tests()
        elif choice == "4":
            run_stress_tests()
        elif choice == "5":
            run_api_health_check()
        elif choice == "6":
            tester = ClientTester()
            tester.test_environment_variables()
            tester.test_configuration_loading()
        elif choice == "7":
            tester = ClientTester()
            tester.test_groq_generation()
            tester.test_siliconflow_generation()
            tester.test_unified_qwen_generate()
        elif choice == "8":
            tester = ClientTester()
            tester.test_groq_embeddings()
            tester.test_deepinfra_embeddings()
        elif choice == "9":
            # Individual test selection submenu
            _run_individual_test(tester)
        elif choice == "10":
            if tester.test_results:
                filename = save_test_report(tester.test_results)
                if filename:
                    print(f"Results saved to {filename}")
            else:
                print("No test results to save. Run tests first.")
        elif choice == "0":
            print("Testing suite completed!")
            break
        else:
            print("Invalid choice. Please try again.")

        # Wait for user input before showing menu again
        input("\nPress Enter to continue...")


def main():
    """Main function to run the testing suite.

    With a command-line argument, runs the matching test or suite once.
    Without arguments, starts the interactive menu.
    """
    print("Starting Clients.py Testing Suite...")
    tester = ClientTester()

    # Check if running in command line mode
    if len(sys.argv) > 1:
        _run_command(tester, sys.argv[1].lower())
        return

    # Interactive mode with enhanced menu
    try:
        _run_interactive_menu(tester)
    except EOFError:
        # stdin was closed (e.g. piped input ran out): leave without a traceback.
        print("\nInput stream closed. Exiting.")


if __name__ == "__main__":
    main()