"""Smoke-test / diagnostics harness for the document-QA utilities.

Run with no arguments for a single QA sanity check, or with one of:
``test`` (full suite), ``test-basic``, ``test-embedding``,
``test-integration``.
"""

import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple

from dotenv import load_dotenv

from clients import get_weaviate_client
from configs import load_yaml_config
from logger.custom_logger import CustomLoggerTracker
from query_utils import process_query_for_rewrite

config = load_yaml_config("config.yaml")

# Load .env early so the SILICONFLOW_* lookups below see the values.
load_dotenv()

# ---------------------------
# Custom Logger Initialization
# ---------------------------
custom_log = CustomLoggerTracker()
logger = custom_log.get_logger("unittest_doc_utils")
logger.info("Logger initialized for UnitTest Documents utilities module")

# ---------------------------
# Environment & Globals
# ---------------------------
SESSION_ID = "default"
pending_clarifications: Dict[str, str] = {}

SILICONFLOW_API_KEY = os.getenv("SILICONFLOW_API_KEY", "")
SILICONFLOW_URL = os.getenv("SILICONFLOW_URL", "").strip()
SILICONFLOW_CHAT_URL = os.getenv(
    "SILICONFLOW_CHAT_URL",
    "https://api.siliconflow.com/v1/chat/completions").strip()

if not SILICONFLOW_API_KEY:
    logger.warning(
        "SILICONFLOW_API_KEY is not set. LLM/Reranker calls may fail.")
if not SILICONFLOW_URL:
    logger.warning(
        "SILICONFLOW_URL is not set. OpenAI client base_url will not work.")

# Global variables - consider moving to a config class
last_uploaded_path = None
client = None  # Initialize properly

# NOTE(review): this star import supplies user_doc_qa, embed_texts and
# get_text_splitter used below. The relative ".." form only resolves when the
# module is imported as part of its package; it raises ImportError when the
# file is executed directly (as the __main__ block below suggests) -- confirm
# the intended invocation.
from ..docs_utils import *

# ---------------------------
# TESTING FUNCTIONS
# ---------------------------


def test_environment_setup():
    """Check that every required SILICONFLOW_* environment variable is set.

    Returns:
        bool: True when all required variables are present, else False.
    """
    logger.info("๐Ÿ”ง Testing Environment Setup...")
    required_vars = [
        "SILICONFLOW_API_KEY", "SILICONFLOW_URL", "SILICONFLOW_CHAT_URL"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        logger.info(
            f"โŒ Missing environment variables: {', '.join(missing_vars)}")
        return False
    logger.info("โœ… All required environment variables are set")
    return True


def test_config_loading():
    """Verify the loaded YAML config contains every required nested key.

    Returns:
        bool: True when all key paths resolve, else False.
    """
    logger.info("๐Ÿ“‹ Testing Config Loading...")
    try:
        # Key paths expressed as lists so nested sections can be walked.
        # NOTE: "weavaite_collection" mirrors the (misspelled) key in the
        # actual config file -- do not "fix" it here without fixing config.yaml.
        required_config_keys = [
            ["chunking", "chunk_size"],
            ["chunking", "chunk_overlap"],
            ["chunking", "separators"],
            ["rag", "weavaite_collection"],
            ["rag", "old_doc"]
        ]
        missing_keys = []
        for key_path in required_config_keys:
            current = config
            for key in key_path:
                if key not in current:
                    missing_keys.append(".".join(key_path))
                    break
                current = current[key]
        if missing_keys:
            logger.info(f"โŒ Missing config keys: {', '.join(missing_keys)}")
            return False
        logger.info("โœ… All required config keys are present")
        return True
    except Exception as e:
        logger.info(f"โŒ Error loading config: {e}")
        return False


def test_weaviate_connection():
    """Test Weaviate database connection.

    Returns:
        bool: True when the client reports ready, else False.
    """
    logger.info("๐Ÿ—„๏ธ Testing Weaviate Connection...")
    try:
        client = get_weaviate_client()
        # Test basic connection
        is_ready = client.is_ready()
        if is_ready:
            logger.info("โœ… Successfully connected to Weaviate")
            return True
        logger.info("โŒ Weaviate is not ready")
        return False
    except Exception as e:
        logger.info(f"โŒ Error connecting to Weaviate: {e}")
        return False


def test_embedding_service():
    """Test embedding service connectivity.

    Returns:
        bool: True when a non-empty vector comes back, else False.
    """
    logger.info("๐Ÿ”ข Testing Embedding Service...")
    try:
        test_texts = ["This is a test sentence about autism."]
        vectors = embed_texts(test_texts)
        if vectors and len(vectors) > 0:
            logger.info(
                f"โœ… Embedding service working - got {len(vectors[0])} dimensions")
            return True
        logger.info("โŒ Embedding service returned empty result")
        return False
    except Exception as e:
        logger.info(f"โŒ Error testing embedding service: {e}")
        return False


def test_query_processing():
    """Test query processing pipeline.

    Exercises a domain query, an off-topic query and the empty-string edge
    case (which is routed through user_doc_qa and must mention
    "valid question").

    Returns:
        bool: True when every query is handled, else False.
    """
    logger.info("๐Ÿ”„ Testing Query Processing...")
    try:
        test_queries = [
            "What is autism?",
            "Tell me about the weather",  # Non-autism query
            ""  # Empty query
        ]
        for query in test_queries:
            if query == "":
                # Test empty query handling
                result = user_doc_qa(query)
                if "valid question" in result:
                    logger.info(f"โœ… Empty query handled correctly")
                else:
                    logger.info(f"โŒ Empty query not handled properly")
                    return False
            else:
                try:
                    corrected_query, is_autism_related, _ = process_query_for_rewrite(
                        query)
                    logger.info(
                        f"โœ… Query '{query[:20]}...' processed successfully")
                except Exception as e:
                    logger.info(f"โŒ Error processing query '{query}': {e}")
                    return False
        return True
    except Exception as e:
        logger.info(f"โŒ Error in query processing test: {e}")
        return False


def test_text_splitter():
    """Test text splitter functionality.

    Returns:
        bool: True when the splitter produces at least one chunk, else False.
    """
    logger.info("โœ‚๏ธ Testing Text Splitter...")
    try:
        splitter = get_text_splitter()
        test_text = "This is a test document. " * 100  # Create long text
        chunks = splitter.split_text(test_text)
        if chunks and len(chunks) > 0:
            logger.info(
                f"โœ… Text splitter working - created {len(chunks)} chunks")
            return True
        logger.info("โŒ Text splitter returned no chunks")
        return False
    except Exception as e:
        logger.info(f"โŒ Error testing text splitter: {e}")
        return False


def run_integration_test():
    """Run a simple end-to-end test.

    Returns:
        bool: True when user_doc_qa returns a plausibly long answer.
    """
    logger.info("๐Ÿš€ Running Integration Test...")
    try:
        # Test the main QA function
        test_question = "What is autism?"
        result = user_doc_qa(test_question)
        if result and len(result) > 10:  # Basic sanity check
            logger.info(
                f"โœ… Integration test passed - got response: {result[:50]}...")
            return True
        logger.info(f"โŒ Integration test failed - response: {result}")
        return False
    except Exception as e:
        logger.info(f"โŒ Integration test error: {e}")
        return False


def run_performance_test():
    """Test response times.

    Returns:
        bool: True on completion (a slow response only warns), False on error.
    """
    logger.info("โฑ๏ธ Testing Performance...")
    try:
        test_question = "What is autism?"
        start_time = time.time()
        result = user_doc_qa(test_question)
        end_time = time.time()
        response_time = end_time - start_time
        logger.info(f"โœ… Response time: {response_time:.2f} seconds")
        if response_time < 30:  # Reasonable threshold
            logger.info("โœ… Performance test passed")
            return True
        logger.info("โš ๏ธ Response time is high - consider optimization")
        return True  # Still pass, just warn
    except Exception as e:
        logger.info(f"โŒ Performance test error: {e}")
        return False


def run_all_tests():
    """Run comprehensive test suite.

    Returns:
        bool: True when every test passed, else False.
    """
    print("=" * 50)
    logger.info("๐Ÿงช RUNNING COMPREHENSIVE TEST SUITE")
    print("=" * 50)
    tests = [
        ("Environment Setup", test_environment_setup),
        ("Config Loading", test_config_loading),
        ("Weaviate Connection", test_weaviate_connection),
        ("Embedding Service", test_embedding_service),
        ("Query Processing", test_query_processing),
        ("Text Splitter", test_text_splitter),
        ("Integration Test", run_integration_test),
        ("Performance Test", run_performance_test),
    ]
    passed = 0
    failed = 0
    for test_name, test_func in tests:
        logger.info(f"\n--- {test_name} ---")
        try:
            if test_func():
                passed += 1
            else:
                failed += 1
        except Exception as e:
            # A crash counts as a failure but must not abort the suite.
            logger.info(f"โŒ Test {test_name} crashed: {e}")
            failed += 1
    print("\n" + "=" * 50)
    logger.info(f"๐Ÿ“Š TEST RESULTS: {passed} PASSED, {failed} FAILED")
    print("=" * 50)
    return failed == 0


if __name__ == "__main__":
    if len(sys.argv) > 1:
        command = sys.argv[1].lower()
        if command == "test":
            run_all_tests()
        elif command == "test-basic":
            test_environment_setup()
            test_config_loading()
            test_weaviate_connection()
        elif command == "test-embedding":
            test_embedding_service()
        elif command == "test-integration":
            run_integration_test()
        else:
            logger.info(f"Unknown command: {command}")
            logger.info(
                "Available commands: test, test-basic, test-embedding, test-integration")
    else:
        # Default behavior - run a simple QA test
        logger.info("Running basic QA test...")
        try:
            result = user_doc_qa("What is autism?")
            logger.info(f"Result: {result}")
        except Exception as e:
            logger.info(f"Error: {e}")
        logger.info("\nTo run comprehensive tests, use:")
        logger.info("python docs_utils.py test")