#!/usr/bin/env python3
"""
Complete workflow for Textilindo AI: Dataset → Fine-tuning → Deployment
"""

import os
import sys
import subprocess
from pathlib import Path


def run_command(command, description):
    """Run a command and handle errors"""
    print(f"\n🔄 {description}")
    print(f"Command: {command}")

    try:
        result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
        print(f"✅ {description} completed successfully")
        return True
    except subprocess.CalledProcessError as e:
        print(f"❌ {description} failed")
        print(f"Error: {e.stderr}")
        return False


def check_requirements():
    """Check if all requirements are met"""
    print("🔍 Checking requirements...")

    # Check if virtual environment is activated
    if not hasattr(sys, 'real_prefix') and not (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix):
        print("⚠️ Virtual environment not detected")
        print("Please activate the virtual environment first:")
        print("source venv/bin/activate")
        return False

    # Check if API key is set
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("⚠️ NOVITA_API_KEY not set")
        print("Please set your Novita AI API key:")
        print("export NOVITA_API_KEY='your_api_key'")
        return False

    # Check if dataset exists
    dataset_path = "data/lora_dataset_20250829_113330.jsonl"
    if not os.path.exists(dataset_path):
        print(f"❌ Dataset not found: {dataset_path}")
        return False

    print("✅ All requirements met")
    return True


def step1_convert_dataset():
    """Step 1: Convert dataset format"""
    print("\n" + "="*60)
    print("STEP 1: CONVERT DATASET FORMAT")
    print("="*60)

    return run_command(
        "python convert_dataset.py",
        "Converting dataset from instruction format to training format"
    )


def step2_download_model():
    """Step 2: Download base model"""
    print("\n" + "="*60)
    print("STEP 2: DOWNLOAD BASE MODEL")
    print("="*60)

    # Check if model already exists
    model_path = "models/llama-3.2-1b-instruct"
    if os.path.exists(model_path):
        print(f"✅ Model already exists: {model_path}")
        return True

    return run_command(
        "python scripts/download_open_models.py",
        "Downloading base model (Llama 3.2 1B Instruct)"
    )


def step3_fine_tune():
    """Step 3: Fine-tune the model"""
    print("\n" + "="*60)
    print("STEP 3: FINE-TUNE MODEL")
    print("="*60)

    # Check if training config exists
    config_path = "configs/training_config.yaml"
    if not os.path.exists(config_path):
        print(f"❌ Training config not found: {config_path}")
        print("Please run Step 1 first to create the config")
        return False

    return run_command(
        "python scripts/finetune_lora.py",
        "Fine-tuning model with LoRA"
    )


def step4_test_model():
    """Step 4: Test the fine-tuned model"""
    print("\n" + "="*60)
    print("STEP 4: TEST FINE-TUNED MODEL")
    print("="*60)

    # Check if fine-tuned model exists
    lora_path = "models/textilindo-lora-weights"
    if not os.path.exists(lora_path):
        print(f"⚠️ Fine-tuned model not found: {lora_path}")
        print("This step will be skipped")
        return True

    return run_command(
        "python scripts/test_model.py",
        "Testing fine-tuned model"
    )


def step5_deploy_preparation():
    """Step 5: Prepare for deployment"""
    print("\n" + "="*60)
    print("STEP 5: PREPARE FOR DEPLOYMENT")
    print("="*60)

    return run_command(
        "python deploy_to_novita.py",
        "Preparing deployment configuration"
    )


def main():
    print("🚀 Textilindo AI Complete Workflow")
    print("="*60)
    print("This script will:")
    print("1. Convert your dataset to training format")
    print("2. Download a base model")
    print("3. Fine-tune the model with your data")
    print("4. Test the fine-tuned model")
    print("5. Prepare for deployment to Novita AI")
    print("="*60)

    # Check requirements
    if not check_requirements():
        print("\n❌ Requirements not met. Please fix the issues above.")
        return

    # Ask for confirmation
    response = input("\nDo you want to continue? (y/n): ").strip().lower()
    if response not in ['y', 'yes']:
        print("👋 Workflow cancelled")
        return

    # Execute steps
    steps = [
        ("Dataset Conversion", step1_convert_dataset),
        ("Model Download", step2_download_model),
        ("Fine-tuning", step3_fine_tune),
        ("Model Testing", step4_test_model),
        ("Deployment Preparation", step5_deploy_preparation)
    ]

    successful_steps = 0
    total_steps = len(steps)

    for step_name, step_func in steps:
        print(f"\n🎯 Starting: {step_name}")
        if step_func():
            successful_steps += 1
        else:
            print(f"❌ {step_name} failed. You can:")
            print("1. Fix the issue and run this step manually")
            print("2. Continue with the next step")
            print("3. Stop the workflow")

            response = input("Continue to next step? (y/n): ").strip().lower()
            if response not in ['y', 'yes']:
                break

    # Summary
    print("\n" + "="*60)
    print("WORKFLOW SUMMARY")
    print("="*60)
    print(f"✅ Completed: {successful_steps}/{total_steps} steps")

    if successful_steps == total_steps:
        print("\n🎉 All steps completed successfully!")
        print("\n📋 Next steps:")
        print("1. Check your fine-tuned model in the models/ directory")
        print("2. Read DEPLOYMENT_GUIDE.md for deployment instructions")
        print("3. Contact Novita AI support for custom model deployment")
        print("4. Test your deployed model with the chat application")
    else:
        print(f"\n⚠️ {total_steps - successful_steps} steps failed")
        print("Please check the error messages above and run the failed steps manually")

    print("\n📁 Generated files:")
    files_to_check = [
        "data/textilindo_training_data.jsonl",
        "configs/training_config.yaml",
        "models/textilindo-lora-weights/",
        "DEPLOYMENT_GUIDE.md"
    ]

    for file_path in files_to_check:
        if os.path.exists(file_path):
            print(f" ✅ {file_path}")
        else:
            print(f" ❌ {file_path} (not found)")


if __name__ == "__main__":
    main()