#!/usr/bin/env python3
"""
Complete workflow for Textilindo AI: Dataset → Fine-tuning → Deployment
"""

import os
import sys
import subprocess
from pathlib import Path


def run_command(command, description):
    """Run a command and handle errors"""
    print(f"\n🔄 {description}")
    print(f"Command: {command}")

    try:
        result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
        print(f"✅ {description} completed successfully")
        return True
    except subprocess.CalledProcessError as e:
        print(f"❌ {description} failed")
        print(f"Error: {e.stderr}")
        return False


def check_requirements():
    """Check if all requirements are met"""
    print("🔍 Checking requirements...")

    # Check if virtual environment is activated
    if not hasattr(sys, 'real_prefix') and not (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix):
        print("⚠️ Virtual environment not detected")
        print("Please activate the virtual environment first:")
        print("source venv/bin/activate")
        return False

    # Check if API key is set
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("⚠️ NOVITA_API_KEY not set")
        print("Please set your Novita AI API key:")
        print("export NOVITA_API_KEY='your_api_key'")
        return False

    # Check if dataset exists
    dataset_path = "data/lora_dataset_20250829_113330.jsonl"
    if not os.path.exists(dataset_path):
        print(f"❌ Dataset not found: {dataset_path}")
        return False

    print("✅ All requirements met")
    return True


def step1_convert_dataset():
    """Step 1: Convert dataset format"""
    print("\n" + "="*60)
    print("STEP 1: CONVERT DATASET FORMAT")
    print("="*60)

    return run_command(
        "python convert_dataset.py",
        "Converting dataset from instruction format to training format"
    )


def step2_download_model():
    """Step 2: Download base model"""
    print("\n" + "="*60)
    print("STEP 2: DOWNLOAD BASE MODEL")
    print("="*60)

    # Check if model already exists
    model_path = "models/llama-3.2-1b-instruct"
    if os.path.exists(model_path):
        print(f"✅ Model already exists: {model_path}")
        return True

    return run_command(
        "python scripts/download_open_models.py",
        "Downloading base model (Llama 3.2 1B Instruct)"
    )


def step3_fine_tune():
    """Step 3: Fine-tune the model"""
    print("\n" + "="*60)
    print("STEP 3: FINE-TUNE MODEL")
    print("="*60)

    # Check if training config exists
    config_path = "configs/training_config.yaml"
    if not os.path.exists(config_path):
        print(f"❌ Training config not found: {config_path}")
        print("Please run Step 1 first to create the config")
        return False

    return run_command(
        "python scripts/finetune_lora.py",
        "Fine-tuning model with LoRA"
    )


def step4_test_model():
    """Step 4: Test the fine-tuned model"""
    print("\n" + "="*60)
    print("STEP 4: TEST FINE-TUNED MODEL")
    print("="*60)

    # Check if fine-tuned model exists
    lora_path = "models/textilindo-lora-weights"
    if not os.path.exists(lora_path):
        print(f"⚠️ Fine-tuned model not found: {lora_path}")
        print("This step will be skipped")
        return True

    return run_command(
        "python scripts/test_model.py",
        "Testing fine-tuned model"
    )


def step5_deploy_preparation():
    """Step 5: Prepare for deployment"""
    print("\n" + "="*60)
    print("STEP 5: PREPARE FOR DEPLOYMENT")
    print("="*60)

    return run_command(
        "python deploy_to_novita.py",
        "Preparing deployment configuration"
    )


def main():
    print("🚀 Textilindo AI Complete Workflow")
    print("="*60)
    print("This script will:")
    print("1. Convert your dataset to training format")
    print("2. Download a base model")
    print("3. Fine-tune the model with your data")
    print("4. Test the fine-tuned model")
    print("5. Prepare for deployment to Novita AI")
    print("="*60)

    # Check requirements
    if not check_requirements():
        print("\n❌ Requirements not met. Please fix the issues above.")
        return

    # Ask for confirmation
    response = input("\nDo you want to continue? (y/n): ").strip().lower()
    if response not in ['y', 'yes']:
        print("👋 Workflow cancelled")
        return

    # Execute steps
    steps = [
        ("Dataset Conversion", step1_convert_dataset),
        ("Model Download", step2_download_model),
        ("Fine-tuning", step3_fine_tune),
        ("Model Testing", step4_test_model),
        ("Deployment Preparation", step5_deploy_preparation)
    ]

    successful_steps = 0
    total_steps = len(steps)

    for step_name, step_func in steps:
        print(f"\n🎯 Starting: {step_name}")
        if step_func():
            successful_steps += 1
        else:
            print(f"❌ {step_name} failed. You can:")
            print("1. Fix the issue and run this step manually")
            print("2. Continue with the next step")
            print("3. Stop the workflow")

            response = input("Continue to next step? (y/n): ").strip().lower()
            if response not in ['y', 'yes']:
                break

    # Summary
    print("\n" + "="*60)
    print("WORKFLOW SUMMARY")
    print("="*60)
    print(f"✅ Completed: {successful_steps}/{total_steps} steps")

    if successful_steps == total_steps:
        print("\n🎉 All steps completed successfully!")
        print("\n📋 Next steps:")
        print("1. Check your fine-tuned model in the models/ directory")
        print("2. Read DEPLOYMENT_GUIDE.md for deployment instructions")
        print("3. Contact Novita AI support for custom model deployment")
        print("4. Test your deployed model with the chat application")
    else:
        print(f"\n⚠️ {total_steps - successful_steps} steps failed")
        print("Please check the error messages above and run the failed steps manually")

    print("\n📁 Generated files:")
    files_to_check = [
        "data/textilindo_training_data.jsonl",
        "configs/training_config.yaml",
        "models/textilindo-lora-weights/",
        "DEPLOYMENT_GUIDE.md"
    ]

    for file_path in files_to_check:
        if os.path.exists(file_path):
            print(f" ✅ {file_path}")
        else:
            print(f" ❌ {file_path} (not found)")


if __name__ == "__main__":
    main()