Spaces:
Build error
Build error
File size: 5,410 Bytes
927bb09 0669246 927bb09 0669246 927bb09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
#!/usr/bin/env python3
"""
Setup script for Textilindo AI Assistant training.
Downloads the base model and prepares the environment.
"""
import os
import sys
import yaml
import torch
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
# Configure root logging so INFO-level (and higher) messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)
def load_config(config_path):
    """Load the training configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed configuration mapping, or ``None`` when the file cannot
        be read or parsed, or when its top level is not a mapping. Every
        failure is logged before ``None`` is returned.
    """
    try:
        # Explicit UTF-8 so parsing does not depend on the platform's
        # locale encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as e:
        # ValueError covers UnicodeDecodeError raised while reading the file.
        logger.error("Error loading config: %s", e)
        return None

    # An empty file parses to None and a top-level list/scalar cannot be
    # indexed by key; report both here instead of failing later.
    if not isinstance(config, dict):
        logger.error(
            "Error loading config: expected a mapping at the top level of %s, got %s",
            config_path,
            type(config).__name__,
        )
        return None

    return config
def download_model(config):
    """Download the base model and tokenizer and save them locally.

    Args:
        config: Mapping providing ``model_name`` (the identifier passed to
            ``from_pretrained``) and ``model_path`` (the directory used both
            as the download cache and as the save target).

    Returns:
        ``True`` once the tokenizer and model have been saved to
        ``model_path``; ``False`` if a required key is missing or any step
        fails (the error is logged).
    """
    try:
        model_name = config['model_name']
        model_path = config['model_path']
    except KeyError as e:
        logger.error("Error downloading model: missing config key %s", e)
        return False

    logger.info("Downloading model: %s", model_name)
    logger.info("Target path: %s", model_path)

    try:
        # Imported here so an installation without this class is reported
        # through the normal error path below.
        from transformers import BitsAndBytesConfig

        # Create models directory
        Path(model_path).mkdir(parents=True, exist_ok=True)

        # NOTE(review): trust_remote_code=True executes Python code shipped
        # with the model repository; only use it with trusted model names.
        logger.info("Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=model_path,
        )

        # Download model with memory optimization. 8-bit quantization is
        # requested through quantization_config; the bare load_in_8bit
        # keyword argument is deprecated.
        logger.info("Downloading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            cache_dir=model_path,
            low_cpu_mem_usage=True,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        )

        # Save to local path
        logger.info("Saving model to: %s", model_path)
        tokenizer.save_pretrained(model_path)
        model.save_pretrained(model_path)
    except Exception as e:
        # Top-level boundary for the download step: report and signal
        # failure to the caller instead of raising.
        logger.error("Error downloading model: %s", e)
        return False

    logger.info("✅ Model downloaded successfully!")
    return True
def check_requirements(required_packages=None):
    """Check that the runtime meets the requirements for training.

    Verifies the Python version, that PyTorch can be imported, reports
    whether CUDA is available (absence is only a warning), and tries to
    import each required package. Progress is printed to stdout.

    Args:
        required_packages: Optional iterable of importable module names to
            check. Defaults to the packages used by the training scripts.

    Returns:
        ``True`` if every check passes; ``False`` if Python is older than
        3.8, PyTorch is not installed, or any required package is missing.
    """
    import importlib

    if required_packages is None:
        required_packages = [
            'transformers',
            'peft',
            'datasets',
            'accelerate',
            'bitsandbytes',
        ]

    print("🔍 Checking requirements...")

    # Check Python version
    if sys.version_info < (3, 8):
        print("❌ Python 3.8+ required")
        return False

    # Check PyTorch
    try:
        import torch
        print(f"✅ PyTorch {torch.__version__}")
    except ImportError:
        print("❌ PyTorch not installed")
        return False

    # Check CUDA availability
    if torch.cuda.is_available():
        print(f"✅ CUDA available: {torch.cuda.get_device_name(0)}")
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
        print(f"   GPU Memory: {total_gb:.1f} GB")
    else:
        print("⚠️ CUDA not available - training will be slower on CPU")

    # Check required packages by actually importing each one
    missing_packages = []
    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            missing_packages.append(package)
            print(f"❌ {package}")
        else:
            print(f"✅ {package}")

    if missing_packages:
        print(f"\n❌ Missing packages: {', '.join(missing_packages)}")
        print("Install with: pip install " + " ".join(missing_packages))
        return False

    return True
def main():
    """Run the setup workflow for Textilindo AI Assistant training.

    Steps, in order: check requirements, load
    ``configs/training_config.yaml``, download the base model unless a
    ``config.json`` already exists under the configured model path, then
    confirm that the dataset and ``configs/system_prompt.md`` exist.
    Exits the process with status 1 as soon as any step fails.
    """
    print("🚀 Textilindo AI Assistant - Setup")
    print("=" * 50)

    # Check requirements
    if not check_requirements():
        print("\n❌ Requirements not met. Please install missing packages.")
        sys.exit(1)

    # Load configuration
    config_path = "configs/training_config.yaml"
    if not os.path.exists(config_path):
        print(f"❌ Config file tidak ditemukan: {config_path}")
        sys.exit(1)

    config = load_config(config_path)
    if not config:
        sys.exit(1)

    # Validate the keys used by this script up front so a malformed config
    # gives a clear message instead of a KeyError/TypeError traceback.
    required_keys = ("model_name", "model_path", "dataset_path")
    if not isinstance(config, dict):
        print(f"❌ Config must be a mapping: {config_path}")
        sys.exit(1)
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        print(f"❌ Config is missing required keys: {', '.join(missing_keys)}")
        sys.exit(1)

    # Check if model already exists (config.json marks a saved model)
    model_path = config['model_path']
    if os.path.exists(model_path) and os.path.exists(os.path.join(model_path, "config.json")):
        print(f"✅ Model already exists: {model_path}")
        print("Skipping download...")
    else:
        # Download model
        print("1️⃣ Downloading base model...")
        if not download_model(config):
            print("❌ Failed to download model")
            sys.exit(1)

    # Check dataset
    dataset_path = config['dataset_path']
    if not os.path.exists(dataset_path):
        print(f"❌ Dataset tidak ditemukan: {dataset_path}")
        print("Please ensure your dataset is in the correct location")
        sys.exit(1)
    print(f"✅ Dataset found: {dataset_path}")

    # Check system prompt
    system_prompt_path = "configs/system_prompt.md"
    if not os.path.exists(system_prompt_path):
        print(f"❌ System prompt tidak ditemukan: {system_prompt_path}")
        sys.exit(1)
    print(f"✅ System prompt found: {system_prompt_path}")

    print("\n✅ Setup completed successfully!")
    print("\n📋 Next steps:")
    print("1. Run training: python scripts/train_textilindo_ai.py")
    print("2. Test model: python scripts/test_textilindo_ai.py")
    print("3. Test with LoRA: python scripts/test_textilindo_ai.py --lora_path models/textilindo-ai-lora-YYYYMMDD_HHMMSS")
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|