#!/usr/bin/env python3
"""
Deploy fine-tuned model to Novita AI serverless GPU
"""
import os
import json
import time

import requests


class NovitaAIDeployer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://api.novita.ai/openai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def test_connection(self):
        """Test connection to Novita AI"""
        try:
            response = requests.get(f"{self.base_url}/models", headers=self.headers, timeout=10)
            return response.status_code == 200
        except Exception as e:
            print(f"❌ Connection error: {e}")
            return False

    def get_available_models(self):
        """Get list of available models"""
        try:
            response = requests.get(f"{self.base_url}/models", headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json().get('data', [])
            return []
        except Exception as e:
            print(f"❌ Error getting models: {e}")
            return []

    def create_deployment(self, model_name, deployment_name=None):
        """Create a deployment for the model"""
        if not deployment_name:
            deployment_name = f"textilindo-{model_name.split('/')[-1]}"

        # Note: this is a placeholder for the actual deployment API;
        # Novita AI may not expose a public deployment API yet.
        print(f"🔧 Creating deployment: {deployment_name}")
        print(f"📋 Model: {model_name}")

        # For now, write a configuration file for manual deployment
        deployment_config = {
            "deployment_name": deployment_name,
            "model_name": model_name,
            "base_url": self.base_url,
            "api_key": self.api_key[:10] + "..." + self.api_key[-10:],  # masked
            "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "ready_for_deployment"
        }

        config_path = f"configs/{deployment_name}_deployment.json"
        os.makedirs("configs", exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(deployment_config, f, indent=2, ensure_ascii=False)

        print(f"✅ Deployment config saved: {config_path}")
        return config_path

    def test_model_inference(self, model_name, test_prompt="Halo, apa kabar?"):
        """Test model inference"""
        # Default prompt is Indonesian for "Hello, how are you?"
        print(f"🧪 Testing inference with model: {model_name}")

        payload = {
            "model": model_name,
            "messages": [
                {"role": "user", "content": test_prompt}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                result = response.json()
                assistant_message = result.get('choices', [{}])[0].get('message', {}).get('content', '')
                print("✅ Inference successful!")
                print(f"📝 Response: {assistant_message}")
                return True
            print(f"❌ Inference failed: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            print(f"❌ Inference error: {e}")
            return False
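

# A minimal usage sketch for the class above (assuming NOVITA_API_KEY is set;
# the model ID is one of the lightweight models listed in main()):
#
#   deployer = NovitaAIDeployer(os.environ["NOVITA_API_KEY"])
#   if deployer.test_connection():
#       deployer.test_model_inference("meta-llama/llama-3.2-1b-instruct")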


def create_deployment_guide():
    """Create a deployment guide for Novita AI"""
    guide_content = """
# Novita AI Deployment Guide
## Current Status
Your fine-tuned model is ready for deployment to Novita AI serverless GPU.
## Manual Deployment Steps
### 1. Prepare Your Model
- Ensure your fine-tuned model is saved in the `models/` directory
- Verify the model weights and configuration files are complete
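
Before uploading, it helps to sanity-check the export locally. A minimal sketch, assuming a Hugging Face-style layout under `models/` (the directory and file names below are placeholders for your actual export):

```python
from pathlib import Path

# Placeholder path - point this at your actual export directory
model_dir = Path("models/textilindo-llama-3.2-1b")

# Typical Hugging Face artifacts; adjust to match your training setup
expected = ["config.json", "tokenizer.json"]
missing = [name for name in expected if not (model_dir / name).exists()]
weights = list(model_dir.glob("*.safetensors")) + list(model_dir.glob("*.bin"))

if missing or not weights:
    print(f"Incomplete export: missing={missing}, weight files found={len(weights)}")
else:
    print("Model files look complete")
```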
### 2. Upload to Novita AI
1. Log in to your Novita AI dashboard: https://novita.ai/
2. Navigate to "Custom Models" or "Model Library"
3. Click "Upload Model" or "Deploy Custom Model"
4. Upload your model files (weights, config, tokenizer)
5. Set the model name (e.g., "textilindo-llama-3.2-1b")
6. Configure serverless GPU settings
### 3. Configure API Access
1. Get your deployment API endpoint
2. Update your application to use the new endpoint
3. Test the deployment with sample queries
### 4. Monitor Usage
- Track API calls and costs in the Novita AI dashboard
- Monitor model performance and response times
- Set up alerts for any issues
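
Beyond the dashboard, you can also track response times from the client side. A minimal sketch that times one request and flags slow or failing calls (the endpoint, key, and model name are placeholders, as in the example below):

```python
import time
import requests

endpoint = "https://api.novita.ai/openai"
headers = {"Authorization": "Bearer your_api_key"}
payload = {
    "model": "your-deployed-model-name",
    "messages": [{"role": "user", "content": "ping"}],
    "max_tokens": 5
}

start = time.monotonic()
response = requests.post(f"{endpoint}/chat/completions", headers=headers, json=payload, timeout=30)
elapsed = time.monotonic() - start

print(f"status={response.status_code} latency={elapsed:.2f}s")
if response.status_code != 200 or elapsed > 5.0:
    print("Slow or failing response - check the Novita AI dashboard")
```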
## API Usage Example
```python
import requests

# Your deployment endpoint
endpoint = "https://api.novita.ai/openai"
api_key = "your_api_key"

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
payload = {
    "model": "your-deployed-model-name",
    "messages": [
        {"role": "user", "content": "dimana lokasi textilindo?"}  # "where is textilindo located?"
    ],
    "max_tokens": 200,
    "temperature": 0.7
}

response = requests.post(f"{endpoint}/chat/completions", headers=headers, json=payload, timeout=30)
result = response.json()
print(result['choices'][0]['message']['content'])
```
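
If the endpoint supports OpenAI-style streaming (not verified here), setting `"stream": true` cuts perceived latency. The sketch below reuses `endpoint`, `headers`, and `payload` from the example above:

```python
import json

payload["stream"] = True
with requests.post(f"{endpoint}/chat/completions", headers=headers,
                   json=payload, stream=True, timeout=60) as response:
    for line in response.iter_lines():
        # OpenAI-compatible servers emit "data: {...}" SSE lines, ending with "data: [DONE]"
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            chunk = json.loads(line[len(b"data: "):])
            delta = chunk["choices"][0]["delta"].get("content", "")
            print(delta, end="", flush=True)
```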
## Next Steps
1. Contact Novita AI support for custom model deployment
2. Consider using their Model API for easier integration
3. Set up monitoring and logging for production use
"""
    guide_path = "DEPLOYMENT_GUIDE.md"
    with open(guide_path, 'w', encoding='utf-8') as f:
        f.write(guide_content)
    print(f"✅ Deployment guide created: {guide_path}")


def main():
    print("🚀 Novita AI Deployment Setup")
    print("=" * 50)

    # Check API key
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("❌ NOVITA_API_KEY not found")
        api_key = input("Enter your Novita AI API key: ").strip()
        if not api_key:
            print("❌ API key required")
            return
        os.environ['NOVITA_API_KEY'] = api_key

    # Initialize deployer
    deployer = NovitaAIDeployer(api_key)

    # Test connection
    print("🔍 Testing connection...")
    if not deployer.test_connection():
        print("❌ Could not connect to Novita AI")
        return
    print("✅ Connected to Novita AI!")

    # Get available models
    models = deployer.get_available_models()
    print(f"📋 Found {len(models)} available models")

    # Select model for deployment
    print("\n🎯 Select model for deployment:")
    lightweight_models = [
        "meta-llama/llama-3.2-1b-instruct",
        "meta-llama/llama-3.2-3b-instruct",
        "qwen/qwen3-4b-fp8",
        "qwen/qwen3-8b-fp8"
    ]
    for i, model in enumerate(lightweight_models, 1):
        print(f"{i}. {model}")

    try:
        choice = int(input("\nSelect model (1-4): ").strip())
        if 1 <= choice <= len(lightweight_models):
            selected_model = lightweight_models[choice - 1]
        else:
            print("❌ Invalid choice, using default")
            selected_model = lightweight_models[0]
    except ValueError:
        print("❌ Invalid input, using default")
        selected_model = lightweight_models[0]
    print(f"✅ Selected: {selected_model}")

    # Test model inference
    print("\n🧪 Testing model inference...")
    if deployer.test_model_inference(selected_model):
        print("✅ Model inference working!")
    else:
        print("❌ Model inference failed")
        return

    # Create deployment config
    print("\n🔧 Creating deployment configuration...")
    config_path = deployer.create_deployment(selected_model)

    # Create deployment guide
    create_deployment_guide()

    print("\n🎉 Deployment setup complete!")
    print("\n📋 Next steps:")
    print(f"1. Check deployment config: {config_path}")
    print("2. Read deployment guide: DEPLOYMENT_GUIDE.md")
    print("3. Contact Novita AI support for custom model deployment")
    print("4. Monitor your usage in the Novita AI dashboard")


if __name__ == "__main__":
    main()