Spaces:
Build error
Build error
File size: 8,243 Bytes
9b4ef96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
#!/usr/bin/env python3
"""
Deploy fine-tuned model to Novita AI serverless GPU
"""
import os
import json
import requests
import time
from pathlib import Path
class NovitaAIDeployer:
    """Helper client for deploying and smoke-testing models on Novita AI.

    Wraps the OpenAI-compatible Novita endpoints for listing models and
    running a test chat completion, and writes a local deployment config
    file (Novita may not expose a public deployment API yet).
    """

    def __init__(self, api_key):
        """Store the API key and build default Bearer-auth JSON headers.

        Args:
            api_key: Novita AI API key used as the Bearer token.
        """
        self.api_key = api_key
        self.base_url = "https://api.novita.ai/openai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def test_connection(self):
        """Test connection to Novita AI.

        Returns:
            True when GET /models answers HTTP 200, False on any error.
        """
        try:
            response = requests.get(f"{self.base_url}/models", headers=self.headers, timeout=10)
            return response.status_code == 200
        except Exception as e:
            # Broad catch is deliberate: any failure (DNS, TLS, timeout)
            # should report as "not connected" rather than crash the CLI.
            print(f"β Connection error: {e}")
            return False

    def get_available_models(self):
        """Get the list of available models.

        Returns:
            The 'data' list from the /models response, or [] on any failure.
        """
        try:
            response = requests.get(f"{self.base_url}/models", headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json().get('data', [])
            return []
        except Exception as e:
            print(f"β Error getting models: {e}")
            return []

    def create_deployment(self, model_name, deployment_name=None):
        """Write a local deployment-config JSON file for the model.

        Note: placeholder for an actual deployment API — Novita AI might not
        have a public one yet, so this only records the intent to deploy.

        Args:
            model_name: Full model id, e.g. "meta-llama/llama-3.2-1b-instruct".
            deployment_name: Optional explicit name; defaults to
                "textilindo-<model suffix>".

        Returns:
            Path of the JSON config file written under configs/.
        """
        if not deployment_name:
            deployment_name = f"textilindo-{model_name.split('/')[-1]}"
        print(f"π§ Creating deployment: {deployment_name}")
        print(f"π Model: {model_name}")
        deployment_config = {
            "deployment_name": deployment_name,
            "model_name": model_name,
            "base_url": self.base_url,
            # Mask the key: only the first and last 10 characters are stored.
            "api_key": self.api_key[:10] + "..." + self.api_key[-10:],
            "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "ready_for_deployment"
        }
        config_path = f"configs/{deployment_name}_deployment.json"
        os.makedirs("configs", exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(deployment_config, f, indent=2, ensure_ascii=False)
        print(f"β Deployment config saved: {config_path}")
        return config_path

    def test_model_inference(self, model_name, test_prompt="Halo, apa kabar?"):
        """Run a one-shot chat completion to verify the model answers.

        Args:
            model_name: Model to query.
            test_prompt: Prompt sent as a single user message.

        Returns:
            True on HTTP 200, False on any error or non-200 status.
        """
        print(f"π§ͺ Testing inference with model: {model_name}")
        payload = {
            "model": model_name,
            "messages": [
                {"role": "user", "content": test_prompt}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                result = response.json()
                assistant_message = result.get('choices', [{}])[0].get('message', {}).get('content', '')
                print(f"β Inference successful!")
                print(f"π Response: {assistant_message}")
                return True
            else:
                print(f"β Inference failed: {response.status_code} - {response.text}")
                return False
        except Exception as e:
            print(f"β Inference error: {e}")
            return False
def create_deployment_guide():
    """Write DEPLOYMENT_GUIDE.md with manual Novita AI deployment steps.

    Returns:
        None. Side effect: creates/overwrites DEPLOYMENT_GUIDE.md in the
        current working directory.
    """
    guide_content = """
# Novita AI Deployment Guide
## Current Status
Your fine-tuned model is ready for deployment to Novita AI serverless GPU.
## Manual Deployment Steps
### 1. Prepare Your Model
- Ensure your fine-tuned model is saved in the `models/` directory
- Verify the model weights and configuration files are complete
### 2. Upload to Novita AI
1. Log in to your Novita AI dashboard: https://novita.ai/
2. Navigate to "Custom Models" or "Model Library"
3. Click "Upload Model" or "Deploy Custom Model"
4. Upload your model files (weights, config, tokenizer)
5. Set the model name (e.g., "textilindo-llama-3.2-1b")
6. Configure serverless GPU settings
### 3. Configure API Access
1. Get your deployment API endpoint
2. Update your application to use the new endpoint
3. Test the deployment with sample queries
### 4. Monitor Usage
- Track API calls and costs in the Novita AI dashboard
- Monitor model performance and response times
- Set up alerts for any issues
## API Usage Example
```python
import requests
# Your deployment endpoint
endpoint = "https://api.novita.ai/openai"
api_key = "your_api_key"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
payload = {
    "model": "your-deployed-model-name",
    "messages": [
        {"role": "user", "content": "dimana lokasi textilindo?"}
    ],
    "max_tokens": 200,
    "temperature": 0.7
}
response = requests.post(f"{endpoint}/chat/completions", headers=headers, json=payload)
result = response.json()
print(result['choices'][0]['message']['content'])
```
## Next Steps
1. Contact Novita AI support for custom model deployment
2. Consider using their Model API for easier integration
3. Set up monitoring and logging for production use
"""
    guide_path = "DEPLOYMENT_GUIDE.md"
    with open(guide_path, 'w', encoding='utf-8') as f:
        f.write(guide_content)
    print(f"β Deployment guide created: {guide_path}")
def main():
    """Interactive CLI: obtain an API key, verify connectivity, pick a model,
    smoke-test inference, then write a deployment config and guide.

    Returns:
        None. Exits early (plain return) on missing key, failed connection,
        or failed inference.
    """
    print("π Novita AI Deployment Setup")
    print("=" * 50)

    # Check API key: environment first, interactive prompt as fallback.
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("β NOVITA_API_KEY not found")
        api_key = input("Enter your Novita AI API key: ").strip()
        if not api_key:
            print("β API key required")
            return
        # Persist for the rest of this process so helpers can read it too.
        os.environ['NOVITA_API_KEY'] = api_key

    # Initialize deployer
    deployer = NovitaAIDeployer(api_key)

    # Test connection
    print("π Testing connection...")
    if not deployer.test_connection():
        print("β Could not connect to Novita AI")
        return
    print("β Connected to Novita AI!")

    # Get available models
    models = deployer.get_available_models()
    print(f"π Found {len(models)} available models")

    # Select model for deployment (falls back to the first entry on bad input)
    print("\nπ― Select model for deployment:")
    lightweight_models = [
        "meta-llama/llama-3.2-1b-instruct",
        "meta-llama/llama-3.2-3b-instruct",
        "qwen/qwen3-4b-fp8",
        "qwen/qwen3-8b-fp8"
    ]
    for i, model in enumerate(lightweight_models, 1):
        print(f"{i}. {model}")
    try:
        choice = int(input("\nSelect model (1-4): ").strip())
        if 1 <= choice <= len(lightweight_models):
            selected_model = lightweight_models[choice - 1]
        else:
            print("β Invalid choice, using default")
            selected_model = lightweight_models[0]
    except ValueError:
        print("β Invalid input, using default")
        selected_model = lightweight_models[0]
    print(f"β Selected: {selected_model}")

    # Test model inference before writing any deployment artifacts.
    print(f"\nπ§ͺ Testing model inference...")
    if deployer.test_model_inference(selected_model):
        print("β Model inference working!")
    else:
        print("β Model inference failed")
        return

    # Create deployment config
    print(f"\nπ§ Creating deployment configuration...")
    config_path = deployer.create_deployment(selected_model)

    # Create deployment guide
    create_deployment_guide()

    print(f"\nπ Deployment setup complete!")
    print(f"\nπ Next steps:")
    print(f"1. Check deployment config: {config_path}")
    print(f"2. Read deployment guide: DEPLOYMENT_GUIDE.md")
    print(f"3. Contact Novita AI support for custom model deployment")
    print(f"4. Monitor your usage in the Novita AI dashboard")


if __name__ == "__main__":
    main()
|