File size: 8,243 Bytes
9b4ef96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/usr/bin/env python3
"""
Deploy fine-tuned model to Novita AI serverless GPU
"""

import os
import json
import requests
import time
from pathlib import Path

class NovitaAIDeployer:
    """Thin client for the Novita AI OpenAI-compatible API.

    Provides connectivity checks, model listing, an inference smoke test,
    and writing a local deployment-config JSON file.
    """

    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://api.novita.ai/openai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def _get_models_response(self):
        """GET /models; may raise requests.RequestException on network failure."""
        return requests.get(f"{self.base_url}/models", headers=self.headers, timeout=10)

    def test_connection(self):
        """Test connection to Novita AI.

        Returns:
            bool: True if /models responds with HTTP 200, False otherwise.
        """
        try:
            return self._get_models_response().status_code == 200
        except requests.RequestException as e:
            # Network-level failure (DNS, timeout, TLS, ...); report and degrade.
            print(f"❌ Connection error: {e}")
            return False

    def get_available_models(self):
        """Get list of available models.

        Returns:
            list: The 'data' array from /models, or [] on any failure.
        """
        try:
            response = self._get_models_response()
            if response.status_code == 200:
                return response.json().get('data', [])
            return []
        except (requests.RequestException, ValueError) as e:
            # ValueError covers malformed JSON in the response body.
            print(f"❌ Error getting models: {e}")
            return []

    def create_deployment(self, model_name, deployment_name=None):
        """Write a deployment-config JSON file for the given model.

        Args:
            model_name: Fully-qualified model id (e.g. "meta-llama/llama-3.2-1b-instruct").
            deployment_name: Optional explicit name; defaults to
                "textilindo-<last path segment of model_name>".

        Returns:
            str: Path of the config file written under configs/.
        """
        if not deployment_name:
            deployment_name = f"textilindo-{model_name.split('/')[-1]}"

        # Note: This is a placeholder for the actual deployment API.
        # Novita AI might not have a public deployment API yet.
        print(f"🔧 Creating deployment: {deployment_name}")
        print(f"📋 Model: {model_name}")

        # Store only a masked key fragment so the config file never holds
        # the full credential. NOTE(review): for keys shorter than 20 chars
        # the two fragments overlap and may reveal the whole key.
        deployment_config = {
            "deployment_name": deployment_name,
            "model_name": model_name,
            "base_url": self.base_url,
            "api_key": self.api_key[:10] + "..." + self.api_key[-10:],
            "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "ready_for_deployment"
        }

        config_path = f"configs/{deployment_name}_deployment.json"
        os.makedirs("configs", exist_ok=True)

        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(deployment_config, f, indent=2, ensure_ascii=False)

        print(f"✅ Deployment config saved: {config_path}")
        return config_path

    def test_model_inference(self, model_name, test_prompt="Halo, apa kabar?"):
        """Send one chat-completion request to smoke-test the model.

        Args:
            model_name: Model id to query.
            test_prompt: User message to send (defaults to an Indonesian greeting).

        Returns:
            bool: True if the request succeeded (HTTP 200), False otherwise.
        """
        print(f"🧪 Testing inference with model: {model_name}")

        payload = {
            "model": model_name,
            "messages": [
                {"role": "user", "content": test_prompt}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )

            if response.status_code == 200:
                result = response.json()
                # Defensive navigation: tolerate missing choices/message keys.
                assistant_message = result.get('choices', [{}])[0].get('message', {}).get('content', '')
                print("✅ Inference successful!")
                print(f"📝 Response: {assistant_message}")
                return True
            else:
                print(f"❌ Inference failed: {response.status_code} - {response.text}")
                return False

        except (requests.RequestException, ValueError) as e:
            print(f"❌ Inference error: {e}")
            return False

def create_deployment_guide():
    """Write the manual Novita AI deployment guide to DEPLOYMENT_GUIDE.md."""
    guide_markdown = """
# Novita AI Deployment Guide

## Current Status
Your fine-tuned model is ready for deployment to Novita AI serverless GPU.

## Manual Deployment Steps

### 1. Prepare Your Model
- Ensure your fine-tuned model is saved in the `models/` directory
- Verify the model weights and configuration files are complete

### 2. Upload to Novita AI
1. Log in to your Novita AI dashboard: https://novita.ai/
2. Navigate to "Custom Models" or "Model Library"
3. Click "Upload Model" or "Deploy Custom Model"
4. Upload your model files (weights, config, tokenizer)
5. Set the model name (e.g., "textilindo-llama-3.2-1b")
6. Configure serverless GPU settings

### 3. Configure API Access
1. Get your deployment API endpoint
2. Update your application to use the new endpoint
3. Test the deployment with sample queries

### 4. Monitor Usage
- Track API calls and costs in the Novita AI dashboard
- Monitor model performance and response times
- Set up alerts for any issues

## API Usage Example

```python
import requests

# Your deployment endpoint
endpoint = "https://api.novita.ai/openai"
api_key = "your_api_key"

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

payload = {
    "model": "your-deployed-model-name",
    "messages": [
        {"role": "user", "content": "dimana lokasi textilindo?"}
    ],
    "max_tokens": 200,
    "temperature": 0.7
}

response = requests.post(f"{endpoint}/chat/completions", headers=headers, json=payload)
result = response.json()
print(result['choices'][0]['message']['content'])
```

## Next Steps
1. Contact Novita AI support for custom model deployment
2. Consider using their Model API for easier integration
3. Set up monitoring and logging for production use
"""

    output_file = Path("DEPLOYMENT_GUIDE.md")
    output_file.write_text(guide_markdown, encoding="utf-8")
    print(f"✅ Deployment guide created: {output_file}")

def main():
    """Interactive setup: verify Novita AI access, pick a model, smoke-test it, and write deployment artifacts."""
    print("🚀 Novita AI Deployment Setup")
    print("=" * 50)

    # Resolve the API key: environment first, then interactive prompt.
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("❌ NOVITA_API_KEY not found")
        api_key = input("Enter your Novita AI API key: ").strip()
        if not api_key:
            print("❌ API key required")
            return
        os.environ['NOVITA_API_KEY'] = api_key

    deployer = NovitaAIDeployer(api_key)

    # Bail out early if the service is unreachable.
    print("🔍 Testing connection...")
    if not deployer.test_connection():
        print("❌ Could not connect to Novita AI")
        return

    print("✅ Connected to Novita AI!")

    available = deployer.get_available_models()
    print(f"📋 Found {len(available)} available models")

    # Offer a fixed shortlist of lightweight candidates.
    print("\n🎯 Select model for deployment:")
    candidates = [
        "meta-llama/llama-3.2-1b-instruct",
        "meta-llama/llama-3.2-3b-instruct",
        "qwen/qwen3-4b-fp8",
        "qwen/qwen3-8b-fp8"
    ]

    for idx, name in enumerate(candidates, start=1):
        print(f"{idx}. {name}")

    # Default to the first candidate on any bad input.
    selected_model = candidates[0]
    try:
        picked = int(input("\nSelect model (1-4): ").strip())
    except ValueError:
        print("❌ Invalid input, using default")
    else:
        if 1 <= picked <= len(candidates):
            selected_model = candidates[picked - 1]
        else:
            print("❌ Invalid choice, using default")

    print(f"✅ Selected: {selected_model}")

    # Smoke-test inference before producing any artifacts.
    print("\n🧪 Testing model inference...")
    if not deployer.test_model_inference(selected_model):
        print("❌ Model inference failed")
        return
    print("✅ Model inference working!")

    print("\n🔧 Creating deployment configuration...")
    config_path = deployer.create_deployment(selected_model)

    create_deployment_guide()

    print("\n🎉 Deployment setup complete!")
    print("\n📋 Next steps:")
    print(f"1. Check deployment config: {config_path}")
    print("2. Read deployment guide: DEPLOYMENT_GUIDE.md")
    print("3. Contact Novita AI support for custom model deployment")
    print("4. Monitor your usage in the Novita AI dashboard")

# Script entry point: run the interactive deployment setup.
if __name__ == "__main__":
    main()