Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| """ | |
| Deploy fine-tuned model to Novita AI serverless GPU | |
| """ | |
| import os | |
| import json | |
| import requests | |
| import time | |
| from pathlib import Path | |
class NovitaAIDeployer:
    """Helper for validating Novita AI API access and preparing model deployments.

    Wraps the OpenAI-compatible Novita AI endpoint: connection checks,
    model listing, inference smoke tests, and writing a local deployment
    config file (Novita AI has no public deployment API yet, so the config
    is intended for a manual deployment step).
    """

    # Keys at or below 20 chars cannot be masked with a 10-char head +
    # 10-char tail excerpt without reproducing the whole secret.
    _MIN_MASKABLE_KEY_LEN = 21

    def __init__(self, api_key):
        """Store the credential and build default request headers.

        Args:
            api_key: Novita AI API key used as a Bearer token.
        """
        self.api_key = api_key
        self.base_url = "https://api.novita.ai/openai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def _masked_key(self):
        """Return a redacted form of the API key that is safe to persist.

        Bug fix: the previous ``key[:10] + "..." + key[-10:]`` mask leaked
        the entire key whenever it was 20 characters or shorter, because
        the head and tail slices overlapped. Short keys are now fully
        redacted instead.
        """
        if len(self.api_key) < self._MIN_MASKABLE_KEY_LEN:
            return "***"
        return self.api_key[:10] + "..." + self.api_key[-10:]

    def test_connection(self):
        """Return True if the /models endpoint answers with HTTP 200."""
        try:
            response = requests.get(
                f"{self.base_url}/models", headers=self.headers, timeout=10
            )
            return response.status_code == 200
        except requests.RequestException as e:
            # Best-effort probe: report and signal failure, never raise.
            print(f"β Connection error: {e}")
            return False

    def get_available_models(self):
        """Return the model list (the response's 'data' field), or [] on any failure."""
        try:
            response = requests.get(
                f"{self.base_url}/models", headers=self.headers, timeout=10
            )
            if response.status_code == 200:
                return response.json().get('data', [])
            return []
        except requests.RequestException as e:
            print(f"β Error getting models: {e}")
            return []

    def create_deployment(self, model_name, deployment_name=None):
        """Write a local deployment-config JSON for *model_name*.

        Args:
            model_name: Fully qualified model id (e.g. "org/model-x").
            deployment_name: Optional config name; derived from the last
                path segment of *model_name* when omitted.

        Returns:
            Path of the written config file (under ``configs/``).
        """
        if not deployment_name:
            deployment_name = f"textilindo-{model_name.split('/')[-1]}"
        # Note: this is a placeholder for the actual deployment API --
        # Novita AI might not have a public deployment API yet, so we
        # emit a configuration file for a manual deployment step instead.
        print(f"π§ Creating deployment: {deployment_name}")
        print(f"π Model: {model_name}")
        deployment_config = {
            "deployment_name": deployment_name,
            "model_name": model_name,
            "base_url": self.base_url,
            # Never persist the raw key; store a redacted excerpt only.
            "api_key": self._masked_key(),
            "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "ready_for_deployment",
        }
        config_path = f"configs/{deployment_name}_deployment.json"
        os.makedirs("configs", exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(deployment_config, f, indent=2, ensure_ascii=False)
        print(f"β Deployment config saved: {config_path}")
        return config_path

    def test_model_inference(self, model_name, test_prompt="Halo, apa kabar?"):
        """Send one chat completion to smoke-test *model_name*.

        Returns True when the API answers HTTP 200; prints the assistant
        reply (or the error) as a side effect.
        """
        print(f"π§ͺ Testing inference with model: {model_name}")
        payload = {
            "model": model_name,
            "messages": [
                {"role": "user", "content": test_prompt}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                result = response.json()
                assistant_message = result.get('choices', [{}])[0].get('message', {}).get('content', '')
                print(f"β Inference successful!")
                print(f"π Response: {assistant_message}")
                return True
            else:
                print(f"β Inference failed: {response.status_code} - {response.text}")
                return False
        except requests.RequestException as e:
            print(f"β Inference error: {e}")
            return False
def create_deployment_guide():
    """Write a static Markdown deployment guide to DEPLOYMENT_GUIDE.md.

    Returns:
        The path of the written guide file, for consistency with
        ``NovitaAIDeployer.create_deployment`` which returns its config path.
    """
    guide_content = """
# Novita AI Deployment Guide
## Current Status
Your fine-tuned model is ready for deployment to Novita AI serverless GPU.
## Manual Deployment Steps
### 1. Prepare Your Model
- Ensure your fine-tuned model is saved in the `models/` directory
- Verify the model weights and configuration files are complete
### 2. Upload to Novita AI
1. Log in to your Novita AI dashboard: https://novita.ai/
2. Navigate to "Custom Models" or "Model Library"
3. Click "Upload Model" or "Deploy Custom Model"
4. Upload your model files (weights, config, tokenizer)
5. Set the model name (e.g., "textilindo-llama-3.2-1b")
6. Configure serverless GPU settings
### 3. Configure API Access
1. Get your deployment API endpoint
2. Update your application to use the new endpoint
3. Test the deployment with sample queries
### 4. Monitor Usage
- Track API calls and costs in the Novita AI dashboard
- Monitor model performance and response times
- Set up alerts for any issues
## API Usage Example
```python
import requests
# Your deployment endpoint
endpoint = "https://api.novita.ai/openai"
api_key = "your_api_key"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
payload = {
    "model": "your-deployed-model-name",
    "messages": [
        {"role": "user", "content": "dimana lokasi textilindo?"}
    ],
    "max_tokens": 200,
    "temperature": 0.7
}
response = requests.post(f"{endpoint}/chat/completions", headers=headers, json=payload)
result = response.json()
print(result['choices'][0]['message']['content'])
```
## Next Steps
1. Contact Novita AI support for custom model deployment
2. Consider using their Model API for easier integration
3. Set up monitoring and logging for production use
"""
    guide_path = "DEPLOYMENT_GUIDE.md"
    with open(guide_path, 'w', encoding='utf-8') as f:
        f.write(guide_content)
    print(f"β Deployment guide created: {guide_path}")
    return guide_path
def main():
    """Interactive CLI: verify Novita AI access, pick a model, smoke-test
    inference, then write a deployment config and a Markdown guide."""
    print("π Novita AI Deployment Setup")
    print("=" * 50)

    # Resolve the API key: environment variable first, prompt second.
    api_key = os.getenv('NOVITA_API_KEY')
    if not api_key:
        print("β NOVITA_API_KEY not found")
        api_key = input("Enter your Novita AI API key: ").strip()
        if not api_key:
            print("β API key required")
            return
        os.environ['NOVITA_API_KEY'] = api_key

    deployer = NovitaAIDeployer(api_key)

    # Bail out early if the endpoint is unreachable.
    print("π Testing connection...")
    if not deployer.test_connection():
        print("β Could not connect to Novita AI")
        return
    print("β Connected to Novita AI!")

    models = deployer.get_available_models()
    print(f"π Found {len(models)} available models")

    # Offer a short list of lightweight candidates for deployment.
    print("\nπ― Select model for deployment:")
    candidate_models = [
        "meta-llama/llama-3.2-1b-instruct",
        "meta-llama/llama-3.2-3b-instruct",
        "qwen/qwen3-4b-fp8",
        "qwen/qwen3-8b-fp8",
    ]
    for position, candidate in enumerate(candidate_models, start=1):
        print(f"{position}. {candidate}")

    raw_choice = input("\nSelect model (1-4): ").strip()
    try:
        picked = int(raw_choice)
    except ValueError:
        print("β Invalid input, using default")
        selected_model = candidate_models[0]
    else:
        if 1 <= picked <= len(candidate_models):
            selected_model = candidate_models[picked - 1]
        else:
            print("β Invalid choice, using default")
            selected_model = candidate_models[0]
    print(f"β Selected: {selected_model}")

    # Smoke-test inference before producing any deployment artifacts.
    print("\nπ§ͺ Testing model inference...")
    if not deployer.test_model_inference(selected_model):
        print("β Model inference failed")
        return
    print("β Model inference working!")

    print("\nπ§ Creating deployment configuration...")
    config_path = deployer.create_deployment(selected_model)

    create_deployment_guide()

    print("\nπ Deployment setup complete!")
    print("\nπ Next steps:")
    print(f"1. Check deployment config: {config_path}")
    print("2. Read deployment guide: DEPLOYMENT_GUIDE.md")
    print("3. Contact Novita AI support for custom model deployment")
    print("4. Monitor your usage in the Novita AI dashboard")
# Run the interactive deployment setup only when executed as a script.
if __name__ == "__main__":
    main()