Spaces:
Running
Running
| import os | |
| import logging | |
| import json | |
| import asyncio | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
logger = logging.getLogger("nexari.router")

# --- Router model: Google Gemma 2 2B instruct, Q6_K GGUF build ---
REPO_ID = "bartowski/gemma-2-2b-it-GGUF"
FILENAME = "gemma-2-2b-it-Q6_K.gguf"
BASE_DIR = "./models/router"

# Populated by load_model(); stays None until the weights are loaded.
model = None

# --- 1. Coding triggers ---
# Technical vocabulary: the presence of any of these terms is taken to mean
# the user wants code.
CODING_KEYWORDS = {
    # languages and markup
    "python", "javascript", "java", "cpp", "c++", "html", "css", "php", "sql",
    # general programming vocabulary
    "code", "script", "function", "variable", "syntax", "error", "debug",
    # tooling and frameworks
    "terminal", "console", "json", "api", "framework", "react", "node",
    "django",
    # software development
    "compiler", "program", "develop", "algorithm",
}

# --- 2. Real-time triggers (route to web search) ---
SEARCH_KEYWORDS = {
    "latest", "current", "news", "today", "now", "live", "price", "stock",
    "weather", "forecast", "cricket", "score", "match", "result", "winner",
    "gold rate", "bitcoin", "btc", "who is the", "upcoming", "release date",
}

# --- 3. Time/date triggers (English and romanized Hindi) ---
TIME_KEYWORDS = {
    "time", "clock", "date", "day", "calendar", "samay", "baj rahe", "ghadi",
}
def load_model():
    """Fetch the router weights if they are not on disk, then load them.

    On success the module-level ``model`` is rebound to a new ``Llama``
    instance. Any exception raised along the way is logged and swallowed,
    in which case ``model`` keeps its previous value.
    """
    global model
    try:
        os.makedirs(BASE_DIR, exist_ok=True)
        weights_path = os.path.join(BASE_DIR, FILENAME)
        already_on_disk = os.path.exists(weights_path)
        if not already_on_disk:
            hf_hub_download(
                repo_id=REPO_ID,
                filename=FILENAME,
                local_dir=BASE_DIR,
            )
        # Context window set to 4096 so the 16 few-shot examples fit.
        model = Llama(
            model_path=weights_path,
            n_ctx=4096,
            n_threads=2,
            verbose=False,
        )
        logger.info("β Router Logic Loaded (Hybrid Mode)")
    except Exception as exc:
        logger.error(f"Router Fail: {exc}")
async def load_model_async():
    """Await ``load_model`` executed via ``asyncio.to_thread``.

    Returns whatever ``load_model`` returns.
    """
    outcome = await asyncio.to_thread(load_model)
    return outcome
# Labels the router may emit; any other model output collapses to "chat".
_VALID_INTENTS = frozenset({"coding", "search", "time", "chat"})

# Query openers that signal a definition/explanation request rather than a
# request for code (e.g. "What is HTML").
_DEFINITION_PREFIXES = ("what is", "define", "explain")

# "Create/Make/Build" rule: one verb plus one product noun means coding.
_CREATE_VERBS = ("create", "make", "build", "write")
_PRODUCT_NOUNS = ("app", "game", "calculator", "website", "tool")

# Few-shot system prompt for the LLM fallback layer. It never changes, so it
# is built once at import time instead of on every call.
_ROUTER_SYSTEM_PROMPT = (
    "You are the Router AI. Classify the user query into ONE category: 'coding', 'search', 'time', or 'chat'.\n\n"
    "### EXAMPLES (Study these 16 scenarios):\n"
    "1. 'Create a simple calculator using HTML' -> {\"intent\": \"coding\"}\n"
    "2. 'Write a python script to scrape data' -> {\"intent\": \"coding\"}\n"
    "3. 'Fix this NameError in my code' -> {\"intent\": \"coding\"}\n"
    "4. 'How do I center a div in CSS?' -> {\"intent\": \"coding\"}\n"
    "5. 'Who won the match yesterday?' -> {\"intent\": \"search\"}\n"
    "6. 'Current price of Bitcoin' -> {\"intent\": \"search\"}\n"
    "7. 'Weather forecast for tomorrow' -> {\"intent\": \"search\"}\n"
    "8. 'Latest news about AI' -> {\"intent\": \"search\"}\n"
    "9. 'What is the time right now?' -> {\"intent\": \"time\"}\n"
    "10. 'My watch stopped, kya time hua hai?' -> {\"intent\": \"time\"}\n"
    "11. 'Date today' -> {\"intent\": \"time\"}\n"
    "12. 'Tell me a joke' -> {\"intent\": \"chat\"}\n"
    "13. 'Who are you?' -> {\"intent\": \"chat\"}\n"
    "14. 'Explain the theory of relativity' -> {\"intent\": \"chat\"}\n"
    "15. 'What is the meaning of life?' -> {\"intent\": \"chat\"}\n"
    "16. 'Make a snake game' -> {\"intent\": \"coding\"}\n\n"
    "### INSTRUCTION:\n"
    "Output ONLY valid JSON."
)


def _mentions_any(text, keywords):
    """Return True if any keyword occurs in *text* at the start of a word.

    A plain ``keyword in text`` test also fires inside unrelated words
    ("api" in "capital", "now" in "know", "day" in "yesterday", "app" in
    "happy"). Requiring the match to begin at a word boundary removes those
    false positives while still accepting inflected forms such as "scripts"
    or "programming". Multi-word keywords ("gold rate") and keywords with
    punctuation ("c++") work because only the character *before* the match
    is inspected.
    """
    for keyword in keywords:
        pos = text.find(keyword)
        while pos != -1:
            if pos == 0:
                return True
            previous = text[pos - 1]
            if not (previous.isalnum() or previous == "_"):
                return True
            pos = text.find(keyword, pos + 1)
    return False


def _predict_sync(query: str):
    """Classify *query* into a routing intent (blocking).

    Three layers are tried in order:

    1. Coding rules: a coding keyword (unless the query opens like a
       definition request), or a create-verb combined with a product noun.
    2. Search keywords, then time keywords (time only for queries shorter
       than 15 words).
    3. The few-shot LLM classifier, used only when no rule fired.

    The rule layers do not need the model, so they still run when the model
    is not loaded; only the LLM layer is skipped in that case.

    Args:
        query: Raw user text. Empty or ``None`` input is treated as chat.

    Returns:
        A new dict ``{"intent": <label>}`` where the label is one of
        ``"coding"``, ``"search"``, ``"time"`` or ``"chat"``. ``"chat"`` is
        the fallback whenever the model is unavailable, fails, or produces
        output that is not one of the four labels.
    """
    q_lower = (query or "").lower().strip()
    if not q_lower:
        # Nothing to classify; do not spend an LLM call on it.
        return {"intent": "chat"}

    # --- Layer 1: strict coding check (no model call) ---
    # A technical term routes straight to coding, except for definition-style
    # questions such as "What is HTML", which belong to chat.
    is_definition = q_lower.startswith(_DEFINITION_PREFIXES)
    if not is_definition and _mentions_any(q_lower, CODING_KEYWORDS):
        return {"intent": "coding"}

    # "Create/Make/Build" + product noun, e.g. "make a snake game".
    if _mentions_any(q_lower, _CREATE_VERBS) and _mentions_any(q_lower, _PRODUCT_NOUNS):
        return {"intent": "coding"}

    # --- Layer 2: strict search and time checks ---
    if _mentions_any(q_lower, SEARCH_KEYWORDS):
        return {"intent": "search"}

    if len(q_lower.split()) < 15 and _mentions_any(q_lower, TIME_KEYWORDS):
        return {"intent": "time"}

    # --- Layer 3: LLM classifier (16-shot prompt) ---
    # Reached only when none of the rules above matched.
    if not model:
        return {"intent": "chat"}

    try:
        response = model.create_chat_completion(
            messages=[
                {"role": "system", "content": _ROUTER_SYSTEM_PROMPT},
                {"role": "user", "content": query},
            ],
            max_tokens=15,
            temperature=0.0,  # zero temperature for deterministic labels
            response_format={"type": "json_object"},
        )
        parsed = json.loads(response["choices"][0]["message"]["content"])
    except Exception:
        # Best-effort fallback: inference errors or truncated/invalid JSON
        # must not break routing, but they should be visible in the logs.
        logger.exception("Router LLM classification failed; defaulting to chat")
        return {"intent": "chat"}

    # The model may emit valid JSON that is still not a usable label.
    intent = parsed.get("intent") if isinstance(parsed, dict) else None
    if isinstance(intent, str):
        intent = intent.strip().lower()
        if intent in _VALID_INTENTS:
            return {"intent": intent}

    logger.warning("Router LLM returned unexpected payload %r; defaulting to chat", parsed)
    return {"intent": "chat"}
async def analyze_intent(query: str, timeout: float = 2.5):
    """Classify *query* without blocking the event loop, with a time budget.

    Runs ``_predict_sync`` in a worker thread and waits at most *timeout*
    seconds for it.

    Args:
        query: Raw user text to classify.
        timeout: Seconds to wait for a result (default 2.5, the previous
            hard-coded value).

    Returns:
        The dict produced by ``_predict_sync``, or ``{"intent": "chat"}``
        if classification timed out or raised.

    Note:
        Task cancellation (``asyncio.CancelledError``) is deliberately not
        caught, so cancelling the caller propagates normally instead of
        being turned into a "chat" result.
    """
    try:
        return await asyncio.wait_for(
            asyncio.to_thread(_predict_sync, query),
            timeout=timeout,
        )
    except asyncio.TimeoutError:
        # NOTE(review): per the asyncio docs a timed-out to_thread call is
        # abandoned, not interrupted; the worker thread keeps running.
        logger.warning("Router timed out after %ss; defaulting to chat", timeout)
    except Exception:
        logger.exception("Router failed unexpectedly; defaulting to chat")
    return {"intent": "chat"}