# Nexari-G1.1 / router_model.py
# Nexari-Research's picture
# Update router_model.py
# 2c066d4 verified
import asyncio
import json
import logging
import os
import re

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
logger = logging.getLogger("nexari.router")

# === MODEL: Google Gemma 2 2B (Smart Context) ===
REPO_ID = "bartowski/gemma-2-2b-it-GGUF"
FILENAME = "gemma-2-2b-it-Q6_K.gguf"
BASE_DIR = "./models/router"

# Holds the loaded router model; stays None until load_model() succeeds.
model = None

# === 1. CODING TRIGGERS (Technical Keywords) ===
# Any of these words on its own signals that the user wants code.
CODING_KEYWORDS = {
    # languages and markup
    "python", "javascript", "java", "cpp", "c++",
    "html", "css", "php", "sql",
    # generic programming vocabulary
    "code", "script", "function", "variable",
    "syntax", "error", "debug",
    # tooling, formats and frameworks
    "terminal", "console", "json", "api",
    "framework", "react", "node", "django",
    "compiler", "program", "develop", "algorithm",
}

# === 2. REAL-TIME TRIGGERS (Search) ===
SEARCH_KEYWORDS = {
    # recency markers
    "latest", "current", "news", "today", "now", "live",
    # markets
    "price", "stock", "gold rate", "bitcoin", "btc",
    # weather
    "weather", "forecast",
    # sports and results
    "cricket", "score", "match", "result", "winner",
    # people and upcoming events
    "who is the", "upcoming", "release date",
}

# === 3. TIME TRIGGERS ===
# Includes romanised Hindi: "samay" (time), "baj rahe" (o'clock), "ghadi" (clock).
TIME_KEYWORDS = {
    "time", "clock", "date", "day", "calendar",
    "samay", "baj rahe", "ghadi",
}
def load_model():
    """Download the router GGUF file if it is missing and load it into ``model``.

    The weights are looked up at ``BASE_DIR/FILENAME``; when that file does not
    exist it is fetched from ``REPO_ID`` via ``hf_hub_download``.  On success
    the module-level ``model`` is replaced with a ``Llama`` instance.

    This is best-effort: any failure is logged (with traceback) and swallowed,
    leaving ``model`` at its previous value.

    Returns:
        None
    """
    global model
    try:
        os.makedirs(BASE_DIR, exist_ok=True)
        path = os.path.join(BASE_DIR, FILENAME)
        if not os.path.exists(path):
            # Use the location reported by the hub client instead of assuming
            # where it placed the file under ``local_dir``.
            path = hf_hub_download(
                repo_id=REPO_ID, filename=FILENAME, local_dir=BASE_DIR
            )
        # Context window of 4096 so that the 16 few-shot examples fit.
        model = Llama(model_path=path, n_ctx=4096, n_threads=2, verbose=False)
        logger.info("✅ Router Logic Loaded (Hybrid Mode)")
    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Router Fail: %s", e)
async def load_model_async():
    """Run ``load_model`` in a worker thread and return its result.

    Offloading keeps the blocking download/load work off the event loop.
    """
    outcome = await asyncio.to_thread(load_model)
    return outcome
# Intents the router may emit; any other LLM output is treated as "chat".
_VALID_INTENTS = frozenset({"coding", "search", "time", "chat"})

# A build verb together with a build noun signals a "make me something" request.
_BUILD_VERBS = ("create", "make", "build", "write")
_BUILD_NOUNS = ("app", "game", "calculator", "website", "tool")

# Few-shot classification instructions sent to the router model.
_ROUTER_PROMPT = (
    "You are the Router AI. Classify the user query into ONE category: 'coding', 'search', 'time', or 'chat'.\n\n"
    "### EXAMPLES (Study these 16 scenarios):\n"
    "1. 'Create a simple calculator using HTML' -> {\"intent\": \"coding\"}\n"
    "2. 'Write a python script to scrape data' -> {\"intent\": \"coding\"}\n"
    "3. 'Fix this NameError in my code' -> {\"intent\": \"coding\"}\n"
    "4. 'How do I center a div in CSS?' -> {\"intent\": \"coding\"}\n"
    "5. 'Who won the match yesterday?' -> {\"intent\": \"search\"}\n"
    "6. 'Current price of Bitcoin' -> {\"intent\": \"search\"}\n"
    "7. 'Weather forecast for tomorrow' -> {\"intent\": \"search\"}\n"
    "8. 'Latest news about AI' -> {\"intent\": \"search\"}\n"
    "9. 'What is the time right now?' -> {\"intent\": \"time\"}\n"
    "10. 'My watch stopped, kya time hua hai?' -> {\"intent\": \"time\"}\n"
    "11. 'Date today' -> {\"intent\": \"time\"}\n"
    "12. 'Tell me a joke' -> {\"intent\": \"chat\"}\n"
    "13. 'Who are you?' -> {\"intent\": \"chat\"}\n"
    "14. 'Explain the theory of relativity' -> {\"intent\": \"chat\"}\n"
    "15. 'What is the meaning of life?' -> {\"intent\": \"chat\"}\n"
    "16. 'Make a snake game' -> {\"intent\": \"coding\"}\n\n"
    "### INSTRUCTION:\n"
    "Output ONLY valid JSON."
)

# Compiled keyword patterns, keyed by the frozenset of keywords they cover.
_KEYWORD_PATTERNS = {}


def _keyword_pattern(keywords):
    """Return a cached regex matching any keyword that begins at a word start."""
    key = frozenset(keywords)
    pattern = _KEYWORD_PATTERNS.get(key)
    if pattern is None:
        alternatives = "|".join(
            # Multi-word phrases tolerate any run of whitespace between words.
            r"\s+".join(re.escape(part) for part in keyword.split())
            for keyword in sorted(key, key=lambda k: (-len(k), k))
        )
        # (?<!\w) anchors the keyword to the start of a word, so "now" no
        # longer fires inside "know" nor "api" inside "capital".  The end is
        # left open so inflected forms ("scripts", "developer") still match.
        pattern = re.compile(r"(?<!\w)(?:" + alternatives + ")")
        _KEYWORD_PATTERNS[key] = pattern
    return pattern


def _has_keyword(text, keywords):
    """Return True when any keyword/phrase in ``keywords`` starts a word in ``text``."""
    if not keywords:
        return False
    return _keyword_pattern(keywords).search(text) is not None


def _predict_sync(query: str):
    """Classify ``query`` into a routing intent (blocking).

    Rule layers run first and need no model:

    1. A coding keyword (unless the query opens like a definition request),
       or a build verb plus a build noun -> ``coding``.
    2. A real-time keyword -> ``search``; a time keyword in a query shorter
       than 15 words -> ``time``.

    Only when no rule fires is the router model consulted with the few-shot
    prompt.  If the model is not loaded, the call fails, or the answer is not
    one of the known intents, the result is ``chat``.

    Args:
        query: Raw user text.

    Returns:
        A dict ``{"intent": <"coding" | "search" | "time" | "chat">}``.
    """
    if not isinstance(query, str) or not query.strip():
        return {"intent": "chat"}

    q_lower = query.lower().strip()

    # --- LAYER 1: STRICT CODING CHECK (zero latency) ---
    # A word such as 'HTML' or 'Python' routes straight to coding without
    # asking the model.  Exception: "What is HTML" (a definition) -> chat path.
    has_code_word = _has_keyword(q_lower, CODING_KEYWORDS)
    is_definition = q_lower.startswith(("what is", "define", "explain"))
    if has_code_word and not is_definition:
        return {"intent": "coding"}

    # "Create/Make/Build" something app-like.
    if _has_keyword(q_lower, _BUILD_VERBS) and _has_keyword(q_lower, _BUILD_NOUNS):
        return {"intent": "coding"}

    # --- LAYER 2: STRICT SEARCH & TIME ---
    if _has_keyword(q_lower, SEARCH_KEYWORDS):
        return {"intent": "search"}
    if _has_keyword(q_lower, TIME_KEYWORDS) and len(q_lower.split()) < 15:
        return {"intent": "time"}

    # --- LAYER 3: AI BRAIN (16-shot prompting) ---
    # Reached only when the rules above did not decide; this is the only
    # layer that requires the model, so the check lives here.
    if not model:
        return {"intent": "chat"}

    try:
        response = model.create_chat_completion(
            # The instructions travel in the user turn: Gemma's chat format
            # defines only user/model turns, and a separate system message
            # may be rejected by the model's chat template.
            messages=[
                {
                    "role": "user",
                    "content": f"{_ROUTER_PROMPT}\n\n### QUERY:\n{query}",
                }
            ],
            max_tokens=15,
            temperature=0.0,  # Zero temperature forces strict pattern following
            response_format={"type": "json_object"},
        )
        parsed = json.loads(response["choices"][0]["message"]["content"])
    except Exception:
        # Fallback to chat, but leave a trace so a broken model is noticed.
        logger.warning(
            "Router LLM classification failed; defaulting to chat", exc_info=True
        )
        return {"intent": "chat"}

    # Accept only a known intent; tolerate case/whitespace noise from the model.
    intent = parsed.get("intent") if isinstance(parsed, dict) else None
    if isinstance(intent, str):
        intent = intent.strip().lower()
        if intent in _VALID_INTENTS:
            return {"intent": intent}
    return {"intent": "chat"}
async def analyze_intent(query: str, timeout: float = 2.5):
    """Classify ``query`` without blocking the event loop.

    Runs ``_predict_sync`` in a worker thread and waits at most ``timeout``
    seconds for it.  On timeout or any ordinary error the result is the
    ``chat`` intent.  Task cancellation is not intercepted and propagates to
    the caller.

    Args:
        query: Raw user text.
        timeout: Seconds to wait for a classification (default 2.5).

    Returns:
        A dict with an ``"intent"`` key.
    """
    try:
        return await asyncio.wait_for(
            asyncio.to_thread(_predict_sync, query), timeout=timeout
        )
    except asyncio.TimeoutError:
        logger.warning("Router timed out after %ss; defaulting to chat", timeout)
    except Exception:
        # `except Exception` (not a bare except) lets CancelledError through.
        logger.exception("Router failed; defaulting to chat")
    return {"intent": "chat"}