Spaces:

Nexari-Research
/

Nexari-G1.1

Running

App Files Files Community

Nexari-G1.1 / router_model.py

Nexari-Research

Update router_model.py

2c066d4 verified about 17 hours ago

raw

history blame contribute delete

5.07 kB

	import os
	import logging
	import json
	import asyncio
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	# Module-level logger; records are emitted under the "nexari.router" name.
	logger = logging.getLogger("nexari.router")

	# === MODEL: Google Gemma 2 2B (Smart Context) ===
	# Hugging Face repository and GGUF file that load_model() downloads when the
	# file is not already present under BASE_DIR.
	REPO_ID = "bartowski/gemma-2-2b-it-GGUF"
	FILENAME = "gemma-2-2b-it-Q6_K.gguf"
	# Local directory the model file is downloaded to and loaded from.
	BASE_DIR = "./models/router"

	# Loaded Llama instance; stays None until load_model() succeeds.
	model = None

	# === 1. CODING TRIGGERS (technical keywords) ===
	# Any one of these words is taken as a sign that the user wants code.
	CODING_KEYWORDS = {
	    # languages and markup
	    "python", "javascript", "java", "cpp", "c++",
	    "html", "css", "php", "sql",
	    # general programming vocabulary
	    "code", "script", "function", "variable", "syntax",
	    "program", "develop", "algorithm", "compiler",
	    # debugging and tooling
	    "error", "debug", "terminal", "console",
	    # data formats and frameworks
	    "json", "api", "framework", "react", "node", "django",
	}

	# === 2. REAL-TIME TRIGGERS (web search) ===
	SEARCH_KEYWORDS = {
	    # recency
	    "latest", "current", "today", "now", "live",
	    "upcoming", "news", "release date",
	    # markets
	    "price", "stock", "gold rate", "bitcoin", "btc",
	    # weather
	    "weather", "forecast",
	    # sports
	    "cricket", "score", "match", "result", "winner",
	    # people lookups
	    "who is the",
	}

	# === 3. TIME TRIGGERS ===
	TIME_KEYWORDS = {
	    # English
	    "time", "clock", "date", "day", "calendar",
	    # romanized Hindi
	    "samay", "baj rahe", "ghadi",
	}

	def load_model():
	    """Download the router GGUF file if it is missing, then load it.

	    On success the module-level ``model`` is bound to the new ``Llama``
	    instance. Any exception raised along the way is logged and swallowed,
	    so the function always returns ``None``.
	    """
	    global model
	    try:
	        os.makedirs(BASE_DIR, exist_ok=True)
	        gguf_path = os.path.join(BASE_DIR, FILENAME)
	        needs_download = not os.path.exists(gguf_path)
	        if needs_download:
	            hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=BASE_DIR)

	        # Context window is set to 4096 so the 16 few-shot examples fit.
	        llama_options = {"n_ctx": 4096, "n_threads": 2, "verbose": False}
	        model = Llama(model_path=gguf_path, **llama_options)
	        logger.info("✅ Router Logic Loaded (Hybrid Mode)")
	    except Exception as load_error:
	        logger.error(f"Router Fail: {load_error}")

	async def load_model_async():
	    """Run the blocking ``load_model`` in a worker thread and await it.

	    Returns whatever ``load_model`` returns.
	    """
	    outcome = await asyncio.to_thread(load_model)
	    return outcome

	# The only intents the router is allowed to return.
	_VALID_INTENTS = ("coding", "search", "time", "chat")

	# Queries starting with one of these are treated as definition requests,
	# which are exempt from the coding-keyword rule.
	_DEFINITION_PREFIXES = ("what is", "define", "explain")

	# A build verb together with a product noun also means "coding".
	_BUILD_VERBS = ("create", "make", "build", "write")
	_BUILD_NOUNS = ("app", "game", "calculator", "website", "tool")

	# System prompt for the LLM fallback: 16 few-shot examples over the four intents.
	_ROUTER_SYSTEM_PROMPT = (
	    "You are the Router AI. Classify the user query into ONE category: 'coding', 'search', 'time', or 'chat'.\n\n"
	    "### EXAMPLES (Study these 16 scenarios):\n"
	    "1. 'Create a simple calculator using HTML' -> {\"intent\": \"coding\"}\n"
	    "2. 'Write a python script to scrape data' -> {\"intent\": \"coding\"}\n"
	    "3. 'Fix this NameError in my code' -> {\"intent\": \"coding\"}\n"
	    "4. 'How do I center a div in CSS?' -> {\"intent\": \"coding\"}\n"
	    "5. 'Who won the match yesterday?' -> {\"intent\": \"search\"}\n"
	    "6. 'Current price of Bitcoin' -> {\"intent\": \"search\"}\n"
	    "7. 'Weather forecast for tomorrow' -> {\"intent\": \"search\"}\n"
	    "8. 'Latest news about AI' -> {\"intent\": \"search\"}\n"
	    "9. 'What is the time right now?' -> {\"intent\": \"time\"}\n"
	    "10. 'My watch stopped, kya time hua hai?' -> {\"intent\": \"time\"}\n"
	    "11. 'Date today' -> {\"intent\": \"time\"}\n"
	    "12. 'Tell me a joke' -> {\"intent\": \"chat\"}\n"
	    "13. 'Who are you?' -> {\"intent\": \"chat\"}\n"
	    "14. 'Explain the theory of relativity' -> {\"intent\": \"chat\"}\n"
	    "15. 'What is the meaning of life?' -> {\"intent\": \"chat\"}\n"
	    "16. 'Make a snake game' -> {\"intent\": \"coding\"}\n\n"
	    "### INSTRUCTION:\n"
	    "Output ONLY valid JSON."
	)

	def _mentions_any(text, terms):
	    """Return True if any of *terms* occurs in *text* at the start of a word.

	    A plain substring test fires inside unrelated words ("now" in "know",
	    "api" in "capital", "app" in "happy"). Requiring the match to begin at a
	    word start removes those hits while still accepting inflected forms such
	    as "scripts" or "programming".
	    """
	    for term in terms:
	        pos = text.find(term)
	        while pos != -1:
	            if pos == 0:
	                return True
	            previous = text[pos - 1]
	            if not (previous.isalnum() or previous == "_"):
	                return True
	            pos = text.find(term, pos + 1)
	    return False

	def _intent_from_reply(raw):
	    """Parse the model's JSON reply into a validated intent dict.

	    Anything that is not a JSON object carrying a known ``intent`` value
	    collapses to the chat fallback. Raises whatever ``json.loads`` raises
	    on malformed input.
	    """
	    payload = json.loads(raw)
	    intent = payload.get("intent") if isinstance(payload, dict) else None
	    if isinstance(intent, str):
	        intent = intent.strip().lower()
	        if intent in _VALID_INTENTS:
	            return {"intent": intent}
	    return {"intent": "chat"}

	def _predict_sync(query: str):
	    """Classify *query* into one of the router intents.

	    Keyword rules run first; the loaded LLM is consulted only when no rule
	    fires.

	    Args:
	        query: Raw user message.

	    Returns:
	        A dict ``{"intent": X}`` where X is "coding", "search", "time" or
	        "chat". "chat" is the fallback for an empty query, a model that is
	        not loaded, an inference error, or unrecognised model output.
	    """
	    q_lower = query.lower().strip()
	    if not q_lower:
	        # Nothing to classify; skip the rules and the model call.
	        return {"intent": "chat"}

	    # --- LAYER 1: STRICT CODING CHECK (no model call) ---
	    # A technical keyword routes straight to coding, except for
	    # definition-style questions such as "What is HTML".
	    has_code_word = _mentions_any(q_lower, CODING_KEYWORDS)
	    is_definition = q_lower.startswith(_DEFINITION_PREFIXES)
	    if has_code_word and not is_definition:
	        return {"intent": "coding"}

	    # "Create/Make/Build/Write" + "app/game/..." logic.
	    if _mentions_any(q_lower, _BUILD_VERBS) and _mentions_any(q_lower, _BUILD_NOUNS):
	        return {"intent": "coding"}

	    # --- LAYER 2: STRICT SEARCH & TIME ---
	    # NOTE(review): search is checked before time, so a query like
	    # "What is the time right now?" is routed to search via "now", which
	    # disagrees with few-shot example 9. Confirm the intended precedence.
	    if _mentions_any(q_lower, SEARCH_KEYWORDS):
	        return {"intent": "search"}

	    if _mentions_any(q_lower, TIME_KEYWORDS) and len(q_lower.split()) < 15:
	        return {"intent": "time"}

	    # --- LAYER 3: LLM FALLBACK (16-shot prompting) ---
	    # Only this layer needs the model, so the keyword rules above keep
	    # working even when loading failed.
	    if model is None:
	        return {"intent": "chat"}

	    try:
	        response = model.create_chat_completion(
	            messages=[
	                {"role": "system", "content": _ROUTER_SYSTEM_PROMPT},
	                {"role": "user", "content": query},
	            ],
	            max_tokens=15,
	            temperature=0.0,  # Zero temperature forces strict pattern following.
	            response_format={"type": "json_object"},
	        )
	        return _intent_from_reply(response["choices"][0]["message"]["content"])
	    except Exception as exc:
	        # Best-effort router: any inference or parsing failure means chat.
	        logger.warning("Router inference failed, defaulting to chat: %s", exc)
	        return {"intent": "chat"}

	async def analyze_intent(query: str, timeout: float = 2.5):
	    """Classify *query* without blocking the event loop.

	    Runs ``_predict_sync`` in a worker thread and waits at most *timeout*
	    seconds for the answer.

	    Args:
	        query: Raw user message.
	        timeout: Maximum seconds to wait for the classification.

	    Returns:
	        The dict produced by ``_predict_sync``, or ``{"intent": "chat"}``
	        when the call times out or raises.

	    Task cancellation (``asyncio.CancelledError``) is deliberately not
	    caught, so cancelling the caller still works.
	    """
	    try:
	        return await asyncio.wait_for(
	            asyncio.to_thread(_predict_sync, query), timeout=timeout
	        )
	    except asyncio.TimeoutError:
	        # The worker thread cannot be interrupted; it keeps running in the
	        # background until _predict_sync returns.
	        logger.warning("Router timed out after %ss, defaulting to chat", timeout)
	        return {"intent": "chat"}
	    except Exception as exc:
	        logger.warning("Router failed (%s), defaulting to chat", exc)
	        return {"intent": "chat"}