Nexari-Research committed on
Commit
d6a6892
·
verified ·
1 Parent(s): 2bcbe8d

Update chat_model.py

Browse files
Files changed (1) hide show
  1. chat_model.py +7 -4
chat_model.py CHANGED
@@ -8,8 +8,10 @@ logger = logging.getLogger("nexari.chat")
8
  BASE_DIR = "./models/chat"
9
  model = None
10
 
 
 
11
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
12
- FILENAME = "Llama-3.2-3B-Instruct-Q6_K.gguf"
13
 
14
  def load_model(local_dir: str = None):
15
  global model
@@ -17,18 +19,19 @@ def load_model(local_dir: str = None):
17
  try:
18
  os.makedirs(local_dir, exist_ok=True)
19
  path = os.path.join(local_dir, FILENAME)
 
20
  if not os.path.exists(path):
 
21
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
22
 
23
- # 2 Threads Strictly
24
  model = Llama(
25
  model_path=path,
26
  n_ctx=4096,
27
  n_threads=2,
28
- n_batch=256,
29
  verbose=False
30
  )
31
- logger.info("✅ Chat Model Ready")
32
  return model
33
  except Exception as e:
34
  logger.error(f"Chat Load Error: {e}")
 
8
  BASE_DIR = "./models/chat"
9
  model = None
10
 
11
+ # === OPTIMIZED: Llama 3.2 3B (Q4_K_M) ===
12
+ # Using Q4_K_M reduces memory bandwidth pressure on the CPU significantly.
13
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
14
+ FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
15
 
16
  def load_model(local_dir: str = None):
17
  global model
 
19
  try:
20
  os.makedirs(local_dir, exist_ok=True)
21
  path = os.path.join(local_dir, FILENAME)
22
+
23
  if not os.path.exists(path):
24
+ logger.info(f"⬇️ Downloading Chat Model (Fast Q4)...")
25
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
26
 
 
27
  model = Llama(
28
  model_path=path,
29
  n_ctx=4096,
30
  n_threads=2,
31
+ n_batch=512, # Helps process "Search Results" text block faster
32
  verbose=False
33
  )
34
+ logger.info("✅ Chat Model Ready (Turbo Mode)")
35
  return model
36
  except Exception as e:
37
  logger.error(f"Chat Load Error: {e}")