Nexari-Research committed on
Commit
d6a6892
·
verified ·
1 Parent(s): 2bcbe8d

Update chat_model.py

Browse files
Files changed (1) hide show
  1. chat_model.py +7 -4
chat_model.py CHANGED
@@ -8,8 +8,10 @@ logger = logging.getLogger("nexari.chat")
8
  BASE_DIR = "./models/chat"
9
  model = None
10
 
 
 
11
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
12
- FILENAME = "Llama-3.2-3B-Instruct-Q6_K.gguf"
13
 
14
  def load_model(local_dir: str = None):
15
  global model
@@ -17,18 +19,19 @@ def load_model(local_dir: str = None):
17
  try:
18
  os.makedirs(local_dir, exist_ok=True)
19
  path = os.path.join(local_dir, FILENAME)
 
20
  if not os.path.exists(path):
 
21
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
22
 
23
- # 2 Threads Strictly
24
  model = Llama(
25
  model_path=path,
26
  n_ctx=4096,
27
  n_threads=2,
28
- n_batch=256,
29
  verbose=False
30
  )
31
- logger.info("✅ Chat Model Ready")
32
  return model
33
  except Exception as e:
34
  logger.error(f"Chat Load Error: {e}")
 
8
  BASE_DIR = "./models/chat"
9
  model = None
10
 
11
+ # === OPTIMIZED: Llama 3.2 3B (Q4_K_M) ===
12
+ # Using Q4_K_M reduces memory bandwidth pressure on the CPU significantly.
13
  REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
14
+ FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
15
 
16
  def load_model(local_dir: str = None):
17
  global model
 
19
  try:
20
  os.makedirs(local_dir, exist_ok=True)
21
  path = os.path.join(local_dir, FILENAME)
22
+
23
  if not os.path.exists(path):
24
+ logger.info(f"⬇️ Downloading Chat Model (Fast Q4)...")
25
  hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
26
 
 
27
  model = Llama(
28
  model_path=path,
29
  n_ctx=4096,
30
  n_threads=2,
31
+ n_batch=512, # Helps process "Search Results" text block faster
32
  verbose=False
33
  )
34
+ logger.info("✅ Chat Model Ready (Turbo Mode)")
35
  return model
36
  except Exception as e:
37
  logger.error(f"Chat Load Error: {e}")