Spaces:
Running
Running
Update chat_model.py
Browse files - chat_model.py +7 -4
chat_model.py
CHANGED
|
@@ -8,8 +8,10 @@ logger = logging.getLogger("nexari.chat")
|
|
| 8 |
BASE_DIR = "./models/chat"
|
| 9 |
model = None
|
| 10 |
|
|
|
|
|
|
|
| 11 |
REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
|
| 12 |
-
FILENAME = "Llama-3.2-3B-Instruct-
|
| 13 |
|
| 14 |
def load_model(local_dir: str = None):
|
| 15 |
global model
|
|
@@ -17,18 +19,19 @@ def load_model(local_dir: str = None):
|
|
| 17 |
try:
|
| 18 |
os.makedirs(local_dir, exist_ok=True)
|
| 19 |
path = os.path.join(local_dir, FILENAME)
|
|
|
|
| 20 |
if not os.path.exists(path):
|
|
|
|
| 21 |
hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
|
| 22 |
|
| 23 |
-
# 2 Threads Strictly
|
| 24 |
model = Llama(
|
| 25 |
model_path=path,
|
| 26 |
n_ctx=4096,
|
| 27 |
n_threads=2,
|
| 28 |
-
n_batch=
|
| 29 |
verbose=False
|
| 30 |
)
|
| 31 |
-
logger.info("✅ Chat Model Ready")
|
| 32 |
return model
|
| 33 |
except Exception as e:
|
| 34 |
logger.error(f"Chat Load Error: {e}")
|
|
|
|
| 8 |
BASE_DIR = "./models/chat"
|
| 9 |
model = None
|
| 10 |
|
| 11 |
+
# === OPTIMIZED: Llama 3.2 3B (Q4_K_M) ===
|
| 12 |
+
# The Q4_K_M quantization keeps the weights small, which significantly reduces memory-bandwidth pressure during CPU inference.
|
| 13 |
REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
|
| 14 |
+
FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
|
| 15 |
|
| 16 |
def load_model(local_dir: str = None):
|
| 17 |
global model
|
|
|
|
| 19 |
try:
|
| 20 |
os.makedirs(local_dir, exist_ok=True)
|
| 21 |
path = os.path.join(local_dir, FILENAME)
|
| 22 |
+
|
| 23 |
if not os.path.exists(path):
|
| 24 |
+
logger.info(f"⬇️ Downloading Chat Model (Fast Q4)...")
|
| 25 |
hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
|
| 26 |
|
|
|
|
| 27 |
model = Llama(
|
| 28 |
model_path=path,
|
| 29 |
n_ctx=4096,
|
| 30 |
n_threads=2,
|
| 31 |
+
n_batch=512, # Helps process "Search Results" text block faster
|
| 32 |
verbose=False
|
| 33 |
)
|
| 34 |
+
logger.info("✅ Chat Model Ready (Turbo Mode)")
|
| 35 |
return model
|
| 36 |
except Exception as e:
|
| 37 |
logger.error(f"Chat Load Error: {e}")
|