# CodexTrouter / ProTalk_ModelBuilder.py
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
import threading

# microsoft/phi-2 is a 2.7B-parameter causal LM; use fp16 on GPU, fp32 on CPU.
model_name = "microsoft/phi-2"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    low_cpu_mem_usage=True,
).to(device)
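
# Optional lower-memory loading path. A minimal sketch, assuming the
# `accelerate` and `bitsandbytes` packages are installed; this is not part
# of the original script. device_map="auto" lets accelerate place layers
# across available devices, and 4-bit quantization shrinks the weights,
# so the trailing .to(device) above is no longer needed:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )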

# Persona prepended to the running transcript on every turn.
system_prompt = (
    "You are ProTalk, a professional and intelligent AI. "
    "You answer clearly, politely, and with insight. "
    "Be professional, witty, and helpful in all responses."
)

def chat_loop():
    history = []
    print("ProTalk Online — type 'exit' to quit.\n")
    while True:
        user_input = input("User: ").strip()
        if user_input.lower() == "exit":
            break

        # Rebuild the full transcript each turn: system prompt, prior turns,
        # the new user message, then the assistant cue for the model to complete.
        transcript = "\n".join([system_prompt] + history)
        prompt = transcript + f"\nUser: {user_input}\nProTalk:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # TextIteratorStreamer yields decoded text chunks as generate()
        # produces them; skip_prompt=True keeps the echoed prompt out of the stream.
        streamer = TextIteratorStreamer(
            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )

        # generate() blocks, so run it on a worker thread and consume the
        # streamer on the main thread. Pass the attention mask explicitly,
        # and set pad_token_id to EOS since phi-2's tokenizer defines no pad
        # token; both suppress runtime warnings.
        thread = threading.Thread(
            target=model.generate,
            kwargs={
                "input_ids": inputs["input_ids"],
                "attention_mask": inputs["attention_mask"],
                "max_new_tokens": 200,
                "do_sample": True,
                "temperature": 0.7,
                "top_p": 0.9,
                "pad_token_id": tokenizer.eos_token_id,
                "streamer": streamer,
            },
        )
        thread.start()

        output_text = ""
        for chunk in streamer:
            print(chunk, end="", flush=True)
            output_text += chunk
        thread.join()
        print()

        history.append(f"User: {user_input}")
        history.append(f"ProTalk: {output_text.strip()}")

if __name__ == "__main__":
    chat_loop()
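
# Usage: run directly from a terminal (requires `torch` and `transformers`;
# the first run downloads the phi-2 weights, roughly 5 GB):
#
#   python ProTalk_ModelBuilder.py
#   User: hello
#   ProTalk: ...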