import os

import faiss
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from llama_cpp import Llama

# Embedding model (LangChain wrapper around the MiniLM sentence-transformer,
# so it can be passed directly to the FAISS vector store)
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Local LLM (Qwen2.5 7B Instruct, quantized GGUF) served via llama.cpp
llm = Llama(
    model_path="model/qwen2_5-7b-instruct-q4_K_M.gguf",
    n_ctx=4096,
    n_threads=4,
    chat_format="chatml",
)


def load_documents():
    """Load every PDF and text file from the docs/ folder."""
    docs = []
    for file in os.listdir("docs"):
        path = os.path.join("docs", file)
        if file.endswith(".pdf"):
            loader = PyPDFLoader(path)
        else:
            loader = TextLoader(path, encoding="utf-8")
        docs.extend(loader.load())
    return docs


def prepare_vector_store():
    """Build the FAISS index on the first run, otherwise load it from disk."""
    # save_local("vectorstore") writes a folder containing index.faiss + index.pkl
    if os.path.exists("vectorstore/index.faiss"):
        return FAISS.load_local(
            "vectorstore", embedding, allow_dangerous_deserialization=True
        )

    docs = load_documents()
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    chunks = splitter.split_documents(docs)

    # Embed the chunks and build a flat L2 index by hand
    vectors = np.array(
        embedding.embed_documents([c.page_content for c in chunks]), dtype="float32"
    )
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    # Wire the raw index into LangChain's FAISS wrapper: it expects a docstore and
    # a mapping from FAISS row ids to docstore ids, not a plain list of chunks
    docstore = InMemoryDocstore({str(i): chunk for i, chunk in enumerate(chunks)})
    index_to_id = {i: str(i) for i in range(len(chunks))}
    vectorstore = FAISS(
        embedding_function=embedding,
        index=index,
        docstore=docstore,
        index_to_docstore_id=index_to_id,
    )
    vectorstore.save_local("vectorstore")
    return vectorstore


vectorstore = prepare_vector_store()


def ask_rag(question):
    """Retrieve the top-k chunks and answer the question with the local LLM."""
    results = vectorstore.similarity_search(question, k=5)
    context = "\n".join(r.page_content for r in results)

    prompt = f"""Answer the question based on the context below.

CONTEXT:
{context}

QUESTION: {question}

Answer:"""

    # Use the chat API so the ChatML template configured above is actually applied
    out = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
        temperature=0.4,
    )
    return out["choices"][0]["message"]["content"].strip()
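

# --- Example usage: a minimal sketch, not part of the pipeline above ---
# Assumes a docs/ folder with at least one PDF or text file and the GGUF model
# at the path configured in the Llama() call; the sample question is purely
# illustrative.
if __name__ == "__main__":
    answer = ask_rag("What are the main topics covered in the documents?")
    print(answer)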