# reg-system-backend / vector_db.py
# preethishsg's picture
# Update vector_db.py
# 299dff9 verified
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
class VectorDB:
    """In-memory semantic search over a JSON list of documents.

    Documents are embedded with a SentenceTransformer model and stored in a
    flat (exhaustive) FAISS L2 index. ``self.documents[i]`` is the document
    whose embedding sits at position ``i`` of ``self.index``; the two are
    always replaced together so they cannot drift apart.
    """

    # Keys probed, in priority order, to find a document's text. The first
    # key holding a truthy value wins.
    _CONTENT_KEYS = ("content", "text", "data")

    def __init__(self):
        """Load the embedding model and start with nothing indexed."""
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        self.index = None  # set by load_documents(); None means "not loaded"
        self.documents = []

    @classmethod
    def _extract_content(cls, doc):
        """Return the indexable text of *doc*, or ``None`` if it has none.

        Anything that is not a dict, or a dict in which every content key is
        missing or falsy (``None``, ``""``), yields ``None``.
        """
        if not isinstance(doc, dict):
            return None
        for key in cls._CONTENT_KEYS:
            value = doc.get(key)
            if value:
                return value
        return None

    def load_documents(self, path="documents.json"):
        """Read documents from a JSON file and (re)build the search index.

        Args:
            path: Path to a UTF-8 JSON file containing an array of objects.
                Each object should carry its text under ``"content"``,
                ``"text"`` or ``"data"``; entries without usable text are
                skipped with a printed warning.

        Raises:
            ValueError: If the file's top level is not a JSON array, or if no
                entry contains usable text. In both cases the previously
                loaded documents and index are left untouched.
        """
        with open(path, "r", encoding="utf-8") as f:
            raw_docs = json.load(f)

        if not isinstance(raw_docs, list):
            raise ValueError(
                f"Expected a JSON array of documents in {path!r}, "
                f"got {type(raw_docs).__name__}"
            )

        kept_docs = []
        texts = []
        for i, doc in enumerate(raw_docs):
            content = self._extract_content(doc)
            if content is None:
                print(f"⚠️ Skipping document {i}: no content/text field")
                continue
            kept_docs.append(doc)
            texts.append(content)

        if not texts:
            raise ValueError("No valid documents found to index")

        # FAISS only accepts C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(
            self.model.encode(texts, convert_to_numpy=True), dtype=np.float32
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Swap in the new state only after everything above succeeded, so a
        # failed reload never leaves an old index paired with a new doc list.
        self.documents = kept_docs
        self.index = index

    def search(self, query, top_k=3):
        """Return the text of the documents closest to *query*.

        Args:
            query: Query string to embed and look up.
            top_k: Maximum number of results to return.

        Returns:
            A list of at most ``top_k`` document texts, in the order the
            index returned them. Empty if nothing has been loaded or
            ``top_k`` is not positive.
        """
        if self.index is None or not self.documents or top_k <= 0:
            return []

        # Asking FAISS for more neighbours than it holds only yields padding.
        k = min(top_k, len(self.documents))
        query_embedding = np.asarray(
            self.model.encode([query], convert_to_numpy=True), dtype=np.float32
        )
        _distances, indices = self.index.search(query_embedding, k)

        results = []
        for idx in indices[0]:
            idx = int(idx)
            # FAISS marks "no result" slots with -1; as a Python list index
            # that would silently select the last document.
            if idx < 0 or idx >= len(self.documents):
                continue
            content = self._extract_content(self.documents[idx])
            if content is not None:
                results.append(content)
        return results