preethishsg committed on
Commit
299dff9
·
verified ·
1 Parent(s): 8287c4e

Update vector_db.py

Browse files
Files changed (1) hide show
  1. vector_db.py +19 -4
vector_db.py CHANGED
@@ -12,15 +12,30 @@ class VectorDB:
12
 
13
  def load_documents(self, path="documents.json"):
14
  with open(path, "r", encoding="utf-8") as f:
15
- self.documents = json.load(f)
16
-
17
- texts = [doc["content"] for doc in self.documents]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  embeddings = self.model.encode(texts, convert_to_numpy=True)
19
-
20
  dim = embeddings.shape[1]
21
  self.index = faiss.IndexFlatL2(dim)
22
  self.index.add(embeddings)
23
 
 
24
  def search(self, query, top_k=3):
25
  if self.index is None:
26
  return []
 
12
 
13
  def load_documents(self, path="documents.json"):
14
  with open(path, "r", encoding="utf-8") as f:
15
+ raw_docs = json.load(f)
16
+
17
+ self.documents = []
18
+ texts = []
19
+
20
+ for i, doc in enumerate(raw_docs):
21
+ content = doc.get("content") or doc.get("text") or doc.get("data")
22
+ if not content:
23
+ print(f"⚠️ Skipping document {i}: no content/text field")
24
+ continue
25
+
26
+ self.documents.append(doc)
27
+ texts.append(content)
28
+
29
+ if not texts:
30
+ raise ValueError("No valid documents found to index")
31
+
32
  embeddings = self.model.encode(texts, convert_to_numpy=True)
33
+
34
  dim = embeddings.shape[1]
35
  self.index = faiss.IndexFlatL2(dim)
36
  self.index.add(embeddings)
37
 
38
+
39
  def search(self, query, top_k=3):
40
  if self.index is None:
41
  return []