Spaces:

preethishsg
/

reg-system-backend

Running

preethishsg committed 1 day ago

Commit

299dff9

verified ·

1 Parent(s): 8287c4e

Update vector_db.py

Files changed (1) hide show

vector_db.py CHANGED Viewed

@@ -12,15 +12,30 @@ class VectorDB:
     def load_documents(self, path="documents.json"):
         with open(path, "r", encoding="utf-8") as f:
-            self.documents = json.load(f)
-        texts = [doc["content"] for doc in self.documents]
         embeddings = self.model.encode(texts, convert_to_numpy=True)
         dim = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dim)
         self.index.add(embeddings)
     def search(self, query, top_k=3):
         if self.index is None:
             return []

     def load_documents(self, path="documents.json"):
         with open(path, "r", encoding="utf-8") as f:
+            raw_docs = json.load(f)
+        self.documents = []
+        texts = []
+        for i, doc in enumerate(raw_docs):
+            content = doc.get("content") or doc.get("text") or doc.get("data")
+            if not content:
+                print(f"⚠️ Skipping document {i}: no content/text field")
+                continue
+            self.documents.append(doc)
+            texts.append(content)
+        if not texts:
+            raise ValueError("No valid documents found to index")
         embeddings = self.model.encode(texts, convert_to_numpy=True)
         dim = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dim)
         self.index.add(embeddings)
     def search(self, query, top_k=3):
         if self.index is None:
             return []