Spaces:

preethishsg
/

reg-system-backend

Running

App Files Files Community

preethishsg committed on 1 day ago

Commit

1631730

verified ·

1 Parent(s): 36d88d5

Upload 4 files

Browse files

Files changed (4) hide show

main.py +161 -0
rag_system.py +214 -0
requirements.txt +11 -0
vector_db.py +118 -0

main.py ADDED Viewed

	@@ -0,0 +1,161 @@

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import List, Dict, Optional
+import uvicorn
+from pathlib import Path
+from rag_system import RAGSystem, initialize_from_documents
# Application object that every route below is registered on.
app = FastAPI(title="RAG System API", version="1.0.0")

# CORS middleware for the frontend.
# A wildcard origin must not be combined with credentialed requests: doing so
# would let any website issue cookie/authorization-bearing calls to this API.
# None of the endpoints in this module rely on cookies or auth headers, so
# credentials are disabled while origins stay open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global RAG system instance; populated by the startup hook and checked for
# None by every endpoint that needs it.
rag_system: Optional[RAGSystem] = None

# File the vector database is loaded from and saved to.
DB_PATH = "vector_db.json"
+# Request/Response Models
class Document(BaseModel):
    """One document submitted for indexing."""

    # Raw text of the document.
    text: str
    # Optional extra fields; the insert endpoint merges them into the record
    # that is handed to the RAG system.
    metadata: Optional[Dict] = None
class InsertRequest(BaseModel):
    """Request body for POST /insert."""

    # Documents to embed and store, in the order they should be inserted.
    documents: List[Document]
class InsertResponse(BaseModel):
    """Response body for POST /insert."""

    # True when the insert completed.
    success: bool
    # Identifiers returned by the RAG system for the inserted documents.
    document_ids: List[str]
    # Human-readable summary of the operation.
    message: str
class SearchRequest(BaseModel):
    """Request body for POST /search."""

    # Free-text query to embed and compare against stored documents.
    query: str
    # Number of results requested (defaults to 5).
    k: int = 5
class SearchResponse(BaseModel):
    """Response body for POST /search."""

    # Whatever the RAG system's retrieve() call returned, passed through as-is.
    results: List[Dict]
class QueryRequest(BaseModel):
    """Request body for POST /query."""

    # Question to answer.
    query: str
    # Number of documents to retrieve as context (defaults to 3).
    k: int = 3
    # Forwarded to the RAG system as its max_length argument (defaults to 150).
    max_length: int = 150
class QueryResponse(BaseModel):
    """Response body for POST /query; built from the dict the RAG system returns."""

    # The question that was asked.
    query: str
    # Generated answer text.
    answer: str
    # Documents that were retrieved for the question.
    retrieved_documents: List[Dict]
    # Context string reported back by the RAG system.
    context: str
class StatsResponse(BaseModel):
    """Response body for GET /stats; built from the RAG system's stats dict."""

    # Number of stored documents.
    total_documents: int
    # Dimension the vector database was configured with.
    dimension: int
    # Counter value the database reports as its next id.
    next_id: int
@app.on_event("startup")
async def startup_event():
    """Build the global RAG system when the application starts.

    If ``documents.json`` is present and no database file exists yet, the
    database is seeded from that file. Otherwise a ``RAGSystem`` is created,
    pointing at the database file when it exists and at nothing when it
    does not.
    """
    global rag_system
    print("Starting RAG System...")

    seed_file = Path("documents.json")
    db_file = Path(DB_PATH)

    # Seed only on a first run: documents available, nothing persisted yet.
    needs_bootstrap = seed_file.exists() and not db_file.exists()
    if needs_bootstrap:
        print("Initializing database from documents.json...")
        rag_system = initialize_from_documents(str(seed_file), DB_PATH)
    else:
        # This message is printed even when no database file exists; in that
        # case RAGSystem receives db_path=None.
        print("Loading existing database...")
        existing_db = DB_PATH if db_file.exists() else None
        rag_system = RAGSystem(db_path=existing_db)

    print("RAG System ready!")
@app.get("/")
async def root():
    """Health check: return a fixed status payload showing the API is up."""
    payload = dict(
        status="healthy",
        message="RAG System API is running",
        version="1.0.0",
    )
    return payload
@app.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Return the database statistics reported by the RAG system.

    Raises:
        HTTPException: 500 when the RAG system has not been initialized.
    """
    if rag_system is None:
        raise HTTPException(status_code=500, detail="RAG system not initialized")
    # The stats dict is unpacked straight into the response model.
    return StatsResponse(**rag_system.get_stats())
@app.post("/insert", response_model=InsertResponse)
async def insert_documents(request: InsertRequest):
    """Insert documents into the vector database and persist it.

    Each document is flattened into a single dict: its metadata fields plus
    a ``"text"`` key holding the document text. The metadata keys ``"text"``
    and ``"data"`` are dropped so that caller-supplied metadata can never
    replace the text that gets embedded.

    Raises:
        HTTPException: 500 when the RAG system is not initialized or when
            inserting/saving fails.
    """
    if rag_system is None:
        raise HTTPException(status_code=500, detail="RAG system not initialized")

    # Keys that carry the document body downstream; metadata must not set them.
    reserved_keys = ("text", "data")

    try:
        documents = []
        for doc in request.documents:
            extra = {
                key: value
                for key, value in (doc.metadata or {}).items()
                if key not in reserved_keys
            }
            documents.append({"text": doc.text, **extra})

        doc_ids = rag_system.insert_documents(documents)
        # Persist right away so the new documents survive a restart.
        rag_system.save_db(DB_PATH)

        return InsertResponse(
            success=True,
            document_ids=doc_ids,
            message=f"Successfully inserted {len(doc_ids)} documents"
        )
    except Exception as e:
        # API boundary: report any failure as a 500 with the cause attached.
        raise HTTPException(
            status_code=500, detail=f"Error inserting documents: {e}"
        ) from e
@app.post("/search", response_model=SearchResponse)
async def search_documents(request: SearchRequest):
    """Return the stored documents most similar to the query.

    Raises:
        HTTPException: 500 when the RAG system is not initialized or the
            search fails; 422 when ``k`` is not a positive integer.
    """
    if rag_system is None:
        raise HTTPException(status_code=500, detail="RAG system not initialized")
    # A zero or negative k has no meaningful top-k interpretation; reject it
    # here instead of passing it down to the retrieval layer.
    if request.k < 1:
        raise HTTPException(status_code=422, detail="k must be a positive integer")

    try:
        results = rag_system.retrieve(request.query, k=request.k)
        return SearchResponse(results=results)
    except Exception as e:
        # API boundary: report any failure as a 500 with the cause attached.
        raise HTTPException(
            status_code=500, detail=f"Error searching documents: {e}"
        ) from e
@app.post("/query", response_model=QueryResponse)
async def query_rag(request: QueryRequest):
    """Complete RAG query: retrieve + generate.

    Raises:
        HTTPException: 500 when the RAG system is not initialized or the
            query fails; 422 when ``k`` or ``max_length`` is not a positive
            integer.
    """
    if rag_system is None:
        raise HTTPException(status_code=500, detail="RAG system not initialized")
    # Reject sizes that cannot produce a sensible retrieval or generation.
    if request.k < 1:
        raise HTTPException(status_code=422, detail="k must be a positive integer")
    if request.max_length < 1:
        raise HTTPException(
            status_code=422, detail="max_length must be a positive integer"
        )

    try:
        result = rag_system.query(
            request.query,
            k=request.k,
            max_length=request.max_length
        )
        # The result dict's keys map one-to-one onto QueryResponse fields.
        return QueryResponse(**result)
    except Exception as e:
        # API boundary: report any failure as a 500 with the cause attached.
        raise HTTPException(
            status_code=500, detail=f"Error processing query: {e}"
        ) from e
if __name__ == "__main__":
    import os

    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 8080 when it is unset.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))

rag_system.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import os
+import torch
+import requests
+import numpy as np
+from typing import List, Dict
+from pathlib import Path
+from transformers import AutoTokenizer, AutoModel
+from vector_db import VectorDatabase
class RAGSystem:
    """
    RAG System:
    - Local embeddings using BGE-micro
    - Custom vector database for retrieval
    - Hosted lightweight LLM (Hugging Face Inference API) for generation
    """

    # Hugging Face identifier of the local embedding model.
    EMBED_MODEL_NAME = "TaylorAI/bge-micro"
    # Dimension a freshly created vector database is configured with.
    EMBED_DIMENSION = 384
    # Endpoint of the hosted generation model.
    HF_MODEL_URL = "https://api-inference.huggingface.co/models/google/flan-t5-small"

    def __init__(self, db_path: str = None):
        """Load the embedding model, open or create the vector DB, read LLM config.

        Args:
            db_path: Path of a saved vector database. It is loaded when the
                path is given and exists; otherwise an empty database is
                created.
        """
        print("Initializing RAG System...")
        # -----------------------------
        # Embedding Model (Local)
        # -----------------------------
        print("Loading embedding model (BGE-micro)...")
        self.embed_tokenizer = AutoTokenizer.from_pretrained(self.EMBED_MODEL_NAME)
        self.embed_model = AutoModel.from_pretrained(self.EMBED_MODEL_NAME)
        self.embed_model.eval()
        # -----------------------------
        # Vector Database
        # -----------------------------
        if db_path and Path(db_path).exists():
            print(f"Loading vector DB from {db_path}")
            self.db = VectorDatabase.load(db_path)
        else:
            print("Creating new vector DB")
            self.db = VectorDatabase(dimension=self.EMBED_DIMENSION)
        # -----------------------------
        # Hosted LLM Config
        # -----------------------------
        self.hf_api_token = os.getenv("HF_API_TOKEN")
        self.hf_model_url = self.HF_MODEL_URL
        if not self.hf_api_token:
            print("WARNING: HF_API_TOKEN not set. Generation will fail.")
        print("RAG System initialized successfully!")

    # --------------------------------------------------
    # Embedding
    # --------------------------------------------------
    def _embed(self, texts: List[str]) -> np.ndarray:
        """Embed a list of texts in one forward pass.

        Returns a 2-D array with one row per input text; each row is the
        model's last hidden state at the first token position.
        """
        with torch.no_grad():
            inputs = self.embed_tokenizer(
                texts,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors="pt",
            )
            outputs = self.embed_model(**inputs)
            return outputs.last_hidden_state[:, 0, :].numpy()

    def encode_text(self, text: str) -> np.ndarray:
        """Return the embedding vector for a single text."""
        return self._embed([text])[0]

    def encode_batch(self, texts: List[str], batch_size: int = 32) -> List[np.ndarray]:
        """Return one embedding vector per text, in input order.

        Texts are encoded ``batch_size`` at a time so that a large insert
        costs a handful of forward passes instead of one per document.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        vectors: List[np.ndarray] = []
        for start in range(0, len(texts), batch_size):
            vectors.extend(self._embed(texts[start:start + batch_size]))
        return vectors

    # --------------------------------------------------
    # Insert
    # --------------------------------------------------
    def insert_documents(self, documents: List[Dict]) -> List[str]:
        """Embed and store documents; return the ids assigned by the database.

        The text of each document is taken from its ``"data"`` key, falling
        back to ``"text"`` and then to an empty string. All other keys are
        stored alongside the text as metadata.
        """
        texts = []
        processed_docs = []
        for doc in documents:
            # `or ""` also covers an explicit None value, which the tokenizer
            # could not handle.
            text = doc.get("data") or doc.get("text") or ""
            if not isinstance(text, str):
                text = str(text)
            texts.append(text)
            metadata = {"text": text}
            metadata.update(
                (key, value) for key, value in doc.items()
                if key not in ("data", "text")
            )
            processed_docs.append(metadata)
        embeddings = self.encode_batch(texts)
        return self.db.batch_insert(embeddings, processed_docs)

    # --------------------------------------------------
    # Retrieve
    # --------------------------------------------------
    def retrieve(self, query: str, k: int = 5) -> List[Dict]:
        """Return up to ``k`` matches as dicts with ``id``, ``score``, ``metadata``.

        A non-positive ``k`` yields an empty list.
        """
        if k <= 0:
            return []
        query_embedding = self.encode_text(query)
        results = self.db.search(query_embedding, k=k)
        return [
            {"id": doc_id, "score": score, "metadata": metadata}
            for doc_id, score, metadata in results
        ]

    # --------------------------------------------------
    # Hosted LLM Generation (Optimized Prompt)
    # --------------------------------------------------
    @staticmethod
    def _extract_generated_text(result) -> str:
        """Pull the generated text out of a decoded API response.

        Returns the stripped ``generated_text`` of the first list element
        when the response has that shape; otherwise returns ``str(result)``
        so the caller still sees what the API sent back (for example an
        empty list or an error object).
        """
        if isinstance(result, list) and result:
            first = result[0]
            if isinstance(first, dict) and "generated_text" in first:
                return str(first["generated_text"]).strip()
        return str(result)

    def generate_response(self, query: str, context: str, max_length: int = 150) -> str:
        """Ask the hosted model to answer ``query`` using only ``context``.

        Never raises for request or decoding problems: those are reported in
        the returned string, prefixed with ``"LLM generation error: "``. When
        no API token is configured a fixed notice is returned instead.
        """
        if not self.hf_api_token:
            return "HF_API_TOKEN not configured."
        headers = {
            "Authorization": f"Bearer {self.hf_api_token}",
            "Content-Type": "application/json",
        }
        # 🔥 Optimized RAG Prompt
        prompt = f"""
You are an intelligent assistant answering questions strictly using the provided context.
Rules:
- Use only the given context.
- If the answer is not present, say: "The information is not available in the provided documents."
- Answer clearly and concisely.
Context:
{context}
Question:
{query}
Answer:
"""
        payload = {
            "inputs": prompt.strip(),
            "parameters": {
                "max_new_tokens": max_length,
                "temperature": 0.2,
                "top_p": 0.9,
                "do_sample": False,
            },
        }
        try:
            response = requests.post(
                self.hf_model_url,
                headers=headers,
                json=payload,
                timeout=30,
            )
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as e:
            # Network/HTTP failures and undecodable bodies become a message
            # rather than an exception, so query() can still return a result.
            return f"LLM generation error: {str(e)}"
        return self._extract_generated_text(result)

    # --------------------------------------------------
    # Full RAG Query
    # --------------------------------------------------
    def query(self, query: str, k: int = 3, max_length: int = 150) -> Dict:
        """Retrieve context for ``query`` and generate an answer from it.

        Returns a dict with ``query``, ``answer``, ``retrieved_documents``
        and ``context``. The full context is sent to the model; only its
        first 500 characters are included in the returned dict.
        """
        retrieved_docs = self.retrieve(query, k=k)
        if not retrieved_docs:
            return {
                "query": query,
                "answer": "No relevant documents found.",
                "retrieved_documents": [],
                "context": "",
            }
        context = " ".join(
            doc["metadata"].get("text", "") for doc in retrieved_docs
        )
        answer = self.generate_response(query, context, max_length)
        return {
            "query": query,
            "answer": answer,
            "retrieved_documents": retrieved_docs,
            "context": context[:500],
        }

    # --------------------------------------------------
    # Utilities
    # --------------------------------------------------
    def save_db(self, filepath: str):
        """Persist the vector database to ``filepath``."""
        self.db.save(filepath)

    def get_stats(self) -> Dict:
        """Return the vector database's statistics dict."""
        return self.db.stats()
def initialize_from_documents(json_path: str, db_path: str = "vector_db.json"):
    """Create a RAG system seeded from a JSON file and save its database.

    Args:
        json_path: Path of a JSON file whose root is a list of document dicts.
        db_path: Where the populated vector database is written.

    Returns:
        The populated ``RAGSystem``.

    Raises:
        ValueError: If the JSON root is not a list.
    """
    import json

    # Read and validate the seed file first so that a bad file fails before
    # the embedding model is loaded. JSON text is UTF-8, so decode it as such
    # rather than with the platform default encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        documents = json.load(f)
    if not isinstance(documents, list):
        raise ValueError(
            f"{json_path} must contain a JSON list of documents, "
            f"got {type(documents).__name__}"
        )

    rag = RAGSystem()
    print(f"Loading {len(documents)} documents...")
    rag.insert_documents(documents)
    rag.save_db(db_path)
    print("Database initialized successfully.")
    return rag

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.5.0
+torch==2.5.1
+transformers==4.35.0
+numpy==1.24.3
+python-multipart==0.0.6
+sentencepiece==0.1.99
+accelerate==0.24.1
+openai
+requests==2.31.0

vector_db.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import numpy as np
+from typing import List, Dict, Tuple
+import json
+from pathlib import Path
class VectorDatabase:
    """
    Custom vector database with flat index supporting:
    - Insert operations (single and batch)
    - Top-k search using dot product similarity on length-normalized vectors
    - JSON persistence
    """

    def __init__(self, dimension: int = 384):
        """Create an empty database for vectors of the given dimension."""
        self.dimension = dimension
        self.vectors = []   # stored vectors, one 1-D array per document
        self.metadata = []  # metadata dict per document, parallel to vectors
        self.ids = []       # document id per document, parallel to vectors
        self.next_id = 0    # numeric suffix of the next generated id

    def _as_vector(self, vector, label: str) -> np.ndarray:
        """Return ``vector`` as an array after checking it is 1-D and the right size.

        Raises:
            ValueError: If the input is not one-dimensional or its length
                differs from the database dimension.
        """
        arr = np.asarray(vector)
        if arr.ndim != 1:
            raise ValueError(f"{label} must be 1-dimensional, got shape {arr.shape}")
        if arr.shape[0] != self.dimension:
            raise ValueError(
                f"{label} dimension {arr.shape[0]} doesn't match database dimension {self.dimension}"
            )
        return arr

    def insert(self, vector: np.ndarray, metadata: Dict = None) -> str:
        """Insert a single vector with optional metadata and return its id.

        Raises:
            ValueError: If the vector is not 1-D or has the wrong dimension.
        """
        checked = self._as_vector(vector, "Vector")
        doc_id = f"doc_{self.next_id}"
        self.next_id += 1
        self.vectors.append(checked)
        self.metadata.append(metadata or {})
        self.ids.append(doc_id)
        return doc_id

    def batch_insert(self, vectors: List[np.ndarray], metadata_list: List[Dict] = None) -> List[str]:
        """Insert multiple vectors at once and return their ids in order.

        Every vector is validated before anything is stored, so a bad entry
        leaves the database unchanged instead of half-updated.

        Raises:
            ValueError: If the two lists differ in length or any vector is
                not 1-D with the database dimension.
        """
        if metadata_list is None:
            metadata_list = [None] * len(vectors)
        if len(vectors) != len(metadata_list):
            raise ValueError("Number of vectors and metadata entries must match")
        checked = [self._as_vector(vector, "Vector") for vector in vectors]
        return [
            self.insert(vector, metadata)
            for vector, metadata in zip(checked, metadata_list)
        ]

    def search(self, query_vector: np.ndarray, k: int = 5) -> List[Tuple[str, float, Dict]]:
        """
        Search for top-k most similar vectors using dot product similarity.

        Both the query and the stored vectors are divided by their Euclidean
        norm (plus 1e-8 to avoid division by zero) before the dot product.

        Returns: List of (doc_id, similarity_score, metadata) tuples, best
        match first. Empty when the database is empty or ``k`` is not
        positive.

        Raises:
            ValueError: If the query is not 1-D with the database dimension.
        """
        if not self.vectors:
            return []
        query = self._as_vector(query_vector, "Query vector").astype(np.float64)
        # A negative k must not reach the slice below, where it would drop
        # results from the end instead of limiting them.
        if k <= 0:
            return []

        query_unit = query / (np.linalg.norm(query) + 1e-8)
        matrix = np.asarray(self.vectors, dtype=np.float64)
        row_norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        scores = (matrix / (row_norms + 1e-8)) @ query_unit

        # Stable sort on the negated scores: descending order, with ties kept
        # in insertion order.
        top = min(k, len(scores))
        order = np.argsort(-scores, kind="stable")[:top]
        return [(self.ids[i], float(scores[i]), self.metadata[i]) for i in order]

    def save(self, filepath: str):
        """Save database to disk as JSON, creating parent directories.

        The data is written to a sibling ``.tmp`` file and then renamed over
        the target, so a failure while serializing or writing does not
        destroy a previously saved database.
        """
        data = {
            'dimension': self.dimension,
            'vectors': [np.asarray(v).tolist() for v in self.vectors],
            'metadata': self.metadata,
            'ids': self.ids,
            'next_id': self.next_id
        }
        target = Path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = target.with_name(target.name + ".tmp")
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f)
        tmp_path.replace(target)

    @classmethod
    def load(cls, filepath: str) -> 'VectorDatabase':
        """Load database from disk.

        Raises:
            ValueError: If the stored vectors, metadata and ids lists do not
                all have the same length.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        vectors = data['vectors']
        metadata = data['metadata']
        ids = data['ids']
        if not (len(vectors) == len(metadata) == len(ids)):
            raise ValueError(
                f"Corrupt database file {filepath}: "
                f"{len(vectors)} vectors, {len(metadata)} metadata entries, {len(ids)} ids"
            )
        db = cls(dimension=data['dimension'])
        db.vectors = [np.array(v) for v in vectors]
        db.metadata = metadata
        db.ids = ids
        db.next_id = data['next_id']
        return db

    def __len__(self):
        """Number of stored vectors."""
        return len(self.vectors)

    def stats(self) -> Dict:
        """Return database statistics"""
        return {
            'total_documents': len(self.vectors),
            'dimension': self.dimension,
            'next_id': self.next_id
        }