mohamedachraf committed
Commit f8a9e21 · 1 Parent(s): 8dc5c8f

Add application file

Files changed (1):
app.py +30 -14
app.py CHANGED
@@ -138,8 +138,16 @@ def get_retrieval_qa_chain(text_file, hf_model, use_multi_query=False):
     retriever = default_retriever
     vectorstore = default_vectorstore

-    if text_file != default_text_file:
-        retriever, vectorstore = prepare_vector_store_retriever(text_file)
+    if text_file != default_text_file or default_text_file is None:
+        if text_file is not None and os.path.exists(text_file):
+            retriever, vectorstore = prepare_vector_store_retriever(text_file)
+        else:
+            # Create a dummy retriever if no file is available
+            from langchain.schema import Document
+            dummy_doc = Document(page_content="No document loaded. Please upload a file to get started.")
+            dummy_vectorstore = FAISS.from_documents([dummy_doc], embeddings)
+            retriever = VectorStoreRetriever(vectorstore=dummy_vectorstore, search_kwargs={"k": 1})
+            vectorstore = dummy_vectorstore

     if use_multi_query:
         # Custom retrieval function for multi-query
@@ -224,11 +232,10 @@ def generate(question, answer, text_file, max_new_tokens, use_multi_query, store
 # replaces the retriever in the question answering chain whenever a new file is uploaded
 def upload_file(file):
     if file is not None:
-        # Save uploaded file to temporary location
-        temp_path = os.path.join(tempfile.gettempdir(), file.name)
-        with open(temp_path, 'wb') as f:
-            f.write(file.read())
-        return file.name, temp_path
+        # In Gradio, file is already a path to the uploaded file
+        file_path = file.name if hasattr(file, 'name') else file
+        filename = os.path.basename(file_path)
+        return filename, file_path
     return None, None


@@ -241,24 +248,33 @@ with gr.Blocks() as demo:
     - Support for both PDF and text files
     - Multi-query RAG for improved retrieval
     - Store Q&A pairs in vector database for future reference
-    ### If you don't have one, there is a txt file already loaded, the new Oppenheimer movie's entire wikipedia page. The movie came out very recently in July, 2023, so the Phi-2 model is not aware of it.
-    The context size of the Phi-2 model is 2048 tokens, so even this medium size wikipedia page (11.5k tokens) does not fit in the context window.
-    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to the our query and inject it into our prompt.
-    The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one file at a time.
+    ### To get started, upload a text (.txt) or PDF (.pdf) file using the upload button below.
+    The context size of the Phi-2 model is 2048 tokens, so large documents are automatically split into chunks.
+    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to your query and inject them into our prompt.
+    The model is then able to answer questions by incorporating knowledge from the newly provided document.
     """
     )

     default_text_file = "Oppenheimer-movie-wiki.txt"
-    default_retriever, default_vectorstore = prepare_vector_store_retriever(default_text_file)
+
+    # Check if the default file exists; if not, fall back to None
+    if not os.path.exists(default_text_file):
+        default_text_file = None
+        default_retriever = None
+        default_vectorstore = None
+        initial_file_display = "No default file found - please upload a file"
+    else:
+        default_retriever, default_vectorstore = prepare_vector_store_retriever(default_text_file)
+        initial_file_display = default_text_file

     text_file = gr.State(default_text_file)

     gr.Markdown(
-        "## Upload a txt or PDF file or Use the Default 'Oppenheimer-movie-wiki.txt' that has already been loaded"
+        "## Upload a txt or PDF file to get started"
     )

     file_name = gr.Textbox(
-        label="Loaded file", value=default_text_file, lines=1, interactive=False
+        label="Loaded file", value=initial_file_display, lines=1, interactive=False
     )
     upload_button = gr.UploadButton(
         label="Click to upload a text or PDF file", file_types=[".txt", ".pdf"], file_count="single"