mohamedachraf committed
Commit f8a9e21 · 1 Parent(s): 8dc5c8f

Add application file

Files changed (1):
app.py +30 -14
app.py CHANGED
@@ -138,8 +138,16 @@ def get_retrieval_qa_chain(text_file, hf_model, use_multi_query=False):
     retriever = default_retriever
     vectorstore = default_vectorstore

-    if text_file != default_text_file:
-        retriever, vectorstore = prepare_vector_store_retriever(text_file)
+    if text_file != default_text_file or default_text_file is None:
+        if text_file is not None and os.path.exists(text_file):
+            retriever, vectorstore = prepare_vector_store_retriever(text_file)
+        else:
+            # Create a dummy retriever if no file is available
+            from langchain.schema import Document
+            dummy_doc = Document(page_content="No document loaded. Please upload a file to get started.")
+            dummy_vectorstore = FAISS.from_documents([dummy_doc], embeddings)
+            retriever = VectorStoreRetriever(vectorstore=dummy_vectorstore, search_kwargs={"k": 1})
+            vectorstore = dummy_vectorstore

     if use_multi_query:
         # Custom retrieval function for multi-query
@@ -224,11 +232,10 @@ def generate(question, answer, text_file, max_new_tokens, use_multi_query, store
 # replaces the retriever in the question answering chain whenever a new file is uploaded
 def upload_file(file):
     if file is not None:
-        # Save uploaded file to temporary location
-        temp_path = os.path.join(tempfile.gettempdir(), file.name)
-        with open(temp_path, 'wb') as f:
-            f.write(file.read())
-        return file.name, temp_path
+        # In Gradio, file is already a path to the uploaded file
+        file_path = file.name if hasattr(file, 'name') else file
+        filename = os.path.basename(file_path)
+        return filename, file_path
     return None, None


@@ -241,24 +248,33 @@ with gr.Blocks() as demo:
     - Support for both PDF and text files
     - Multi-query RAG for improved retrieval
     - Store Q&A pairs in vector database for future reference
-    ### If you don't have one, there is a txt file already loaded, the new Oppenheimer movie's entire wikipedia page. The movie came out very recently in July, 2023, so the Phi-2 model is not aware of it.
-    The context size of the Phi-2 model is 2048 tokens, so even this medium size wikipedia page (11.5k tokens) does not fit in the context window.
-    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to the our query and inject it into our prompt.
-    The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one file at a time.
+    ### To get started, upload a text (.txt) or PDF (.pdf) file using the upload button below.
+    The context size of the Phi-2 model is 2048 tokens, so large documents are automatically split into chunks.
+    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to your query and inject them into our prompt.
+    The model is then able to answer questions by incorporating knowledge from the newly provided document.
     """
     )

     default_text_file = "Oppenheimer-movie-wiki.txt"
-    default_retriever, default_vectorstore = prepare_vector_store_retriever(default_text_file)
+
+    # Check if the default file exists; if not, fall back to None
+    if not os.path.exists(default_text_file):
+        default_text_file = None
+        default_retriever = None
+        default_vectorstore = None
+        initial_file_display = "No default file found - please upload a file"
+    else:
+        default_retriever, default_vectorstore = prepare_vector_store_retriever(default_text_file)
+        initial_file_display = default_text_file

     text_file = gr.State(default_text_file)

     gr.Markdown(
-        "## Upload a txt or PDF file or Use the Default 'Oppenheimer-movie-wiki.txt' that has already been loaded"
+        "## Upload a txt or PDF file to get started"
     )

     file_name = gr.Textbox(
-        label="Loaded file", value=default_text_file, lines=1, interactive=False
+        label="Loaded file", value=initial_file_display, lines=1, interactive=False
     )
     upload_button = gr.UploadButton(
         label="Click to upload a text or PDF file", file_types=[".txt", ".pdf"], file_count="single"