ppsingh commited on
Commit
944aab6
·
1 Parent(s): 3f62146

adding other components

Browse files
__pycache__/fileingestor.cpython-310.pyc CHANGED
Binary files a/__pycache__/fileingestor.cpython-310.pyc and b/__pycache__/fileingestor.cpython-310.pyc differ
 
__pycache__/orchestrator.cpython-310.pyc CHANGED
Binary files a/__pycache__/orchestrator.cpython-310.pyc and b/__pycache__/orchestrator.cpython-310.pyc differ
 
__pycache__/retriever.cpython-310.pyc CHANGED
Binary files a/__pycache__/retriever.cpython-310.pyc and b/__pycache__/retriever.cpython-310.pyc differ
 
__pycache__/vectorDB.cpython-310.pyc CHANGED
Binary files a/__pycache__/vectorDB.cpython-310.pyc and b/__pycache__/vectorDB.cpython-310.pyc differ
 
app.py CHANGED
@@ -72,72 +72,57 @@ with gr.Blocks(theme=gr.themes.Monochrome(), fill_width = True) as dashboard_app
72
  with gr.Tab("Retriever and Reranker"):
73
  with gr.Row(elem_classes = "centered-content-row"):
74
  with gr.Column(scale=1):
75
- gr.Markdown("## What is Retriever ?")
76
  gr.Markdown("""It is the crucial process of efficiently finding and extracting relevant \
77
  information from a vast knowledge base to ground and inform the chatbot's final answer.""")
78
  gr.Markdown(retriverText)
79
  embed_space("https://giz-chatfed-retriever0-3.hf.space", height=700)
80
 
81
- # 4. File Ingestor (Embedding via iframe pending due to compliance and Readme documentation missing)
82
  with gr.Tab("File Ingestor"):
83
  with gr.Row(elem_classes = "centered-content-row"):
84
  with gr.Column(scale=1):
85
- gr.Markdown("## What is File Ingestor")
86
  gr.Markdown("""In certain chatbot use-cases it might be that user input can be a file upload,\
87
  on top of existing Vector Database. In this case it's important that we ingest this \
88
  file and use it for next for relevant use """)
89
  gr.Markdown(fileingestorText)
90
- ### Enables this once space is public: after Proper EU aI act compliance release embed_space("https://giz-chatfed-whisp.hf.space", height=700)
91
 
92
  # 5. Generator
93
  with gr.Tab("Generator"):
94
  with gr.Row(elem_classes = "centered-content-row"):
95
  with gr.Column(scale=1):
96
- gr.Markdown("## What is Generator?")
97
- gr.Markdown("""It is the microserviceis the crucial process of efficiently finding and extracting relevant \
98
- information from a vast knowledge base to ground and inform the chatbot's final answer.""")
99
- gr.Markdown(generatorText)
 
100
 
101
- # 6. Orchest
102
  with gr.Tab("Orchestrator"):
103
  with gr.Row(elem_classes = "centered-content-row"):
104
  with gr.Column(scale=1):
105
- gr.Markdown("## What is Orchestrator ?")
106
  gr.Markdown(""" The Orchestrator is the central command module, defining the exact \
107
  steps and flow of data: it sequences the initial user prompt, directs the query \
108
  to the correct vector retrieval module, manages the document reranking (if applicable),\
109
- and finally routes the retrieved context and original prompt to the Large Language Model \
110
- (LLM) for final answer generation..""")
 
111
  gr.Markdown(orchestratorText)
112
 
 
113
  with gr.Tab("HuggingFace Chat UI"):
114
  with gr.Row(elem_classes = "centered-content-row"):
115
  with gr.Column(scale=1):
116
- gr.Markdown("## What is Retriever ?")
117
- gr.Markdown("""It is the crucial process of efficiently finding and extracting relevant \
118
- information from a vast knowledge base to ground and inform the chatbot's final answer.""")
119
- gr.Markdown(""" This mciroservice integrates with the vector database to retrieve semantically relevant documents,\
120
- with optional reranking for precision, ready for seamless use in ChaBo RAG workflows. \
121
-
122
- For more info on Retriever and code base visit the following links:
123
- - ChaBo_Retriever : [**ReadMe**](https://huggingface.co/spaces/GIZ/chatfed_retriever0.3/blob/main/README.md)
124
- - ChaBo_Retriever: [**Codebase**](https://huggingface.co/spaces/GIZ/chatfed_retriever0.3/tree/main)""")
125
- embed_space("https://giz-chatfed-retriever0-3.hf.space", height=700)
126
-
127
- with gr.Tab("Integrated UI"):
128
- with gr.Row(elem_classes = "centered-content-row"):
129
- with gr.Column(scale=1):
130
- gr.Markdown("## What is Retriever ?")
131
- gr.Markdown("""It is the crucial process of efficiently finding and extracting relevant \
132
- information from a vast knowledge base to ground and inform the chatbot's final answer.""")
133
- gr.Markdown(""" This mciroservice integrates with the vector database to retrieve semantically relevant documents,\
134
- with optional reranking for precision, ready for seamless use in ChaBo RAG workflows. \
135
-
136
- For more info on Retriever and code base visit the following links:
137
- - ChaBo_Retriever : [**ReadMe**](https://huggingface.co/spaces/GIZ/chatfed_retriever0.3/blob/main/README.md)
138
- - ChaBo_Retriever: [**Codebase**](https://huggingface.co/spaces/GIZ/chatfed_retriever0.3/tree/main)""")
139
- embed_space("https://giz-chatfed-retriever0-3.hf.space", height=700)
140
-
141
 
142
  dashboard_app.css = """
143
  .centered-content-row {
 
72
  with gr.Tab("Retriever and Reranker"):
73
  with gr.Row(elem_classes = "centered-content-row"):
74
  with gr.Column(scale=1):
75
+ gr.Markdown("### What is Retriever ?")
76
  gr.Markdown("""It is the crucial process of efficiently finding and extracting relevant \
77
  information from a vast knowledge base to ground and inform the chatbot's final answer.""")
78
  gr.Markdown(retriverText)
79
  embed_space("https://giz-chatfed-retriever0-3.hf.space", height=700)
80
 
81
+ # 4. File Ingestor (Embedding via iframe)
82
  with gr.Tab("File Ingestor"):
83
  with gr.Row(elem_classes = "centered-content-row"):
84
  with gr.Column(scale=1):
85
+ gr.Markdown("### What is File Ingestor ?")
86
  gr.Markdown("""In certain chatbot use-cases it might be that user input can be a file upload,\
87
  on top of existing Vector Database. In this case it's important that we ingest this \
88
  file and use it for next for relevant use """)
89
  gr.Markdown(fileingestorText)
90
+ embed_space("https://giz-eudr-chabo-ingestor.hf.space", height=700)
91
 
92
  # 5. Generator
93
  with gr.Tab("Generator"):
94
  with gr.Row(elem_classes = "centered-content-row"):
95
  with gr.Column(scale=1):
96
+ gr.Markdown("### What is Generator?")
97
+ gr.Markdown("""Drawing upon the relevant context provided by the retrieval module, \
98
+ the Generator is the module responsible for producing the final, coherent, and natural-sounding \
99
+ text response that directly addresses the user's query.""")
100
+ gr.Markdown(generatorText)
101
 
102
+ # 6. Orchestrator (Embedding via iframe)
103
  with gr.Tab("Orchestrator"):
104
  with gr.Row(elem_classes = "centered-content-row"):
105
  with gr.Column(scale=1):
106
+ gr.Markdown("### What is Orchestrator ?")
107
  gr.Markdown(""" The Orchestrator is the central command module, defining the exact \
108
  steps and flow of data: it sequences the initial user prompt, directs the query \
109
  to the correct vector retrieval module, manages the document reranking (if applicable),\
110
+ and finally routes the retrieved context and original prompt to the Generator module \
111
+ for final answer generation. """)
112
+ embed_space("https://giz-eudr-chabo-orchestrator.hf.space/gradio/")
113
  gr.Markdown(orchestratorText)
114
 
115
+ # 7. HuggingFace Chat UI (Embedding via iframe)
116
  with gr.Tab("HuggingFace Chat UI"):
117
  with gr.Row(elem_classes = "centered-content-row"):
118
  with gr.Column(scale=1):
119
+ gr.Markdown("### What is HuggingFace Chat UI ?")
120
+ gr.Markdown("""The Hugging Face Chat UI is a streamlined, open-source web interface \
121
+ specifically designed for building and deploying conversational AI applications. \
122
+ It offers a powerful, ready-to-use frontend for RAG pipelines and LLMs, \
123
+ enabling rapid prototyping and deployment on platforms like Hugging Face Spaces""")
124
+ embed_space("https://giz-eudr-chatbo-chatui.hf.space", height=700)
125
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  dashboard_app.css = """
128
  .centered-content-row {
fileingestor.py CHANGED
@@ -1,4 +1,4 @@
1
- fileingestorText = """ This mciroservice integrates with the Orchestrator and HuggingFace Chat UI and \
2
  uses the deterministic tool for further processing/actions. \
3
 
4
  [ChaBo_FileIngestor](https://huggingface.co/spaces/GIZ/eudr_chabo_ingestor) hosts a microservice which takes the \
 
1
+ fileingestorText = """ This microservice integrates with the Orchestrator and HuggingFace Chat UI and \
2
  uses the deterministic tool for further processing/actions. \
3
 
4
  [ChaBo_FileIngestor](https://huggingface.co/spaces/GIZ/eudr_chabo_ingestor) hosts a microservice which takes the \
orchestrator.py CHANGED
@@ -1,5 +1,7 @@
1
- orchestratorText = """ # Chabo Orchestrator Documentation
2
- ## Table of Contents
 
 
3
  1. Overview
4
  2. System Architecture
5
  3. Components
@@ -11,158 +13,160 @@ orchestratorText = """ # Chabo Orchestrator Documentation
11
 
12
  ## Overview
13
 
14
- The Chabo Orchestrator is the central coordination module of the Chabo RAG system. It orchestrates the flow between multiple microservices to provide intelligent document processing and question-answering capabilities. The system is designed for deployment on Huggingface Spaces.
15
-
16
- ### Key Features
17
- - **Workflow Orchestration**: Uses LangGraph to manage complex processing pipelines
18
- - **Multi-Modal Support**: Handles files dependent on ChatUI and Ingestor config (e.g. PDF, DOCX, GeoJSON, and JSON )
19
- - **Streaming Responses**: Real-time response generation with Server-Sent Events (SSE)
20
- - **Dual Processing Modes**:
21
- - **Direct Output Mode**: Returns ingestor results immediately (e.g. EUDR use case)
22
- - **Standard RAG Mode**: Full retrieval-augmented generation pipeline
23
- - **Intelligent Caching**: Prevents redundant file processing (e.g. EUDR use case)
24
- - **Multiple Interfaces**: FastAPI endpoints for modules; LangServe endpoints for ChatUI; Gradio UI for testing
25
-
26
-
27
- ## System Architecture
28
-
29
- ### High-Level Architecture
30
-
31
- ```
32
- ┌─────────────────┐
33
- │ ChatUI │
34
- │ Frontend │
35
- └────────┬────────┘
36
- HTTP/SSE
37
-
38
- ┌─────────────────────────────────┐
39
- │ Chabo Orchestrator │
40
- │ ┌─────────────────────────┐ │
41
- │ LangGraph Workflow
42
- ┌─────────────────┐
43
- │ │ Detect File │ │
44
- │ │ │ Type │ │ │
45
- │ │ └────────┬────────┘ │ │
46
- │ │ │ │ │
47
- │ │ ┌────────▼────────┐ │ │
48
- │ │ Ingest File │ │
49
- │ │ └────────┬────────┘ │ │
50
- │ │ │ │
51
- │ │ ┌─────┴──────┐ │ │
52
- │ │ │ │
53
- │ │ ┌──▼───┐ ┌────▼───┐ │ │
54
- │ │ Direct Retrieve│ │
55
- │ │ │Output│ │Context
56
- │ │ └──┬───┘ └────┬───┘ │ │
57
- │ │ │ │
58
- │ │ │ ┌────▼───┐ │ │
59
- │ │ │ Generate
60
- │ │ │ │Response│ │ │
61
- │ │ │ └────────┘ │ │
62
- └──────┴──────────────────┘
63
- └──────┬───────────┬──────────┬───┘
64
-
 
 
65
  ┌───▼──┐ ┌───▼───┐ ┌──▼────┐
66
  │Ingest│ │Retrie-│ │Genera-│
67
  │or │ │ver │ │tor │
68
  └──────┘ └───────┘ └───────┘
69
- ```
70
-
71
- ### Component Communication
72
-
73
- All communication between modules happens over HTTP:
74
- - **Orchestrator ↔ Ingestor**: Gradio Client (file upload, processing)
75
- - **Orchestrator ↔ Retriever**: Gradio Client (semantic search)
76
- - **Orchestrator ↔ Generator**: HTTP streaming (SSE for real-time responses)
77
- - **ChatUI ↔ Orchestrator**: LangServe streaming endpoints
78
 
79
- ### Workflow Logic
 
 
 
 
80
 
81
- The orchestrator implements two distinct workflows:
82
 
83
- **Direct Output Workflow** (when `DIRECT_OUTPUT=True` and file is new):
84
- ```
 
85
  File Upload → Detect Type → Ingest → Direct Output → Return Results
86
- ```
87
-
88
- **Standard RAG Workflow** (default or cached files):
89
- ```
90
  Query → Retrieve Context → Generate Response → Stream to User
91
- ```
 
92
 
93
- ## Components
94
 
95
- ### 1. Main Application (`main.py`)
 
 
 
 
96
 
97
- - LangServe endpoints for ChatUI integration
98
- - Gradio web interface for testing
99
- - FastAPI endpoints for diagnostics and future use (e.g. /health)
100
- - Cache management endpoint (for direct output use cases)
101
 
102
- **Key Functions:**
103
- - `chatui_adapter()`: Handles text-only queries
104
- - `chatui_file_adapter()`: Handles file uploads with queries
105
- - `create_gradio_interface()`: Test UI
106
 
107
- ### 2. Workflow Nodes (`nodes.py`)
108
 
109
- LangGraph nodes that implement the processing pipeline:
110
 
111
- **Node Functions:**
112
 
113
- - `detect_file_type_node()`: Identifies file type and determines routing
114
- - `ingest_node()`: Processes files through appropriate ingestor
115
- - `direct_output_node()`: Returns raw ingestor results
116
- - `retrieve_node()`: Fetches relevant context from vector store
117
- - `generate_node_streaming()`: Streams LLM responses
118
- - `route_workflow()`: Conditional routing logic
119
 
120
- **Helper Functions:**
121
 
122
- - `process_query_streaming()`: Unified streaming interface
123
- - `compute_file_hash()`: SHA256 hashing for deduplication
124
- - `clear_direct_output_cache()`: Cache management
125
 
126
- ### 3. Data Models (`models.py`)
127
 
128
- Pydantic models for type validation
129
 
130
- ### 4. Retriever Adapter (`retriever_adapter.py`)
131
 
132
- Abstraction layer for managing different retriever configurations:
133
- - Handles authentication
134
- - Formats queries and filters
135
 
136
- ### 5. Utilities (`utils.py`)
137
 
138
- Helper functions
139
 
140
- #### Conversation Context Management
141
 
142
- The `build_conversation_context()` function manages conversation history to provide relevant context to the generator while respecting token limits and conversation flow.
143
 
144
- **Key Features:**
145
 
146
- - **Context Selection**: Always includes the first user and assistant messages to maintain conversation context
147
- - **Recent Turn Limiting**: Includes only the last N complete turns (user + assistant pairs) to focus on recent conversation (default: 3)
148
- - **Character Limit Management**: Truncates to maximum character limits to prevent context overflow
149
 
150
 
151
- **Function Parameters:**
152
 
153
- ```python
154
  def build_conversation_context(
155
  messages, # List of Message objects from conversation
156
  max_turns: int = 3, # Maximum number of recent turns to include
157
  max_chars: int = 8000 # Maximum total characters in context
158
  ) -> str
159
- ```
160
 
161
- ## Configuration
 
162
 
163
- ### Configuration File (`params.cfg`)
164
 
165
- ```ini
166
  [file_processing]
167
  # Enable direct output mode: when True, ingestor results are returned directly
168
  # without going through the generator. When False, all files go through full RAG pipeline.
@@ -189,32 +193,31 @@ orchestratorText = """ # Chabo Orchestrator Documentation
189
  [general]
190
  # need to include this for HF inference endpoint limits
191
  MAX_CONTEXT_CHARS = 15000
192
- ```
193
 
194
- ### Environment Variables
195
 
196
- Create a `.env` file with:
197
 
198
- ```bash
199
  # Required for private HuggingFace Spaces
200
  HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx
 
201
 
202
- ```
203
-
204
- ### ChatUI Configuration
205
 
206
- ChatUI `DOTENV_LOCAL` example deployment configuration:
207
 
208
- ```javascript
209
  MODELS=`[
210
  {
211
  "name": "asistente_eudr",
212
  "displayName": "Asistente EUDR",
213
  "description": "Retrieval-augmented generation on EUDR Whisp API powered by ChatFed modules.",
214
- "instructions": {
215
- "title": "EUDR Asistente: Instructiones",
216
- "content": "Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.\n\n💡 **Cómo utilizarlo (panel a la derecha)**\n\n**Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.\n\n**Ejemplos:** seleccione entre preguntas de ejemplo seleccionadas de diferentes categorías.\n\n**Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.\n\n⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsibilidad».\n\n⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios."
217
- },
218
  "multimodal": true,
219
  "multimodalAcceptedMimetypes": [
220
  "application/geojson"
@@ -226,307 +229,301 @@ orchestratorText = """ # Chabo Orchestrator Documentation
226
  },
227
  "endpoints": [{
228
  "type": "langserve-streaming",
229
- "url": "https://giz-eudr-chabo-orchestrator.hf.space/chatfed-ui-stream",
230
- "streamingFileUploadUrl": "https://giz-eudr-chabo-orchestrator.hf.space/chatfed-with-file-stream",
231
  "inputKey": "text",
232
  "fileInputKey": "files"
233
  }]
234
  }
235
- ]`
236
-
237
- PUBLIC_ANNOUNCEMENT_BANNERS=`[
238
- {
239
- "title": "This is Chat Prototype for DSC users",
240
- "linkTitle": "Keep it Clean"
241
- }
242
- ]`
243
-
244
- PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice. Do not insert your personal data, especially sensitive, like health data."
245
- PUBLIC_APP_DESCRIPTION="Internal Chat-tool for DSC users for testing"
246
-
247
- PUBLIC_APP_NAME="EUDR ChatUI"
248
- ENABLE_ASSISTANTS=false
249
- ENABLE_ASSISTANTS_RAG=false
250
- COMMUNITY_TOOLS=false
251
- MONGODB_URL=mongodb://localhost:27017
252
-
253
- # Disable LLM-based title generation to prevent template queries
254
- LLM_SUMMARIZATION=false
255
- ```
256
-
257
- Key things to ensure here:
258
- - multimodalAcceptedMimetypes: file types to accept for upload via ChatUI
259
- - endpoints: orchestrator url + endpoints
260
-
261
- ## Deployment Guide
262
-
263
- ### Local Development
264
-
265
- **Prerequisites:**
266
- - Python 3.10+
267
- - pip
268
-
269
- **Steps:**
270
-
271
- 1. Clone the repository:
272
- ```bash
273
- git clone <your-repo-url>
274
- cd chabo-orchestrator
275
- ```
276
-
277
- 2. Install dependencies:
278
- ```bash
279
- pip install -r requirements.txt
280
- ```
281
-
282
- 3. Configure the system:
283
- ```bash
284
- # Create .env file
285
- echo "HF_TOKEN=your_token_here" > .env
286
-
287
- # Edit params.cfg with your service URLs
288
- nano params.cfg
289
- ```
290
-
291
- 4. Run the application:
292
- ```bash
293
- python app/main.py
294
- ```
295
-
296
- 5. Access interfaces:
297
- - Gradio UI: http://localhost:7860/gradio
298
- - API Docs: http://localhost:7860/docs
299
- - Health Check: http://localhost:7860/health
300
-
301
- ### Docker Deployment
302
-
303
- **Build the image:**
304
-
305
- ```bash
306
- docker build -t chabo-orchestrator .
307
- ```
308
-
309
- **Run the container:**
310
-
311
- ```bash
312
- docker run -d \
313
- --name chabo-orchestrator \
314
- -p 7860:7860 \
315
- chabo-orchestrator
316
- ```
317
-
318
- ### HuggingFace Spaces Deployment
319
-
320
- **Repository Structure:**
321
- ```
322
- your-space/
323
- ├── app/
324
- │ ├── main.py
325
- │ ├── nodes.py
326
- │ ├── models.py
327
- │ ├── retriever_adapter.py
328
- │ └── utils.py
329
- ├── Dockerfile
330
- ├── requirements.txt
331
- ├── params.cfg
332
- └─�� README.md
333
- ```
334
-
335
- **Steps:**
336
-
337
- 1. Create a new Space on HuggingFace
338
- 2. Select "Docker" as the SDK
339
- 3. Push your code to the Space repository
340
- 4. Add secrets in Space settings:
341
- - `HF_TOKEN`: Your HuggingFace token
342
- 5. The Space will automatically build and deploy
343
-
344
- **Important:** Ensure all service URLs in `params.cfg` are publicly accessible.
345
-
346
- ### Docker Compose (Multi-Service)
347
-
348
- Example orchestrated deployment for the entire Chabo stack (*NOTE - docker-compose will not run on Huggingface spaces*)
349
-
350
- ```yaml
351
- version: '3.8'
352
-
353
- services:
354
- orchestrator:
355
- build: ./orchestrator
356
- ports:
357
- - "7860:7860"
358
- environment:
359
- - HF_TOKEN=${HF_TOKEN}
360
- - RETRIEVER=http://retriever:7861
361
- - GENERATOR=http://generator:7862
362
- - INGESTOR=http://ingestor:7863
363
- depends_on:
364
- - retriever
365
- - generator
366
- - ingestor
367
-
368
- retriever:
369
- build: ./retriever
370
- ports:
371
- - "7861:7861"
372
- environment:
373
- - QDRANT_API_KEY=${QDRANT_API_KEY}
374
-
375
- generator:
376
- build: ./generator
377
- ports:
378
- - "7862:7862"
379
- environment:
380
- - HF_TOKEN=${HF_TOKEN}
381
-
382
- ingestor:
383
- build: ./ingestor
384
- ports:
385
- - "7863:7863"
386
- ```
387
-
388
- ## API Reference
389
-
390
- ### Endpoints
391
-
392
- #### Health Check
393
- ```
394
- GET /health
395
- ```
396
- Returns service health status.
397
-
398
- **Response:**
399
- ```json
400
  {
401
- "status": "healthy"
 
402
  }
403
- ```
404
-
405
- #### Root Information
406
- ```
407
- GET /
408
- ```
409
- Returns API metadata and available endpoints.
410
-
411
- #### Text Query (Streaming)
412
- ```
413
- POST /chatfed-ui-stream/stream
414
- Content-Type: application/json
415
- ```
416
-
417
- **Request Body:**
418
- ```json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  {
420
- "input": {
421
- "text": "What are EUDR requirements?"
 
422
  }
423
- }
424
- ```
425
-
426
- **Response:** Server-Sent Events stream
427
- ```
428
- event: data
429
- data: "The EUDR requires..."
430
-
431
- event: sources
432
- data: {"sources": [...]}
433
-
434
- event: end
435
- data: ""
436
- ```
437
-
438
- #### File Upload Query (Streaming)
439
- ```
440
- POST /chatfed-with-file-stream/stream
441
- Content-Type: application/json
442
- ```
443
 
444
- **Request Body:**
445
- ```json
446
- {
447
- "input": {
448
- "text": "Analyze this GeoJSON",
449
- "files": [
450
- {
451
- "name": "boundaries.geojson",
452
- "type": "base64",
453
- "content": "base64_encoded_content"
454
- }
455
- ]
456
- }
457
- }
458
- ```
459
 
460
- #### Clear Cache
461
- ```
462
- POST /clear-cache
463
- ```
464
- Clears the direct output file cache.
 
465
 
466
- **Response:**
467
- ```json
468
- {
469
- "status": "cache cleared"
470
- }
471
- ```
472
 
473
- ### Gradio Interface
474
 
475
- #### Interactive Query
 
476
 
477
- Gradio's default API endpoint for UI interactions. If running on huggingface spaces, access via: https://[ORG_NAME]-[SPACE_NAME].hf.space/gradio/
478
 
 
479
 
480
- ## Troubleshooting
481
 
482
- ### Common Issues
 
 
 
483
 
484
- #### 1. File Upload Fails
485
 
486
- **Symptoms:** "Error reading file" or "Failed to decode uploaded file"
487
 
488
- **Solutions:**
489
- - Verify file is properly base64 encoded
490
- - Check file size limits (default: varies by deployment)
491
- - Ensure MIME type is in `multimodalAcceptedMimetypes`
 
492
 
493
- #### 2. Slow Responses
494
 
495
- **Symptoms:** Long wait times for responses
496
 
497
- **Solutions:**
498
- - Check network latency to external services
499
- - Verify `MAX_CONTEXT_CHARS` isn't too high
500
- - Consider enabling `DIRECT_OUTPUT` for suitable file types
501
- - Check logs for retrieval/generation bottlenecks
502
 
503
- #### 3. Cache Not Clearing
504
 
505
- **Symptoms:** Same file shows cached results when it shouldn't
506
 
507
- **Solutions:**
508
- - Call `/clear-cache` endpoint
509
- - Restart the service (clears in-memory cache)
510
- - Check if `DIRECT_OUTPUT=True` in config
 
511
 
512
- #### 4. Service Connection Errors
513
 
514
- **Symptoms:** "Connection refused" or timeout errors
515
 
516
- **Solutions:**
517
- - Verify all service URLs in `params.cfg` are accessible
518
- - Check HF_TOKEN is valid and has access to private spaces (*NOTE - THE ORCHESTRATOR CURRENTLY MUST BE PUBLIC*)
519
- - Test each service independently with health checks
520
- - Review firewall/network policies
521
 
 
522
 
523
- ### Version History
 
524
 
525
- - **v1.0.0**: Initial release with LangGraph orchestration
526
- - Current implementation supports streaming, caching, and dual-mode processing
527
 
528
- ---
529
 
530
- **Documentation Last Updated:** 2025-10-01
531
- **Compatible With:** Python 3.10+, LangGraph 0.2+, FastAPI 0.100+
532
- """
 
1
+ orchestratorText = """
2
+ # Chabo Orchestrator Documentation
3
+
4
+ ### Table of Contents
5
  1. Overview
6
  2. System Architecture
7
  3. Components
 
13
 
14
  ## Overview
15
 
16
+ The Chabo Orchestrator is the central coordination module of the Chabo RAG system. \
17
+ It orchestrates the flow between multiple microservices to provide intelligent \
18
+ document processing and question-answering capabilities. The system is designed for deployment on Huggingface Spaces.
19
+
20
+ ### Key Features:
21
+ - **Workflow Orchestration**: Uses LangGraph to manage complex processing pipelines
22
+ - **Multi-Modal Support**: Handles files dependent on ChatUI and Ingestor config (e.g. PDF, DOCX, GeoJSON, and JSON )
23
+ - **Streaming Responses**: Real-time response generation with Server-Sent Events (SSE)
24
+ - **Dual Processing Modes**:
25
+ - **Direct Output Mode**: Returns ingestor results immediately (e.g. EUDR use case)
26
+ - **Standard RAG Mode**: Full retrieval-augmented generation pipeline
27
+ - **Intelligent Caching**: Prevents redundant file processing (e.g. EUDR use case)
28
+ - **Multiple Interfaces**: FastAPI endpoints for modules; LangServe endpoints for ChatUI; Gradio UI for testing
29
+
30
+ ## System Architecture
31
+
32
+ ### High-Level Architecture
33
+
34
+ ```
35
+
36
+ ┌─────────────────┐
37
+ │ ChatUI │
38
+ Frontend │
39
+ └────────┬────────┘
40
+ │ HTTP/SSE
41
+
42
+ ┌─────────────────────────────────┐
43
+ Chabo Orchestrator
44
+ ┌─────────────────────────┐
45
+ │ │ LangGraph Workflow │ │
46
+ │ │ ┌─────────────────┐ │ │
47
+ │ │ │ Detect File │ │ │
48
+ │ │ Type │ │
49
+ │ │ └────────┬────────┘ │ │
50
+ │ │ │ │
51
+ │ │ ┌────────▼────────┐ │ │
52
+ │ │ Ingest File │ │
53
+ │ │ └────────┬────────┘ │ │
54
+ │ │ │ │ │
55
+ │ │ ┌─────┴──────┐ │ │
56
+ │ │ │ │
57
+ │ │ ┌──▼───┐ ┌────▼───┐ │ │
58
+ │ │ │Direct│ │Retrieve│ │ │
59
+ │ │ Output Context │ │
60
+ │ │ └──┬───┘ └────┬───┘ │ │
61
+ │ │ │ │ │
62
+ │ │ │ ┌────▼───┐ │ │
63
+ │ │ │ │Generate│ │ │
64
+ │ │ │Response│ │
65
+ │ │ │ └────────┘ │ │
66
+ └──────┴──────────────────┘
67
+ └──────┬───────────┬──────────┬───┘
68
+ │ │ │
69
  ┌───▼──┐ ┌───▼───┐ ┌──▼────┐
70
  │Ingest│ │Retrie-│ │Genera-│
71
  │or │ │ver │ │tor │
72
  └──────┘ └───────┘ └───────┘
73
+
74
+ ```
75
+ ### Component Communication
 
 
 
 
 
 
76
 
77
+ All communication between modules happens over HTTP:
78
+ - **Orchestrator ↔ Ingestor**: Gradio Client (file upload, processing)
79
+ - **Orchestrator ↔ Retriever**: Gradio Client (semantic search)
80
+ - **Orchestrator ↔ Generator**: HTTP streaming (SSE for real-time responses)
81
+ - **ChatUI ↔ Orchestrator**: LangServe streaming endpoints
82
 
83
+ ### Workflow Logic
84
 
85
+ The orchestrator implements two distinct workflows:
86
+ **Direct Output Workflow** (when `DIRECT_OUTPUT=True` and file is new):
87
+ ```
88
  File Upload → Detect Type → Ingest → Direct Output → Return Results
89
+ ```
90
+ **Standard RAG Workflow** (default or cached files):
91
+ ```
 
92
  Query → Retrieve Context → Generate Response → Stream to User
93
+ ```
94
+
95
 
96
+ ## Components
97
 
98
+ ### 1. Main Application (`main.py`)
99
+ - LangServe endpoints for ChatUI integration
100
+ - Gradio web interface for testing
101
+ - FastAPI endpoints for diagnostics and future use (e.g. /health)
102
+ - Cache management endpoint (for direct output use cases)
103
 
 
 
 
 
104
 
105
+ **Key Functions:**
106
+ - `chatui_adapter()`: Handles text-only queries
107
+ - `chatui_file_adapter()`: Handles file uploads with queries
108
+ - `create_gradio_interface()`: Test UI
109
 
110
+ ### 2. Workflow Nodes (`nodes.py`)
111
 
112
+ LangGraph nodes that implement the processing pipeline:
113
 
114
+ **Node Functions:**
115
 
116
+ - `detect_file_type_node()`: Identifies file type and determines routing
117
+ - `ingest_node()`: Processes files through appropriate ingestor
118
+ - `direct_output_node()`: Returns raw ingestor results
119
+ - `retrieve_node()`: Fetches relevant context from vector store
120
+ - `generate_node_streaming()`: Streams LLM responses
121
+ - `route_workflow()`: Conditional routing logic
122
 
123
+ **Helper Functions:**
124
 
125
+ - `process_query_streaming()`: Unified streaming interface
126
+ - `compute_file_hash()`: SHA256 hashing for deduplication
127
+ - `clear_direct_output_cache()`: Cache management
128
 
129
+ ### 3. Data Models (`models.py`)
130
 
131
+ Pydantic models for type validation
132
 
133
+ ### 4. Retriever Adapter (`retriever_adapter.py`)
134
 
135
+ Abstraction layer for managing different retriever configurations:
136
+ - Handles authentication
137
+ - Formats queries and filters
138
 
139
+ ### 5. Utilities (`utils.py`)
140
 
141
+ Helper functions
142
 
143
+ #### Conversation Context Management
144
 
145
+ The `build_conversation_context()` function manages conversation history to provide relevant context to the generator while respecting token limits and conversation flow.
146
 
147
+ **Key Features:**
148
 
149
+ - **Context Selection**: Always includes the first user and assistant messages to maintain conversation context
150
+ - **Recent Turn Limiting**: Includes only the last N complete turns (user + assistant pairs) to focus on recent conversation (default: 3)
151
+ - **Character Limit Management**: Truncates to maximum character limits to prevent context overflow
152
 
153
 
154
+ **Function Parameters:**
155
 
156
+ ```python
157
  def build_conversation_context(
158
  messages, # List of Message objects from conversation
159
  max_turns: int = 3, # Maximum number of recent turns to include
160
  max_chars: int = 8000 # Maximum total characters in context
161
  ) -> str
162
+ ```
163
 
164
+
165
+ ## Configuration
166
 
167
+ ### Configuration File (`params.cfg`)
168
 
169
+ ```ini
170
  [file_processing]
171
  # Enable direct output mode: when True, ingestor results are returned directly
172
  # without going through the generator. When False, all files go through full RAG pipeline.
 
193
  [general]
194
  # need to include this for HF inference endpoint limits
195
  MAX_CONTEXT_CHARS = 15000
196
+ ```
197
 
198
+ ### Environment Variables
199
 
200
+ Create a `.env` file with:
201
 
202
+ ```bash
203
  # Required for private HuggingFace Spaces
204
  HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx
205
+ ```
206
 
207
+ ### ChatUI Configuration
 
 
208
 
209
+ ChatUI `DOTENV_LOCAL` example deployment configuration:
210
 
211
+ ```javascript
212
  MODELS=`[
213
  {
214
  "name": "asistente_eudr",
215
  "displayName": "Asistente EUDR",
216
  "description": "Retrieval-augmented generation on EUDR Whisp API powered by ChatFed modules.",
217
+ "instructions": {
218
+ "title": "EUDR Asistente: Instrucciones",
219
+ "content": "Hola, soy Asistente EUDR, un asistente conversacional basado en inteligencia artificial diseñado para ayudarle a comprender el cumplimiento y el análisis del Reglamento de la UE sobre la deforestación. Responderé a sus preguntas utilizando los informes EUDR y los archivos GeoJSON cargados.\\n\\n💡 **Cómo utilizarlo (panel a la derecha)**\\n\\n**Modo de uso:** elija entre subir un archivo GeoJSON para su análisis o consultar los informes EUDR filtrados por país.\\n\\n**Ejemplos:** seleccione entre preguntas de ejemplo seleccionadas de diferentes categorías.\\n\\n**Referencias:** consulte las fuentes de contenido utilizadas para la verificación de datos.\\n\\n⚠️ Para conocer las limitaciones y la información sobre la recopilación de datos, consulte la pestaña «Exención de responsabilidad»\\n\\n⚠️ Al utilizar esta aplicación, usted acepta que recopilemos estadísticas de uso (como preguntas formuladas, comentarios realizados, duración de la sesión, tipo de dispositivo e información geográfica anónima) para comprender el rendimiento y mejorar continuamente la herramienta, basándonos en nuestro interés legítimo por mejorar nuestros servicios."
220
+ },
221
  "multimodal": true,
222
  "multimodalAcceptedMimetypes": [
223
  "application/geojson"
 
229
  },
230
  "endpoints": [{
231
  "type": "langserve-streaming",
232
+ "url": "https://giz-eudr-chabo-orchestrator.hf.space/chatfed-ui-stream",
233
+ "streamingFileUploadUrl": "https://giz-eudr-chabo-orchestrator.hf.space/chatfed-with-file-stream",
234
  "inputKey": "text",
235
  "fileInputKey": "files"
236
  }]
237
  }
238
+ ]`
239
+
240
+ PUBLIC_ANNOUNCEMENT_BANNERS=`[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  {
242
+ "title": "This is a Chat Prototype for DSC users",
243
+ "linkTitle": "Keep it Clean"
244
  }
245
+ ]`
246
+
247
+ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice. Do not insert your personal data, especially sensitive, like health data."
248
+ PUBLIC_APP_DESCRIPTION="Internal Chat-tool for DSC users for testing"
249
+
250
+ PUBLIC_APP_NAME="EUDR ChatUI"
251
+ ENABLE_ASSISTANTS=false
252
+ ENABLE_ASSISTANTS_RAG=false
253
+ COMMUNITY_TOOLS=false
254
+ MONGODB_URL=mongodb://localhost:27017
255
+
256
+ # Disable LLM-based title generation to prevent template queries
257
+ LLM_SUMMARIZATION=false
258
+ ```
259
+ Key things to ensure here:
260
+ - multimodalAcceptedMimetypes: file types to accept for upload via ChatUI
261
+ - endpoints: orchestrator url + endpoints
262
+
263
+ ## Deployment Guide
264
+
265
+ ### Local Development
266
+
267
+ **Prerequisites:**
268
+ - Python 3.10+
269
+ - pip
270
+
271
+ **Steps:**
272
+
273
+ 1. Clone the repository:
274
+ ```bash
275
+ git clone <your-repo-url>
276
+ cd chabo-orchestrator
277
+ ```
278
+
279
+ 2. Install dependencies:
280
+ ```bash
281
+ pip install -r requirements.txt
282
+ ```
283
+
284
+ 3. Configure the system:
285
+ ```bash
286
+ # Create .env file
287
+ echo "HF_TOKEN=your_token_here" > .env
288
+
289
+ # Edit params.cfg with your service URLs
290
+ nano params.cfg
291
+ ```
292
+
293
+ 4. Run the application:
294
+ ```bash
295
+ python app/main.py
296
+ ```
297
+
298
+ 5. Access interfaces:
299
+ - Gradio UI: http://localhost:7860/gradio
300
+ - API Docs: http://localhost:7860/docs
301
+ - Health Check: http://localhost:7860/health
302
+
303
+ ### Docker Deployment
304
+
305
+ **Build the image:**
306
+
307
+ ```bash
308
+ docker build -t chabo-orchestrator .
309
+ ```
310
+
311
+ **Run the container:**
312
+
313
+ ```bash
314
+ docker run -d --name chabo-orchestrator -p 7860:7860 chabo-orchestrator
315
+ ```
316
+
317
+ ### HuggingFace Spaces Deployment
318
+
319
+
320
+ **Repository Structure:**
321
+ ```
322
+ your-space/
323
+ ├── app/
324
+ │ ├── main.py
325
+ │ ├── nodes.py
326
+ │ ├── models.py
327
+ │ ├── retriever_adapter.py
328
+ │ └── utils.py
329
+ ├── Dockerfile
330
+ ├── requirements.txt
331
+ ├── params.cfg
332
+ └── README.md
333
+ ```
334
+ **Steps:**
335
+
336
+ 1. Create a new Space on HuggingFace
337
+ 2. Select "Docker" as the SDK
338
+ 3. Push your code to the Space repository
339
+ 4. Add secrets in Space settings:
340
+ - `HF_TOKEN`: Your HuggingFace token
341
+ 5. The Space will automatically build and deploy
342
+
343
+ **Important:** Ensure all service URLs in `params.cfg` are publicly accessible.
344
+
345
+ ### Docker Compose (Multi-Service)
346
+
347
+ Example orchestrated deployment for the entire Chabo stack (*NOTE - docker-compose will not run on Huggingface spaces*)
348
+ ```yaml
349
+ version: '3.8'
350
+
351
+ services:
352
+ orchestrator:
353
+ build: ./orchestrator
354
+ ports:
355
+ - "7860:7860"
356
+ environment:
357
+ - HF_TOKEN=${HF_TOKEN}
358
+ - RETRIEVER=http://retriever:7861
359
+ - GENERATOR=http://generator:7862
360
+ - INGESTOR=http://ingestor:7863
361
+ depends_on:
362
+ - retriever
363
+ - generator
364
+ - ingestor
365
+
366
+ retriever:
367
+ build: ./retriever
368
+ ports:
369
+ - "7861:7861"
370
+ environment:
371
+ - QDRANT_API_KEY=${QDRANT_API_KEY}
372
+
373
+ generator:
374
+ build: ./generator
375
+ ports:
376
+ - "7862:7862"
377
+ environment:
378
+ - HF_TOKEN=${HF_TOKEN}
379
+
380
+ ingestor:
381
+ build: ./ingestor
382
+ ports:
383
+ - "7863:7863"
384
+ ```
385
+ ## API Reference
386
+
387
+ ### Endpoints
388
+
389
+ #### Health Check
390
+ ```
391
+ GET /health
392
+ ```
393
+ Returns service health status.
394
+
395
+ **Response:**
396
+ ```json
397
+ {
398
+ "status": "healthy"
399
+ }
400
+ ```
401
+
402
+ #### Root Information
403
+ ```
404
+ GET /
405
+ ```
406
+ Returns API metadata and available endpoints.
407
+
408
+ #### Text Query (Streaming)
409
+ ```
410
+ POST /chatfed-ui-stream/stream
411
+ Content-Type: application/json
412
+ ```
413
+
414
+ **Request Body:**
415
+ ```json
416
+ {
417
+ "input": {
418
+ "text": "What are EUDR requirements?"
419
+ }
420
+ }
421
+ ```
422
+
423
+ **Response:** Server-Sent Events stream
424
+ ```
425
+ event: data
426
+ data: "The EUDR requires..."
427
+
428
+ event: sources
429
+ data: {"sources": [...]}
430
+
431
+ event: end
432
+ data: ""
433
+ ```
434
+ #### File Upload Query (Streaming)
435
+ ```
436
+ POST /chatfed-with-file-stream/stream
437
+ Content-Type: application/json
438
+ ```
439
+
440
+ **Request Body:**
441
+ ```json
442
+ {
443
+ "input": {
444
+ "text": "Analyze this GeoJSON",
445
+ "files": [
446
  {
447
+ "name": "boundaries.geojson",
448
+ "type": "base64",
449
+ "content": "base64_encoded_content"
450
  }
451
+ ]
452
+ }
453
+ }
454
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
+ #### Clear Cache
457
+ ```
458
+ POST /clear-cache
459
+ ```
460
+ Clears the direct output file cache.
 
 
 
 
 
 
 
 
 
 
461
 
462
+ **Response:**
463
+ ```json
464
+ {
465
+ "status": "cache cleared"
466
+ }
467
+ ```
468
 
469
+ ### Gradio Interface
 
 
 
 
 
470
 
471
+ #### Interactive Query
472
 
473
+ Gradio's default API endpoint for UI interactions. If running on huggingface spaces, access via: https://[ORG_NAME]-[SPACE_NAME].hf.space/gradio/
474
+ ## Troubleshooting
475
 
476
+ ### Common Issues
477
 
478
+ #### 1. File Upload Fails
479
 
480
+ **Symptoms:** "Error reading file" or "Failed to decode uploaded file"
481
 
482
+ **Solutions:**
483
+ - Verify file is properly base64 encoded
484
+ - Check file size limits (default: varies by deployment)
485
+ - Ensure MIME type is in `multimodalAcceptedMimetypes`
486
 
487
+ #### 2. Slow Responses
488
 
489
+ **Symptoms:** Long wait times for responses
490
 
491
+ **Solutions:**
492
+ - Check network latency to external services
493
+ - Verify `MAX_CONTEXT_CHARS` isn't too high
494
+ - Consider enabling `DIRECT_OUTPUT` for suitable file types
495
+ - Check logs for retrieval/generation bottlenecks
496
 
497
+ #### 3. Cache Not Clearing
498
 
499
+ **Symptoms:** Same file shows cached results when it shouldn't
500
 
501
+ **Solutions:**
502
+ - Call `/clear-cache` endpoint
503
+ - Restart the service (clears in-memory cache)
504
+ - Check if `DIRECT_OUTPUT=True` in config
 
505
 
506
+ #### 4. Service Connection Errors
507
 
508
+ **Symptoms:** "Connection refused" or timeout errors
509
 
510
+ **Solutions:**
511
+ - Verify all service URLs in `params.cfg` are accessible
512
+ - Check HF_TOKEN is valid and has access to private spaces (*NOTE - THE ORCHESTRATOR CURRENTLY MUST BE PUBLIC*)
513
+ - Test each service independently with health checks
514
+ - Review firewall/network policies
515
 
 
516
 
517
+ ### Version History
518
 
519
+ - **v1.0.0**: Initial release with LangGraph orchestration
520
+ - Current implementation supports streaming, caching, and dual-mode processing
 
 
 
521
 
522
+ ---
523
 
524
+ **Documentation Last Updated:** 2025-10-01
525
+ **Compatible With:** Python 3.10+, LangGraph 0.2+, FastAPI 0.100+
526
 
 
 
527
 
 
528
 
529
+ """
 
 
retriever.py CHANGED
@@ -1,4 +1,4 @@
1
- retriverText = """ This mciroservice integrates with the vector database to retrieve semantically relevant documents,\
2
  with optional reranking for precision, ready for seamless use in ChaBo RAG workflows.
3
 
4
  # Retriever and Reranker Microservice on Hugging Face Spaces
@@ -50,8 +50,9 @@ retriverText = """ This mciroservice integrates with the vector database to retr
50
 
51
  ```python
52
  from gradio_client import Client
53
-
54
- client = Client("https://giz-chatfed-retriever0-3.hf.space/")
 
55
  result = client.predict(
56
  query="What is Circular Economy",
57
  collection_name="Humboldt",
 
1
+ retriverText = """ This microservice integrates with the vector database to retrieve semantically relevant documents,\
2
  with optional reranking for precision, ready for seamless use in ChaBo RAG workflows.
3
 
4
  # Retriever and Reranker Microservice on Hugging Face Spaces
 
50
 
51
  ```python
52
  from gradio_client import Client
53
+ # Replace with your actual Space URL (e.g., https://your-username-retriever_space.hf.space)
54
+ retriever_url = "https://giz-chatfed-retriever0-3.hf.space/"
55
+ client = Client(retriever_url)
56
  result = client.predict(
57
  query="What is Circular Economy",
58
  collection_name="Humboldt",
vectorDB.py CHANGED
@@ -9,7 +9,7 @@ vectordbText = """
9
  [ChaBo_QdrantServer](https://huggingface.co/spaces/GIZ/chatfed_QdrantServer/blob/main/README.md) Space hosts \
10
  a Qdrant vector database instance. This is just an infrastructural component and does\
11
  not serve any user application through its User Interface. However, admin tasks can be performed by\
12
- accessing "<embedded space url>/dashboard" Ex:https://giz-chatfed-qdrantserver.hf.space/dashboard \
13
  which is password protected.
14
 
15
  **Persistence:** Data is stored persistently in the `/data/qdrant_data` directory due to enabled persistent storage.
@@ -21,7 +21,7 @@ vectordbText = """
21
  ```python
22
  from qdrant_client import QdrantClient
23
 
24
- # Replace with your actual Space URL (e.g., [https://your-username-qdrant-server.hf.space](https://your-username-qdrant-server.hf.space))
25
  QDRANT_HOST = "giz-chatfed-qdrantserver.hf.space"
26
  client = QdrantClient(
27
  host = QDRANT_HOST,
@@ -30,6 +30,6 @@ vectordbText = """
30
  api_key = <QDRANT_API_KEY>,)
31
  ```
32
 
33
- API Documentation: https://api.qdrant.tech/api-reference
34
 
35
  """
 
9
  [ChaBo_QdrantServer](https://huggingface.co/spaces/GIZ/chatfed_QdrantServer/blob/main/README.md) Space hosts \
10
  a Qdrant vector database instance. This is just an infrastructural component and does\
11
  not serve any user application through its User Interface. However, admin tasks can be performed by\
12
+ accessing "<embedded space url>/dashboard" Ex:https://giz-chatfed-qdrantserver.hf.space/dashboard \
13
  which is password protected.
14
 
15
  **Persistence:** Data is stored persistently in the `/data/qdrant_data` directory due to enabled persistent storage.
 
21
  ```python
22
  from qdrant_client import QdrantClient
23
 
24
+ # Replace with your actual Space URL (e.g., https://your-username-qdrant-server.hf.space)
25
  QDRANT_HOST = "giz-chatfed-qdrantserver.hf.space"
26
  client = QdrantClient(
27
  host = QDRANT_HOST,
 
30
  api_key = <QDRANT_API_KEY>,)
31
  ```
32
 
33
+ **API Documentation**: https://api.qdrant.tech/api-reference
34
 
35
  """