#!/bin/bash
echo "=== AllyCAT GraphRAG Docker Startup ==="
# Check deployment mode from environment
LLM_MODE=${LLM_RUN_ENV:-cloud}
VECTOR_MODE=${VECTOR_DB_TYPE:-cloud_zilliz}
echo "LLM Mode: $LLM_MODE"
echo "Vector DB Mode: $VECTOR_MODE"
# Conditional: Start Ollama only if in local mode
if [ "$LLM_MODE" = "local_ollama" ]; then
    echo "Starting Ollama in local mode..."

    # Define OLLAMA_MODELS dir
    if [ -z "$OLLAMA_MODELS" ]; then
        export OLLAMA_MODELS=/allycat/workspace/ollama
    fi
    echo "Env variables for OLLAMA:"
    env | grep OLLAMA

    # Start ollama
    ollama_model=${OLLAMA_MODEL:-gemma3:1b}
    echo "Starting Ollama server..."
    ollama serve > /allycat/ollama-serve.out 2>&1 &

    # Wait for ollama to start (block until the port accepts connections)
    OLLAMA_PORT=${OLLAMA_PORT:-11434}
    while ! nc -z localhost "$OLLAMA_PORT"; do
        sleep 1
    done
    echo "✅ Ollama started on port $OLLAMA_PORT"

    # Only download the model if we are in deploy mode
    if [ "$1" == "deploy" ]; then
        echo "Downloading Ollama model: $ollama_model"
        ollama pull "$ollama_model"
        echo "✅ Ollama model downloaded: $ollama_model"
    fi
else
    echo "✅ Using cloud LLM mode - Ollama not started"
fi
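
# A quick manual health check for the Ollama server (illustrative; uses the
# standard Ollama REST endpoint that lists locally available models):
#   curl http://localhost:11434/api/tags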
# Conditional: Setup local vector DB only if needed
if [ "$VECTOR_MODE" = "local" ]; then
echo "Setting up local Milvus vector database..."
mkdir -p /allycat/workspace
echo "✅ Local vector database directory created"
else
echo "✅ Using Zilliz Cloud for vector database"
fi
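
# Note on error handling: each pipeline step below uses `|| echo ...` so a
# failed stage logs a warning but does not abort startup; the web app still
# launches on whatever data made it into the databases.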
# Run GraphRAG pipeline if AUTO_RUN_PIPELINE is enabled and in deploy mode
if [ "$1" == "deploy" ] && [ "${AUTO_RUN_PIPELINE:-false}" = "true" ]; then
echo ""
echo "=== Running GraphRAG Pipeline Automatically ==="
echo ""
# Step 1: Crawl website
if [ -n "$WEBSITE_URL" ]; then
echo "Step 1/5: Crawling website: $WEBSITE_URL"
python3 1_crawl_site.py || echo "⚠️ Warning: Crawl failed, continuing..."
echo "✅ Step 1 complete"
echo ""
else
echo "⚠️ Skipping crawl - WEBSITE_URL not set"
fi
    # Step 2: Process files to markdown
    echo "Step 2/5: Processing files to markdown..."
    python3 2_process_files.py || echo "⚠️ Warning: Processing failed, continuing..."
    echo "✅ Step 2 complete"
    echo ""

    # Step 3: Save to vector database
    echo "Step 3/5: Saving to vector database..."
    if [ "$VECTOR_MODE" = "cloud_zilliz" ]; then
        python3 3_save_to_vector_db_zilliz.py || echo "⚠️ Warning: Vector DB save failed, continuing..."
    else
        python3 3_save_to_vector_db.py || echo "⚠️ Warning: Vector DB save failed, continuing..."
    fi
    echo "✅ Step 3 complete"
    echo ""
    # Step 4: Process graph data (3 phases)
    echo "Step 4/5: Processing graph data (3 phases)..."
    echo "  Phase 1: Extracting entities and relationships..."
    python3 2b_process_graph_phase1.py || echo "⚠️ Warning: Phase 1 failed, continuing..."
    echo "  Phase 2: Building communities..."
    python3 2b_process_graph_phase2.py || echo "⚠️ Warning: Phase 2 failed, continuing..."
    echo "  Phase 3: Generating community summaries..."
    python3 2b_process_graph_phase3.py || echo "⚠️ Warning: Phase 3 failed, continuing..."
    echo "✅ Step 4 complete"
    echo ""

    # Step 5: Save to graph database
    echo "Step 5/5: Saving to graph database..."
    python3 3b_save_to_graph_db.py || echo "⚠️ Warning: Graph DB save failed, continuing..."
    echo "✅ Step 5 complete"
    echo ""

    echo "=== ✅ Pipeline Complete - Starting Application ==="
    echo ""
    # OPTIMIZATION: Clean up pipeline dependencies to save RAM
    if [ "${CLEANUP_PIPELINE_DEPS:-false}" = "true" ]; then
        echo ""
        echo "=== 🧹 Cleaning Up Pipeline Dependencies ==="
        echo "This will save ~350-500 MB of RAM"
        echo ""
        chmod +x ./cleanup_pipeline_deps.sh
        ./cleanup_pipeline_deps.sh
        echo ""
        echo "=== ✅ Cleanup Complete ==="
        echo ""
    else
        echo ""
        echo "💡 TIP: Set CLEANUP_PIPELINE_DEPS=true in .env to save ~350-500 MB RAM"
        echo "        after pipeline completes (reduces OOM errors on 1GB containers)"
        echo ""
    fi
fi
# Start the appropriate web application
APP_TYPE=${APP_TYPE:-flask_graph}
DOCKER_APP_PORT=${DOCKER_APP_PORT:-8080}
FLASK_GRAPH_PORT=${FLASK_GRAPH_PORT:-8080}
FLASK_VECTOR_PORT=${FLASK_VECTOR_PORT:-8081}
CHAINLIT_GRAPH_PORT=${CHAINLIT_GRAPH_PORT:-8083}
CHAINLIT_VECTOR_PORT=${CHAINLIT_VECTOR_PORT:-8082}
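
# Note: the Chainlit apps receive their port via --port below, while the
# Flask apps are launched without a port flag and are assumed to read
# FLASK_GRAPH_PORT / FLASK_VECTOR_PORT from the environment themselves.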
# Log port configuration
echo ""
echo "=== Port Configuration ==="
echo "DOCKER_APP_PORT (internal container): $DOCKER_APP_PORT"
echo "FLASK_GRAPH_PORT: $FLASK_GRAPH_PORT"
echo "FLASK_VECTOR_PORT: $FLASK_VECTOR_PORT"
echo "CHAINLIT_GRAPH_PORT: $CHAINLIT_GRAPH_PORT"
echo "CHAINLIT_VECTOR_PORT: $CHAINLIT_VECTOR_PORT"
echo ""
# Determine which port will be used based on APP_TYPE
case $APP_TYPE in
    "flask_graph")
        APP_PORT=$FLASK_GRAPH_PORT
        ;;
    "chainlit_graph")
        APP_PORT=$CHAINLIT_GRAPH_PORT
        ;;
    "flask")
        APP_PORT=$FLASK_VECTOR_PORT
        ;;
    "chainlit")
        APP_PORT=$CHAINLIT_VECTOR_PORT
        ;;
    *)
        APP_PORT=$FLASK_GRAPH_PORT
        ;;
esac
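
# If the selected APP_PORT differs from DOCKER_APP_PORT (e.g. chainlit_graph
# defaults to 8083 while DOCKER_APP_PORT defaults to 8080), make sure the
# host port mapping targets the app's actual port, e.g. (illustrative):
#   docker run -e APP_TYPE=chainlit_graph -p 8080:8083 <image> deploy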
echo "Selected APP_TYPE: $APP_TYPE will run on port: $APP_PORT"
echo "Container will expose application on port: $DOCKER_APP_PORT (mapped to host DOCKER_PORT)"
echo ""
if [ "$1" == "deploy" ]; then
echo "In deploy mode..."
case $APP_TYPE in
"flask_graph")
echo "Starting Flask GraphRAG app on port $FLASK_GRAPH_PORT..."
python3 app_flask_graph.py
;;
"chainlit_graph")
echo "Starting Chainlit GraphRAG app on port $CHAINLIT_GRAPH_PORT..."
chainlit run app_chainlit_graph.py --host 0.0.0.0 --port $CHAINLIT_GRAPH_PORT
;;
"flask")
echo "Starting Flask Vector RAG app on port $FLASK_VECTOR_PORT..."
python3 app_flask.py
;;
"chainlit")
echo "Starting Chainlit Vector RAG app on port $CHAINLIT_VECTOR_PORT..."
chainlit run app_chainlit.py --host 0.0.0.0 --port $CHAINLIT_VECTOR_PORT
;;
*)
echo "Starting default Flask GraphRAG app on port $FLASK_GRAPH_PORT..."
python3 app_flask_graph.py
;;
esac
else
echo "Not in deploy mode, entering interactive shell."
echo ""
echo "Available commands:"
echo " python3 app_flask_graph.py - Start Flask GraphRAG app"
echo " python3 app_flask.py - Start Flask VectorRAG app"
echo " chainlit run app_chainlit_graph.py - Start Chainlit GraphRAG app"
echo " chainlit run app_chainlit.py - Start Chainlit VectorRAG app"
if [ "$LLM_MODE" = "local_ollama" ]; then
echo " ollama pull $ollama_model - Download Ollama model"
fi
echo ""
/bin/bash
fi