#!/bin/bash
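#
# AllyCAT GraphRAG Docker startup script.
#
# Usage (inferred from the argument handling below):
#   <this script> deploy   - pull the Ollama model if needed, optionally run the
#                            GraphRAG pipeline, then start the selected web app
#   <this script>          - prepare the environment and drop into an interactive shell
#
# Key environment variables (defaults are the ones set below):
#   LLM_RUN_ENV         local_ollama | cloud                              (default: cloud)
#   VECTOR_DB_TYPE      local | cloud_zilliz                              (default: cloud_zilliz)
#   AUTO_RUN_PIPELINE   true to run the pipeline automatically in deploy mode (default: false)
#   APP_TYPE            flask_graph | chainlit_graph | flask | chainlit   (default: flask_graph)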

echo "=== AllyCAT GraphRAG Docker Startup ==="

# Check deployment mode from environment
LLM_MODE=${LLM_RUN_ENV:-cloud}
VECTOR_MODE=${VECTOR_DB_TYPE:-cloud_zilliz}

echo "LLM Mode: $LLM_MODE"
echo "Vector DB Mode: $VECTOR_MODE"

# Conditional: Start Ollama only if in local mode
if [ "$LLM_MODE" = "local_ollama" ]; then
    echo "Starting Ollama in local mode..."
    
    # Default the Ollama model storage directory if not already set
    if [ -z "$OLLAMA_MODELS" ]; then
        export OLLAMA_MODELS=/allycat/workspace/ollama
    fi
    
    echo "Env variables for OLLAMA:"
    env | grep OLLAMA
    
    # Start ollama
    ollama_model=${OLLAMA_MODEL:-gemma3:1b}
    echo "Starting Ollama server..."
    ollama serve > /allycat/ollama-serve.out 2>&1 &
    
    # Wait for the Ollama server to accept connections
    OLLAMA_PORT=${OLLAMA_PORT:-11434}
    while ! nc -z localhost $OLLAMA_PORT; do
        sleep 1
    done
    echo "✅ Ollama started on port $OLLAMA_PORT"
    
    # Only download the model if we are in DEPLOY mode
    if [ "$1" == "deploy" ]; then
        echo "Downloading Ollama model: $ollama_model"
        ollama pull "$ollama_model"
        echo "✅ Ollama model downloaded: $ollama_model"
    fi
else
    echo "✅ Using cloud LLM mode - Ollama not started"
fi

# Conditional: Setup local vector DB only if needed
if [ "$VECTOR_MODE" = "local" ]; then
    echo "Setting up local Milvus vector database..."
    mkdir -p /allycat/workspace
    echo "✅ Local vector database directory created"
else
    echo "✅ Using Zilliz Cloud for vector database"
fi

# Run GraphRAG pipeline if AUTO_RUN_PIPELINE is enabled and in deploy mode
if [ "$1" == "deploy" ] && [ "${AUTO_RUN_PIPELINE:-false}" = "true" ]; then
    echo ""
    echo "=== Running GraphRAG Pipeline Automatically ==="
    echo ""
    
    # Step 1: Crawl website
    if [ -n "$WEBSITE_URL" ]; then
        echo "Step 1/5: Crawling website: $WEBSITE_URL"
        python3 1_crawl_site.py || echo "⚠️  Warning: Crawl failed, continuing..."
        echo "✅ Step 1 complete"
        echo ""
    else
        echo "⚠️  Skipping crawl - WEBSITE_URL not set"
    fi
    
    # Step 2: Process files to markdown
    echo "Step 2/5: Processing files to markdown..."
    python3 2_process_files.py || echo "⚠️  Warning: Processing failed, continuing..."
    echo "✅ Step 2 complete"
    echo ""
    
    # Step 3: Save to vector database
    echo "Step 3/5: Saving to vector database..."
    if [ "$VECTOR_MODE" = "cloud_zilliz" ]; then
        python3 3_save_to_vector_db_zilliz.py || echo "⚠️  Warning: Vector DB save failed, continuing..."
    else
        python3 3_save_to_vector_db.py || echo "⚠️  Warning: Vector DB save failed, continuing..."
    fi
    echo "✅ Step 3 complete"
    echo ""
    
    # Step 4: Process graph data (3 phases)
    echo "Step 4/5: Processing graph data (3 phases)..."
    echo "  Phase 1: Extracting entities and relationships..."
    python3 2b_process_graph_phase1.py || echo "⚠️  Warning: Phase 1 failed, continuing..."
    echo "  Phase 2: Building communities..."
    python3 2b_process_graph_phase2.py || echo "⚠️  Warning: Phase 2 failed, continuing..."
    echo "  Phase 3: Generating community summaries..."
    python3 2b_process_graph_phase3.py || echo "⚠️  Warning: Phase 3 failed, continuing..."
    echo "✅ Step 4 complete"
    echo ""
    
    # Step 5: Save to graph database
    echo "Step 5/5: Saving to graph database..."
    python3 3b_save_to_graph_db.py || echo "⚠️  Warning: Graph DB save failed, continuing..."
    echo "✅ Step 5 complete"
    echo ""
    
    echo "=== ✅ Pipeline Complete - Starting Application ==="
    echo ""
    
    # OPTIMIZATION: Clean up pipeline dependencies to save RAM
    if [ "${CLEANUP_PIPELINE_DEPS:-false}" = "true" ]; then
        echo ""
        echo "=== 🧹 Cleaning Up Pipeline Dependencies ==="
        echo "This will save ~350-500 MB of RAM"
        echo ""
        chmod +x ./cleanup_pipeline_deps.sh
        ./cleanup_pipeline_deps.sh
        echo ""
        echo "=== ✅ Cleanup Complete ==="
        echo ""
    else
        echo ""
        echo "💡 TIP: Set CLEANUP_PIPELINE_DEPS=true in .env to save ~350-500 MB RAM"
        echo "        after pipeline completes (reduces OOM errors on 1GB containers)"
        echo ""
    fi
fi
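
# The same pipeline can also be run step by step from the interactive shell
# (sketch; 1_crawl_site.py presumably reads WEBSITE_URL from the environment):
#   python3 1_crawl_site.py
#   python3 2_process_files.py
#   python3 3_save_to_vector_db_zilliz.py    # or 3_save_to_vector_db.py for local Milvus
#   python3 2b_process_graph_phase1.py
#   python3 2b_process_graph_phase2.py
#   python3 2b_process_graph_phase3.py
#   python3 3b_save_to_graph_db.py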

# Start the appropriate web application
APP_TYPE=${APP_TYPE:-flask_graph}
DOCKER_APP_PORT=${DOCKER_APP_PORT:-8080}
FLASK_GRAPH_PORT=${FLASK_GRAPH_PORT:-8080}
FLASK_VECTOR_PORT=${FLASK_VECTOR_PORT:-8081}
CHAINLIT_GRAPH_PORT=${CHAINLIT_GRAPH_PORT:-8083}
CHAINLIT_VECTOR_PORT=${CHAINLIT_VECTOR_PORT:-8082}

# Log port configuration
echo ""
echo "=== Port Configuration ==="
echo "DOCKER_APP_PORT (internal container): $DOCKER_APP_PORT"
echo "FLASK_GRAPH_PORT: $FLASK_GRAPH_PORT"
echo "FLASK_VECTOR_PORT: $FLASK_VECTOR_PORT"
echo "CHAINLIT_GRAPH_PORT: $CHAINLIT_GRAPH_PORT"
echo "CHAINLIT_VECTOR_PORT: $CHAINLIT_VECTOR_PORT"
echo ""

# Determine which port will be used based on APP_TYPE
case $APP_TYPE in
    "flask_graph")
        APP_PORT=$FLASK_GRAPH_PORT
        ;;
    "chainlit_graph")
        APP_PORT=$CHAINLIT_GRAPH_PORT
        ;;
    "flask")
        APP_PORT=$FLASK_VECTOR_PORT
        ;;
    "chainlit")
        APP_PORT=$CHAINLIT_VECTOR_PORT
        ;;
    *)
        APP_PORT=$FLASK_GRAPH_PORT
        ;;
esac
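
# Note (assumption): the published container port is DOCKER_APP_PORT, so the
# selected APP_PORT normally needs to match it for the app to be reachable.
# The Flask apps are started without an explicit port flag and are assumed to
# read FLASK_GRAPH_PORT / FLASK_VECTOR_PORT themselves; the Chainlit apps get
# their port via --port below.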

echo "Selected APP_TYPE: $APP_TYPE will run on port: $APP_PORT"
echo "Container will expose application on port: $DOCKER_APP_PORT (mapped to host DOCKER_PORT)"
echo ""

if [ "$1" == "deploy" ]; then
    echo "In deploy mode..."
    
    case $APP_TYPE in
        "flask_graph")
            echo "Starting Flask GraphRAG app on port $FLASK_GRAPH_PORT..."
            python3 app_flask_graph.py
            ;;
        "chainlit_graph")
            echo "Starting Chainlit GraphRAG app on port $CHAINLIT_GRAPH_PORT..."
            chainlit run app_chainlit_graph.py --host 0.0.0.0 --port $CHAINLIT_GRAPH_PORT
            ;;
        "flask")
            echo "Starting Flask Vector RAG app on port $FLASK_VECTOR_PORT..."
            python3 app_flask.py
            ;;
        "chainlit")
            echo "Starting Chainlit Vector RAG app on port $CHAINLIT_VECTOR_PORT..."
            chainlit run app_chainlit.py --host 0.0.0.0 --port $CHAINLIT_VECTOR_PORT
            ;;
        *)
            echo "Starting default Flask GraphRAG app on port $FLASK_GRAPH_PORT..."
            python3 app_flask_graph.py
            ;;
    esac
else
    echo "Not in deploy mode, entering interactive shell."
    echo ""
    echo "Available commands:"
    echo "  python3 app_flask_graph.py       - Start Flask GraphRAG app"
    echo "  python3 app_flask.py             - Start Flask VectorRAG app"
    echo "  chainlit run app_chainlit_graph.py - Start Chainlit GraphRAG app"
    echo "  chainlit run app_chainlit.py     - Start Chainlit VectorRAG app"
    
    if [ "$LLM_MODE" = "local_ollama" ]; then
        echo "  ollama pull $ollama_model        - Download Ollama model"
    fi
    echo ""
    /bin/bash
fi