"""
Fast import using CORRECT Supabase Connection URI (Tokyo Region)
"""
import json
import os
import psycopg2
from psycopg2.extras import execute_values
from tqdm import tqdm
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# ============================================================
# DATABASE CONNECTION CONFIGURATION
# ============================================================
# Get connection URI from environment variable
CONNECTION_URI = os.getenv("SUPABASE_DB_URI")
if not CONNECTION_URI:
    raise ValueError(
        "SUPABASE_DB_URI not found in environment variables. "
        "Please set it in your .env file."
    )
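# Example .env entry (placeholder credentials; the host/port follow Supabase's
# connection-pooler URI format, so verify against your own project's settings):
#   SUPABASE_DB_URI=postgresql://postgres.<project-ref>:<password>@aws-1-ap-northeast-1.pooler.supabase.com:6543/postgres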


def import_with_psycopg2(
    embeddings_file="plant_hypernodes_with_embeddings.json",
    batch_size=1000,
):
print(f"\n{'='*60}")
print(f"Fast Import via Supabase Pooler (Tokyo Region)")
print(f"{'='*60}\n")
# 1. Load embeddings
print(f"Loading {embeddings_file}...")
try:
with open(embeddings_file, "r", encoding="utf-8") as f:
hypernodes = json.load(f)
print(f"Loaded {len(hypernodes)} HyperNodes\n")
except FileNotFoundError:
print(f"❌ Error: File '{embeddings_file}' not found.")
return
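
    # The loader expects a JSON array of HyperNode objects whose field names
    # match the keys read in the insert loop below; the values here are
    # illustrative only:
    #
    # [
    #   {
    #     "key": "watering frequency",
    #     "value": "Water deeply once a week during the growing season.",
    #     "plant_name": "Monstera deliciosa",
    #     "section": "care",
    #     "key_embedding": [0.012, -0.034, ...],
    #     "value_embedding": [0.027, 0.051, ...]
    #   }
    # ]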

    # 2. Connect
    print("Connecting to database via URI...")
    print("Target: aws-1-ap-northeast-1.pooler.supabase.com (Tokyo)")
    try:
        # Connect using the full connection URI string
        conn = psycopg2.connect(dsn=CONNECTION_URI)
        cursor = conn.cursor()
        print("✅ Connected successfully!\n")

        # Check existing count and clear the table if it already has data
        cursor.execute("SELECT COUNT(*) FROM hypernodes")
        existing_count = cursor.fetchone()[0]
        if existing_count > 0:
            print(f"⚠️ Warning: Database has {existing_count} existing nodes")
            print("Clearing existing data...")
            cursor.execute("TRUNCATE TABLE hypernodes")
            conn.commit()
            print("✅ Cleared\n")

        # 3. Insert batches
        print(f"Inserting {len(hypernodes)} nodes in batches of {batch_size}...")
        insert_query = """
            INSERT INTO hypernodes (key, value, plant_name, section, key_embedding, value_embedding)
            VALUES %s
        """
        for i in tqdm(range(0, len(hypernodes), batch_size), desc="Inserting"):
            batch = hypernodes[i : i + batch_size]
            values = [
                (
                    node["key"],
                    node["value"],
                    node["plant_name"],
                    node.get("section"),  # optional field; may be absent
                    node["key_embedding"],
                    node["value_embedding"],
                )
                for node in batch
            ]
            try:
                execute_values(cursor, insert_query, values)
                conn.commit()
            except Exception as e:
                print(f"\n❌ Error in batch {i // batch_size}: {e}")
                conn.rollback()

        # Final verify
        cursor.execute("SELECT COUNT(*) FROM hypernodes")
        final_count = cursor.fetchone()[0]
        cursor.close()
        conn.close()

        print(f"\n{'='*60}")
        print("IMPORT COMPLETE")
        print(f"Total nodes in DB: {final_count}")
        print(f"{'='*60}\n")
    except psycopg2.OperationalError as e:
        print(f"\n❌ CONNECTION ERROR: {e}")
        print("If it still fails, check whether the project is 'Paused' on the Supabase Dashboard.")
    except Exception as e:
        print(f"\n❌ UNEXPECTED ERROR: {e}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Import HyperNode embeddings into Supabase")
    parser.add_argument("--embeddings", default="plant_hypernodes_with_embeddings.json",
                        help="Path to the embeddings JSON file")
    parser.add_argument("--batch-size", type=int, default=1000,
                        help="Number of rows per insert batch")
    args = parser.parse_args()

    import_with_psycopg2(args.embeddings, args.batch_size)
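
# Usage (assuming this file is saved as import_embeddings.py; adjust the name
# to match your repo):
#   python import_embeddings.py
#   python import_embeddings.py --embeddings my_nodes.json --batch-size 500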