|
|
""" |
|
|
Fast import using CORRECT Supabase Connection URI (Tokyo Region) |
|
|
""" |
|
|
|
|
|
import sys |
|
|
import json |
|
|
import os |
|
|
import psycopg2 |
|
|
from psycopg2.extras import execute_values |
|
|
from tqdm import tqdm |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CONNECTION_URI = os.getenv("SUPABASE_DB_URI") |
|
|
|
|
|
if not CONNECTION_URI: |
|
|
raise ValueError( |
|
|
"SUPABASE_DB_URI not found in environment variables. " |
|
|
"Please set it in your .env file." |
|
|
) |
|
|
|
|
|
def import_with_psycopg2( |
|
|
embeddings_file="plant_hypernodes_with_embeddings.json", |
|
|
batch_size=1000 |
|
|
): |
|
|
print(f"\n{'='*60}") |
|
|
print(f"Fast Import via Supabase Pooler (Tokyo Region)") |
|
|
print(f"{'='*60}\n") |
|
|
|
|
|
|
|
|
print(f"Loading {embeddings_file}...") |
|
|
try: |
|
|
with open(embeddings_file, "r", encoding="utf-8") as f: |
|
|
hypernodes = json.load(f) |
|
|
print(f"Loaded {len(hypernodes)} HyperNodes\n") |
|
|
except FileNotFoundError: |
|
|
print(f"❌ Error: File '{embeddings_file}' not found.") |
|
|
return |
|
|
|
|
|
|
|
|
print(f"Connecting to Database via URI...") |
|
|
print(f"Target: aws-1-ap-northeast-1.pooler.supabase.com (Tokyo)") |
|
|
|
|
|
try: |
|
|
|
|
|
conn = psycopg2.connect(dsn=CONNECTION_URI) |
|
|
cursor = conn.cursor() |
|
|
print("✅ Connected successfully!\n") |
|
|
|
|
|
|
|
|
cursor.execute("SELECT COUNT(*) FROM hypernodes") |
|
|
existing_count = cursor.fetchone()[0] |
|
|
|
|
|
if existing_count > 0: |
|
|
print(f"⚠️ Warning: Database has {existing_count} existing nodes") |
|
|
print("Clearing existing data...") |
|
|
cursor.execute("TRUNCATE TABLE hypernodes") |
|
|
conn.commit() |
|
|
print("✅ Cleared\n") |
|
|
|
|
|
|
|
|
print(f"Inserting {len(hypernodes)} nodes in batches of {batch_size}...") |
|
|
|
|
|
insert_query = """ |
|
|
INSERT INTO hypernodes (key, value, plant_name, section, key_embedding, value_embedding) |
|
|
VALUES %s |
|
|
""" |
|
|
|
|
|
for i in tqdm(range(0, len(hypernodes), batch_size), desc="Inserting"): |
|
|
batch = hypernodes[i : i + batch_size] |
|
|
values = [ |
|
|
( |
|
|
node["key"], |
|
|
node["value"], |
|
|
node["plant_name"], |
|
|
node.get("section"), |
|
|
node["key_embedding"], |
|
|
node["value_embedding"], |
|
|
) |
|
|
for node in batch |
|
|
] |
|
|
|
|
|
try: |
|
|
execute_values(cursor, insert_query, values) |
|
|
conn.commit() |
|
|
except Exception as e: |
|
|
print(f"\n❌ Error in batch {i//batch_size}: {e}") |
|
|
conn.rollback() |
|
|
|
|
|
|
|
|
cursor.execute("SELECT COUNT(*) FROM hypernodes") |
|
|
final_count = cursor.fetchone()[0] |
|
|
|
|
|
cursor.close() |
|
|
conn.close() |
|
|
|
|
|
print(f"\n{'='*60}") |
|
|
print("IMPORT COMPLETE") |
|
|
print(f"Total nodes in DB: {final_count}") |
|
|
print(f"{'='*60}\n") |
|
|
|
|
|
except psycopg2.OperationalError as e: |
|
|
print(f"\n❌ CONNECTION ERROR: {e}") |
|
|
print("Nếu vẫn lỗi, hãy kiểm tra lại xem Project có bị 'Paused' trên Dashboard không.") |
|
|
except Exception as e: |
|
|
print(f"\n❌ UNEXPECTED ERROR: {e}") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
import argparse |
|
|
parser = argparse.ArgumentParser() |
|
|
parser.add_argument("--embeddings", default="plant_hypernodes_with_embeddings.json") |
|
|
parser.add_argument("--batch-size", type=int, default=1000) |
|
|
args = parser.parse_args() |
|
|
|
|
|
import_with_psycopg2(args.embeddings, args.batch_size) |