# Source: textilindo-ai-assistant / app_gradio.py (Hugging Face Space by harismlnaslm)
# Commit 162cda9 — Fix: Correct dataset field mapping for question/answer extraction
#!/usr/bin/env python3
"""
Textilindo AI Assistant - Hugging Face Spaces (Gradio Version)
"""
import gradio as gr
import os
import json
import requests
from difflib import SequenceMatcher
import logging
# Setup logging
# Root logger at INFO so dataset-loading progress shows up in the Space console.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function and class in this file.
logger = logging.getLogger(__name__)
def load_system_prompt(default_text):
    """Load the system prompt from configs/system_prompt.md if available.

    Args:
        default_text: Prompt returned when the file is missing, empty,
            or unreadable.

    Returns:
        The stripped file contents, or ``default_text`` as a fallback.
    """
    try:
        base_dir = os.path.dirname(__file__)
        prompt_path = os.path.join(base_dir, "configs", "system_prompt.md")
        if os.path.exists(prompt_path):
            with open(prompt_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()
            # Fix: an existing-but-empty prompt file used to return "" and
            # silently blank the system prompt; treat it like a missing file.
            if content:
                return content
    except Exception as e:
        logger.warning(f"Could not load system prompt: {e}")
    return default_text
class TextilindoAI:
    """Retrieval-based assistant answering from JSONL Q/A datasets in ./data."""

    def __init__(self):
        # Each entry is one JSON object from a .jsonl file, tagged with 'source'.
        self.dataset = []
        self.system_prompt = load_system_prompt(
            "You are a helpful AI assistant for Textilindo, a textile company. "
            "Provide accurate and helpful information about Textilindo's products, services, and business information."
        )
        self.load_all_datasets()
        logger.info(f"Total examples loaded: {len(self.dataset)}")

    def load_all_datasets(self):
        """Load all JSONL datasets from the data directory"""
        base_dir = os.path.dirname(__file__)
        data_dir = os.path.join(base_dir, "data")
        if not os.path.exists(data_dir):
            logger.warning(f"Data directory not found: {data_dir}")
            return
        logger.info(f"Found data directory: {data_dir}")
        # Load all JSONL files
        for filename in os.listdir(data_dir):
            if not filename.endswith('.jsonl'):
                continue
            filepath = os.path.join(data_dir, filename)
            file_examples = 0
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                            data['source'] = filename  # Add source tracking
                            self.dataset.append(data)
                            file_examples += 1
                        except json.JSONDecodeError as e:
                            # Fix: these log messages printed the literal text
                            # "(unknown)" instead of the actual file name.
                            logger.warning(f"Invalid JSON in {filename}: {e}")
                            continue
                logger.info(f"Loaded {filename}: {file_examples} examples")
            except Exception as e:
                logger.error(f"Error loading {filename}: {e}")

    def find_most_similar(self, query, top_k=3):
        """Return the top_k dataset examples most similar to `query`.

        Similarity is a plain difflib ratio between the lowercased query and
        each example's 'question' (or 'instruction') field.
        """
        similarities = []
        query_lower = query.lower()
        for example in self.dataset:
            # Datasets are inconsistent: some use 'question', some 'instruction'.
            question = example.get('question', example.get('instruction', '')).lower()
            similarity = SequenceMatcher(None, query_lower, question).ratio()
            similarities.append((similarity, example))
        # Sort by similarity and return top_k
        similarities.sort(key=lambda x: x[0], reverse=True)
        return [example for _, example in similarities[:top_k]]

    def chat(self, message, temperature=0.7, max_tokens=300):
        """Answer `message` by returning the best-matching dataset answer.

        `temperature` and `max_tokens` are accepted for interface
        compatibility but currently unused: no LLM is called, the reply
        comes straight from the retrieved example.
        """
        try:
            similar_examples = self.find_most_similar(message, top_k=3)
            # Dead code removed: an LLM prompt (system prompt + Q/A context)
            # was built here but never sent anywhere or used in the response.
            if not similar_examples:
                return "I'm sorry, I don't have specific information about that. Please contact Textilindo directly for more details."
            # Return the most similar answer
            best = similar_examples[0]
            best_answer = best.get('answer', best.get('output', ''))
            if best_answer:
                return f"Based on our knowledge base: {best_answer}"
            return "I found some relevant information but couldn't extract a proper answer. Please try rephrasing your question."
        except Exception as e:
            logger.error(f"Error in chat: {e}")
            return f"Error: {str(e)}"
# Initialize AI assistant (will be created when needed)
ai = None  # lazily-created singleton; populated by get_ai_assistant()
def get_ai_assistant():
    """Return the shared TextilindoAI instance, creating it on first use.

    If construction fails, installs a minimal fallback object exposing the
    same ``dataset`` / ``chat`` interface so the UI keeps working.

    Returns:
        The TextilindoAI singleton, or a fallback stand-in on failure.
    """
    global ai
    if ai is None:
        try:
            logger.info("Initializing Textilindo AI Assistant...")
            ai = TextilindoAI()
            logger.info("AI Assistant initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize AI Assistant: {e}")
            # Bug fix: capture the message NOW. The `except ... as e` name is
            # unbound when the except block exits (PEP 3110), so the old lambda
            # that closed over `e` raised NameError whenever it was called.
            error_message = f"AI Assistant is not available. Error: {str(e)}"
            # Create a minimal fallback
            ai = type('FallbackAI', (), {
                'dataset': [],
                'chat': lambda self, message, **kwargs: error_message
            })()
    return ai
def chat_function(message, temperature=0.7, max_tokens=300):
    """Gradio callback: forward the user's message to the AI assistant."""
    # Guard clause: nothing to do for an empty message.
    if not message:
        return "Please enter a message."
    try:
        # Lazily obtain (and, if necessary, initialize) the assistant.
        assistant = get_ai_assistant()
        return assistant.chat(message, temperature=temperature, max_tokens=max_tokens)
    except Exception as e:
        logger.error(f"Error in chat function: {str(e)}")
        return f"Error: {str(e)}"
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI.

    Layout: a message box with temperature / max-token sliders on the left,
    the AI response on the right, plus example prompts and a dataset-size
    footer. Both the Send button and Enter submit the message.
    """
    with gr.Blocks(title="Textilindo AI Assistant") as interface:
        gr.Markdown("# 🤖 Textilindo AI Assistant")
        gr.Markdown("AI-powered customer service for Textilindo")
        with gr.Row():
            with gr.Column():
                message_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask me anything about Textilindo...",
                    lines=3
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    max_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=300,
                        step=50,
                        label="Max Tokens"
                    )
                submit_btn = gr.Button("Send Message", variant="primary")
            with gr.Column():
                response_output = gr.Textbox(
                    label="AI Response",
                    lines=10,
                    interactive=False
                )
        # Event handlers
        submit_btn.click(
            fn=chat_function,
            inputs=[message_input, temperature, max_tokens],
            outputs=response_output
        )
        # Allow Enter key to submit
        message_input.submit(
            fn=chat_function,
            inputs=[message_input, temperature, max_tokens],
            outputs=response_output
        )
        # Add examples
        gr.Examples(
            examples=[
                "Dimana lokasi Textilindo?",
                "Apa saja produk yang dijual di Textilindo?",
                "Jam berapa Textilindo buka?",
                "Bagaimana cara menghubungi Textilindo?"
            ],
            inputs=message_input
        )
        # Add footer with stats
        try:
            ai_assistant = get_ai_assistant()
            dataset_size = len(ai_assistant.dataset) if hasattr(ai_assistant, 'dataset') else 0
            gr.Markdown(f"**Dataset loaded:** {dataset_size} examples")
        except Exception:
            # Fix: was a bare `except:`, which would also swallow
            # SystemExit/KeyboardInterrupt during interface construction.
            gr.Markdown("**Dataset:** Loading...")
    return interface
# Script entry point: start the assistant, build the UI, and launch the
# Gradio server; on any failure fall back to a minimal error page.
if __name__ == '__main__':
    try:
        logger.info("Starting Textilindo AI Assistant...")
        # Try to initialize AI assistant early to catch any issues
        try:
            ai_assistant = get_ai_assistant()
            logger.info(f"Dataset loaded: {len(ai_assistant.dataset)} examples")
        except Exception as e:
            # Non-fatal: get_ai_assistant() installs a fallback object itself.
            logger.warning(f"AI Assistant initialization failed: {e}")
            logger.info("Continuing with fallback mode...")
        # Create and launch the interface
        logger.info("Creating Gradio interface...")
        interface = create_interface()
        logger.info("Gradio interface created successfully")
        # Get server configuration from environment variables
        server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
        server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
        logger.info(f"Launching Gradio interface on {server_name}:{server_port}")
        # Launch with basic configuration for Hugging Face Spaces
        interface.launch(
            server_name=server_name,
            server_port=server_port,
            share=False,  # Hugging Face Spaces handles tunneling automatically
            show_error=True,
            quiet=False,
            inbrowser=False,
            prevent_thread_lock=False  # Essential for keeping the process alive
        )
        logger.info("Gradio interface launched successfully")
        # NOTE(review): with prevent_thread_lock=False, launch() blocks until
        # the server stops, so this keep-alive loop is likely unreachable
        # until shutdown — confirm against the Gradio version in use.
        # Keep the application running
        try:
            import time
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            logger.info("Application stopped by user")
    except Exception as e:
        logger.error(f"Failed to start application: {e}")
        # Create a simple fallback interface
        try:
            with gr.Blocks() as fallback:
                gr.Markdown("# Textilindo AI Assistant")
                gr.Markdown("Application is starting... Please wait.")
                gr.Markdown(f"Error: {str(e)}")
            # Get server configuration from environment variables
            server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
            server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
            fallback.launch(
                server_name=server_name,
                server_port=server_port,
                share=False,
                inbrowser=False,
                prevent_thread_lock=False  # Essential for keeping the process alive
            )
            # Keep the fallback running
            import time
            while True:
                time.sleep(1)
        except Exception as fallback_error:
            logger.error(f"Fallback interface also failed: {fallback_error}")
            # If everything fails, just keep the process alive
            import time
            while True:
                time.sleep(1)