Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from groq import Groq
|
|
| 9 |
# Set Your API Keys
|
| 10 |
# Use environment variables securely
|
| 11 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 12 |
-
HF_API_KEY = os.getenv("
|
| 13 |
|
| 14 |
if not GROQ_API_KEY or not HF_API_KEY:
|
| 15 |
raise ValueError("GROQ_API_KEY and HF_TOKEN must be set in the environment variables.")
|
|
@@ -19,7 +19,6 @@ client = Groq(api_key=GROQ_API_KEY)
|
|
| 19 |
# Use a Public Hugging Face Image Model
|
| 20 |
HF_IMAGE_MODEL = "stabilityai/stable-diffusion-2-1"
|
| 21 |
|
| 22 |
-
|
| 23 |
# Function 1: Tamil Audio to Tamil Text (Transcription)
|
| 24 |
def transcribe_audio(audio_path):
|
| 25 |
if not audio_path:
|
|
@@ -38,7 +37,6 @@ def transcribe_audio(audio_path):
|
|
| 38 |
except Exception as e:
|
| 39 |
return f"Error in transcription: {str(e)}"
|
| 40 |
|
| 41 |
-
|
| 42 |
# Function 2: Tamil Text to English Translation
|
| 43 |
def translate_tamil_to_english(tamil_text):
|
| 44 |
if not tamil_text:
|
|
@@ -60,7 +58,6 @@ def translate_tamil_to_english(tamil_text):
|
|
| 60 |
except Exception as e:
|
| 61 |
return f"Error in translation: {str(e)}"
|
| 62 |
|
| 63 |
-
|
| 64 |
# Function 3: English Text to Image Generation (Hugging Face)
|
| 65 |
def generate_image(english_text):
|
| 66 |
if not english_text:
|
|
@@ -84,7 +81,6 @@ def generate_image(english_text):
|
|
| 84 |
except Exception as e:
|
| 85 |
return f"Error in image generation: {str(e)}"
|
| 86 |
|
| 87 |
-
|
| 88 |
# Function 4: English Text to AI-Generated Text
|
| 89 |
|
| 90 |
def generate_text(english_text):
|
|
@@ -108,7 +104,6 @@ def generate_text(english_text):
|
|
| 108 |
except Exception as e:
|
| 109 |
return f"Error in text generation: {str(e)}"
|
| 110 |
|
| 111 |
-
|
| 112 |
# Combined Function to Process All Steps
|
| 113 |
def process_audio(audio_path):
|
| 114 |
# Step 1: Tamil Audio β Tamil Text
|
|
@@ -128,22 +123,54 @@ def process_audio(audio_path):
|
|
| 128 |
|
| 129 |
# Step 4: English Text β AI-Generated Text
|
| 130 |
generated_text = generate_text(english_text)
|
| 131 |
-
|
| 132 |
-
|
| 133 |
|
| 134 |
# Create Gradio Interface
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# Set Your API Keys
# Credentials come from the environment — never hard-coded in source.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_TOKEN")

# Fail fast at startup when either credential is missing, so the app
# never runs half-configured.
if not (GROQ_API_KEY and HF_API_KEY):
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set in the environment variables.")
|
|
|
|
| 19 |
# Public Hugging Face checkpoint used for the text-to-image step.
HF_IMAGE_MODEL = "stabilityai/stable-diffusion-2-1"
|
| 21 |
|
|
|
|
| 22 |
# Function 1: Tamil Audio to Tamil Text (Transcription)
|
| 23 |
def transcribe_audio(audio_path):
|
| 24 |
if not audio_path:
|
|
|
|
| 37 |
except Exception as e:
|
| 38 |
return f"Error in transcription: {str(e)}"
|
| 39 |
|
|
|
|
| 40 |
# Function 2: Tamil Text to English Translation
|
| 41 |
def translate_tamil_to_english(tamil_text):
|
| 42 |
if not tamil_text:
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
return f"Error in translation: {str(e)}"
|
| 60 |
|
|
|
|
| 61 |
# Function 3: English Text to Image Generation (Hugging Face)
|
| 62 |
def generate_image(english_text):
|
| 63 |
if not english_text:
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
return f"Error in image generation: {str(e)}"
|
| 83 |
|
|
|
|
| 84 |
# Function 4: English Text to AI-Generated Text
|
| 85 |
|
| 86 |
def generate_text(english_text):
|
|
|
|
| 104 |
except Exception as e:
|
| 105 |
return f"Error in text generation: {str(e)}"
|
| 106 |
|
|
|
|
| 107 |
# Combined Function to Process All Steps
|
| 108 |
def process_audio(audio_path):
|
| 109 |
# Step 1: Tamil Audio β Tamil Text
|
|
|
|
| 123 |
|
| 124 |
# Step 4: English Text β AI-Generated Text
|
| 125 |
generated_text = generate_text(english_text)
|
| 126 |
+
return tamil_text, english_text, image, generated_text
|
|
|
|
| 127 |
|
| 128 |
# Create Gradio Interface
|
| 129 |
+
def clear_outputs():
    """Reset all four result widgets.

    Returns empty strings for the three textboxes (Tamil transcription,
    English translation, generated English text) and None to blank the
    image component.
    """
    blank = ""
    return blank, blank, None, blank
|
| 131 |
+
|
| 132 |
+
# --- Creative Gradio Interface ---
|
| 133 |
+
with gr.Blocks() as demo:
|
| 134 |
+
# Title & Subtitle with Emojis
|
| 135 |
+
gr.Markdown("### π¨ **TransArt: Multimodal Tamil Audio Experience**")
|
| 136 |
+
gr.Markdown("**Transform Tamil audio into captivating content** β from transcription and English translation to stunning AI-generated images and creative narratives! π")
|
| 137 |
+
|
| 138 |
+
# Visual Separator
|
| 139 |
+
gr.Markdown("---")
|
| 140 |
+
|
| 141 |
+
# Row for Audio Input + Buttons
|
| 142 |
+
with gr.Row():
|
| 143 |
+
audio_input = gr.Audio(type="filepath", label="πΆ Upload Tamil Audio")
|
| 144 |
+
with gr.Column():
|
| 145 |
+
submit_button = gr.Button("β¨ Submit")
|
| 146 |
+
clear_button = gr.Button("π§Ή Clear")
|
| 147 |
+
|
| 148 |
+
# Another Separator for clarity
|
| 149 |
+
gr.Markdown("---")
|
| 150 |
+
|
| 151 |
+
# Row for Transcribed Tamil (left) & Translated English (right)
|
| 152 |
+
with gr.Row():
|
| 153 |
+
transcribed_text = gr.Textbox(label="π Transcribed Tamil Text")
|
| 154 |
+
translated_text = gr.Textbox(label="π Translated English Text")
|
| 155 |
+
|
| 156 |
+
# Separator
|
| 157 |
+
gr.Markdown("---")
|
| 158 |
+
|
| 159 |
+
# Row for Generated Image (left) & Generated Text (right)
|
| 160 |
+
with gr.Row():
|
| 161 |
+
generated_image = gr.Image(label="π¨ Generated AI Image")
|
| 162 |
+
generated_text = gr.Textbox(label="π‘ Generated English Text")
|
| 163 |
+
|
| 164 |
+
# Button actions
|
| 165 |
+
submit_button.click(
|
| 166 |
+
fn=process_audio,
|
| 167 |
+
inputs=audio_input,
|
| 168 |
+
outputs=[transcribed_text, translated_text, generated_image, generated_text],
|
| 169 |
+
)
|
| 170 |
+
clear_button.click(
|
| 171 |
+
fn=clear_outputs,
|
| 172 |
+
inputs=[],
|
| 173 |
+
outputs=[transcribed_text, translated_text, generated_image, generated_text],
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
demo.launch()
|