Spaces:

kevalfst
/

visionary-ai

Running

App Files Files Community

kevalfst committed on May 9

Commit

556d852

verified ·

1 Parent(s): 39c1d2f

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -40

app.py CHANGED Viewed

@@ -1,42 +1,49 @@
-import torch
-from diffusers import StableDiffusionPipeline
 import gradio as gr
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load Stable Diffusion v1.4 from Hugging Face
-pipe = StableDiffusionPipeline.from_pretrained(
-    "CompVis/stable-diffusion-v1-4",
-    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-    use_safetensors=True
-)
-pipe = pipe.to(device)
-# Inference function
-def generate(prompt, guidance, steps, width, height):
-    image = pipe(prompt=prompt, guidance_scale=guidance, num_inference_steps=steps, height=height, width=width).images[0]
-    return image
-# Gradio UI
-title = "🎨 Offline Text-to-Image Generator (Stable Diffusion v1.4)"
-description = "Generate images from text prompts using a fully self-hosted Stable Diffusion model."
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="pink")) as demo:
-    gr.Markdown(f"# {title}")
-    gr.Markdown(description)
     with gr.Row():
-        with gr.Column():
-            prompt = gr.Textbox(label="Enter your prompt", placeholder="A steampunk dragon flying over a futuristic city")
-            guidance = gr.Slider(1, 20, value=7.5, step=0.5, label="Guidance Scale")
-            steps = gr.Slider(10, 100, value=30, step=5, label="Inference Steps")
-            width = gr.Slider(256, 768, value=512, step=64, label="Image Width")
-            height = gr.Slider(256, 768, value=512, step=64, label="Image Height")
-            submit = gr.Button("Generate Image")
-        with gr.Column():
-            output = gr.Image(label="Generated Image")
-    submit.click(fn=generate, inputs=[prompt, guidance, steps, width, height], outputs=output)
-demo.launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+# Load the tokenizer and model once at import time. NOTE(review): Qwen2.5-Omni checkpoints may need their dedicated model/processor classes instead of AutoModelForCausalLM - confirm it loads.
+model_name = "Qwen/Qwen2.5-Omni-3B"  # Hugging Face Hub repository id
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")  # bfloat16 weights, automatic device placement
+# Function to process inputs and generate response
+def process_input(text_input, image_input=None, audio_input=None):
+    inputs = {"text": text_input}
+    if image_input:
+        inputs["image"] = image_input
+    if audio_input:
+        inputs["audio"] = audio_input
+    # Tokenize inputs (simplified for demo)
+    input_ids = tokenizer.encode(inputs["text"], return_tensors="pt").to(model.device)
+    # Generate response
+    outputs = model.generate(input_ids, max_length=200)
+    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Placeholder for speech generation (requires additional setup)
+    response_audio = None  # Implement speech generation if needed
+    return response_text, response_audio
+# Gradio interface: the three inputs share one row; the button and both outputs follow below it
+with gr.Blocks() as demo:
+    gr.Markdown("# Qwen2.5-Omni-3B Demo")
     with gr.Row():
+        text_input = gr.Textbox(label="Text Input")
+        image_input = gr.Image(label="Upload Image")
+        audio_input = gr.Audio(label="Upload Audio")
+    submit_button = gr.Button("Submit")
+    text_output = gr.Textbox(label="Text Response")
+    audio_output = gr.Audio(label="Audio Response")
+    submit_button.click(
+        fn=process_input,
+        inputs=[text_input, image_input, audio_input],  # passed positionally to process_input
+        outputs=[text_output, audio_output]  # receives the (text, audio) tuple it returns
+    )
+# Launch the app (runs unconditionally at module level; there is no __main__ guard)
+demo.launch()