pixagram-backup

Runtime error

App Files Community

primerz committed on Nov 6

Commit

ef9c54d

verified ·

1 Parent(s): 270a823

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -175

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 """
 Pixagram AI Pixel Art Generator - Gradio Interface
 """
 import spaces
 import gradio as gr
 import os
@@ -20,18 +24,19 @@ def apply_preset(preset_name):
         preset_name = "Balanced Portrait"
     preset = PRESETS[preset_name]
     return (
         preset["strength"],
         preset["guidance_scale"],
-        preset["identity_preservation"],
         preset["lora_scale"],
         preset["depth_control_scale"],
-        preset["identity_control_scale"],
         f"[APPLIED] {preset_name}\n{preset['description']}"
     )
-@spaces.GPU(duration=35)
 def process_image(
     image,
     prompt,
@@ -39,9 +44,9 @@ def process_image(
     steps,
     guidance_scale,
     depth_control_scale,
-    identity_control_scale,
-    lora_scale,
-    identity_preservation,
     strength,
     enable_color_matching,
     consistency_mode,
@@ -53,24 +58,24 @@ def process_image(
         return None, None
     try:
-        # Generate retro art
-        result = converter.generate_retro_art(
-            input_image=image,
             prompt=prompt,
             negative_prompt=negative_prompt,
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             depth_control_scale=depth_control_scale,
-            identity_control_scale=identity_control_scale,
-            lora_scale=lora_scale,
-            identity_preservation=identity_preservation,
             strength=strength,
             enable_color_matching=enable_color_matching,
             consistency_mode=consistency_mode,
             seed=int(seed)
         )
-        # Generate captions if requested
         caption_text = None
         if enable_captions:
             captions = []
@@ -81,15 +86,16 @@ def process_image(
                 captions.append(f"Input: {input_caption}")
                 print(f"[CAPTION] Input: {input_caption}")
-            # Output caption
-            output_caption = converter.generate_caption(result)
-            if output_caption:
-                captions.append(f"Output: {output_caption}")
-                print(f"[CAPTION] Output: {output_caption}")
             caption_text = "\n".join(captions) if captions else None
-        return result, caption_text
     except Exception as e:
         print(f"Error: {e}")
@@ -102,12 +108,13 @@ def process_image(
 def get_model_status():
     """Generate model status markdown"""
     if converter.models_loaded:
-        status_text = "**[OK] Loaded Models:**\n"
         status_text += f"- Custom Checkpoint (Horizon): {'[OK] Loaded' if converter.models_loaded['custom_checkpoint'] else '[OK] Using SDXL base'}\n"
         status_text += f"- LORA (RetroArt): {'[OK] Loaded' if converter.models_loaded['lora'] else ' Disabled'}\n"
-        status_text += f"- InstantID: {'[OK] Loaded' if converter.models_loaded['instantid'] else ' Disabled'}\n"
-        status_text += f"- Zoe Depth: {'[OK] Loaded' if converter.models_loaded['zoe_depth'] else ' Fallback'}\n"
-        status_text += f"- IP-Adapter (Face Embeddings): {'[OK] Loaded' if converter.models_loaded.get('ip_adapter', False) else ' Keypoints only'}\n"
         return status_text
     return "**Model status unavailable**"
@@ -162,8 +169,9 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
     # App description
     gr.Markdown(f"""
-    <h2 class="app-title"> PIXAGRAM.IO | AI Pixel Art Generator (Img2Img + InstantID)</h2>
     Transform your photos into retro pixel art style with **strong face preservation!**
     """)
     # Model status
@@ -172,19 +180,14 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
     # Scheduler info
     scheduler_info = f"""
     **[CONFIG] Advanced Configuration:**
-    - Pipeline: **Img2Img** (structure preservation)
-    - Face System: **CLIP + InsightFace** (dual embeddings)
-    - **[ADVANCED] Enhanced Resampler:** 10 layers, 20 heads (+3-5% quality)
-    - **[ADVANCED] Adaptive Attention:** Context-aware scaling (+2-3% quality)
-    - **[ADVANCED] Multi-Scale Processing:** 3-scale face analysis (+1-2% quality)
-    - **[ADVANCED] Adaptive Parameters:** Auto-adjust for face quality (+2-3% consistency)
-    - **[ADVANCED] Face-Aware Color Matching:** LAB space with saturation preservation (+1-2% quality)
     - Scheduler: **LCM** (12 steps, fast generation)
-    - Recommended CFG: **1.15-1.5** (optimized for LCM)
-    - Identity Boost: **1.15x** (for maximum face fidelity)
-    - CLIP Skip: **2** (enhanced style control)
     - LORA Trigger: `{TRIGGER_WORD}` (auto-added)
-    - **Total Improvement:** +10-15% over base = **96-99% face similarity**
     """
     gr.Markdown(scheduler_info)
@@ -193,15 +196,15 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
-                label="Prompt (trigger word auto-added)",
-                value=" ",
                 lines=3,
-                info=f"'{TRIGGER_WORD}' will be automatically added"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                value=" ",
                 lines=2
             )
@@ -230,18 +233,18 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
                 steps = gr.Slider(
                     minimum=4,
                     maximum=50,
-                    value=DEFAULT_PARAMS['num_inference_steps'],
                     step=1,
-                    label=f" Inference Steps (LCM optimized for 12)"
                 )
                 with gr.Row():
                     guidance_scale = gr.Slider(
                         minimum=0.5,
-                        maximum=2.0,
-                        value=DEFAULT_PARAMS['guidance_scale'],
-                        step=0.05,
-                        label="Guidance Scale (CFG)\nHigher = stronger adherence to prompt"
                     )
                     strength = gr.Slider(
@@ -255,49 +258,53 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
                 gr.Markdown("### Advanced Fine-Tuning")
                 with gr.Row():
-                    depth_control_scale = gr.Slider(
-                        minimum=0.3,
-                        maximum=1.2,
-                        value=DEFAULT_PARAMS['depth_control_scale'],
-                        step=0.05,
-                        label="Depth ControlNet Scale"
-                    )
                     lora_scale = gr.Slider(
-                        minimum=0.5,
-                        maximum=2.0,
                         value=DEFAULT_PARAMS['lora_scale'],
                         step=0.05,
                         label="RetroArt LORA Scale\nLower = more realistic"
                     )
-            with gr.Accordion(" InstantID Settings (for portraits)", open=True):
-                identity_control_scale = gr.Slider(
-                    minimum=0.3,
-                    maximum=1.5,
-                    value=DEFAULT_PARAMS['identity_control_scale'],
                     step=0.05,
-                    label="InstantID ControlNet Scale (facial keypoints structure)"
                 )
-                identity_preservation = gr.Slider(
-                    minimum=0.3,
-                    maximum=2.0,
-                    value=DEFAULT_PARAMS['identity_preservation'],
                     step=0.05,
-                    label="Identity Preservation (IP-Adapter scale)\nHigher = stronger face preservation"
                 )
                 enable_color_matching = gr.Checkbox(
                     value=DEFAULT_PARAMS['enable_color_matching'],
-                    label="[OPTIONAL] Enable Color Matching (gentle skin tone adjustment)",
-                    info="Apply subtle color matching - disable if colors look faded"
                 )
                 consistency_mode = gr.Checkbox(
                     value=DEFAULT_PARAMS['consistency_mode'],
-                    label="[CONSISTENCY] Auto-adjust parameters for predictable results",
-                    info="Validates and balances parameters to reduce variation"
                 )
                 seed_input = gr.Number(
@@ -308,15 +315,20 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
                 )
                 enable_captions = gr.Checkbox(
-                    value=False,
                     label="[CAPTIONS] Generate descriptive captions",
-                    info="Generate short captions for input and output images"
                 )
-            generate_btn = gr.Button(">>> Generate Retro Art", variant="primary", size="lg")
         with gr.Column():
-            output_image = gr.Image(label="Retro Art Output")
             caption_output = gr.Textbox(
                 label="Generated Captions",
@@ -326,119 +338,45 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
             )
             gr.Markdown(f"""
-            ### Tips for Maximum Quality Results:
-            **[OPTIMIZATIONS] Advanced Optimizations Active:**
-            - **Enhanced Resampler:** 10 layers, 20 heads (+3-5% quality)
-            - **Adaptive Attention:** Context-aware scaling (+2-3% quality)
-            - **Multi-Scale Processing:** 3-scale face analysis (+1-2% quality)
-            - **Adaptive Parameters:** Auto-adjust based on face quality (+2-3% consistency)
-            - **Enhanced Color Matching:** Face-aware LAB color space (+1-2% quality)
-            **Expected Quality:**
-            - Base system: 90-93% face similarity
-            - With optimizations: 96-99% face similarity
-            - Ultra Fidelity preset: 97-99%+ face similarity
-            **[PRESETS] Optimized Preset Guide:**
-            - **Ultra Fidelity:** 96-98% similarity, minimal transformation
-            - **Premium Portrait:** 94-96% similarity, excellent balance (recommended)
-            - **Balanced Portrait:** 90-93% similarity, good balance
-            - **Artistic Excellence:** 88-91% similarity, creative with likeness
-            - **Style Focus:** 83-87% similarity, maximum pixel art
-            - **Subtle Enhancement:** 97-99% similarity, photo-realistic
-            **[ADAPTIVE] Automatic Adjustments:**
-            - Small faces (< 50K px): Boosts identity preservation to 1.8
-            - Low confidence (< 80%): Increases identity control to 0.9
-            - Profile views (> 20° yaw): Enhances preservation to 1.7
-            - Good quality faces: Uses your selected parameters
-            **[PARAMETERS] Parameter Relationships:**
-            - **Strength** (most important): Controls transformation intensity
-              - `0.38-0.45`: Maximum fidelity (Ultra/Subtle presets)
-              - `0.48-0.55`: Balanced quality (Premium/Balanced presets)
-              - `0.58-0.68`: Artistic freedom (Artistic/Style presets)
-            - **Identity Preservation**: Face embedding strength (auto-boosted 1.15x)
-            - **Guidance Scale (CFG)**: LCM-optimized range 1.1-1.5
-            - **LORA Scale**: Pixel art intensity (inverse to identity)
-            **[CONSISTENCY] Consistency Mode Benefits:**
-            - Validates parameter combinations for predictability
-            - Prevents identity-LORA conflicts
-            - Keeps CFG in optimal LCM range
-            - Balances ControlNet scales
-            - Recommended: Always ON
-            **[SEED] Reproducibility:**
-            - **-1:** Random, explore variations
-            - **Fixed (e.g., 42):** Identical results for testing
             **[WORKFLOW] Recommended Workflow:**
-            1. Upload high-res portrait (face > 30% of frame)
-            2. Select preset (start with Premium Portrait)
-            3. Enable Consistency Mode (ON by default)
-            4. First generation: See quality level
-            5. If adjusting: Change ONE parameter at a time
-            6. Fix seed for consistent testing
-            **[TECHNICAL] System Details:**
-            - Enhanced Resampler: 10 layers, 20 heads, 1280 dim
-            - Attention: Adaptive per-layer scaling
-            - Face Processing: Multi-scale (0.75x, 1x, 1.25x)
-            - Color Matching: LAB space, face-aware masking
-            - Resolution: Auto-optimized to 896x1152 or 832x1216
             """)
     # Preset button click events
-    preset_btn_1.click(
-        fn=lambda: apply_preset("Ultra Fidelity"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
-    preset_btn_2.click(
-        fn=lambda: apply_preset("Premium Portrait"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
-    preset_btn_3.click(
-        fn=lambda: apply_preset("Balanced Portrait"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
-    preset_btn_4.click(
-        fn=lambda: apply_preset("Artistic Excellence"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
-    preset_btn_5.click(
-        fn=lambda: apply_preset("Style Focus"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
-    preset_btn_6.click(
-        fn=lambda: apply_preset("Subtle Enhancement"),
-        inputs=[],
-        outputs=[strength, guidance_scale, identity_preservation, lora_scale,
-                depth_control_scale, identity_control_scale, preset_status]
-    )
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
-            depth_control_scale, identity_control_scale, lora_scale,
-            identity_preservation, strength, enable_color_matching,
             consistency_mode, seed_input, enable_captions
         ],
         outputs=[output_image, caption_output]
@@ -452,4 +390,4 @@ if __name__ == "__main__":
         server_port=7860,
         share=True,
         show_api=True
-    )

 """
 Pixagram AI Pixel Art Generator - Gradio Interface
+MODIFIED for IP-Adapter-FaceIDXL (non-plus) and LCM
 """
+import torch  # <-- MUST BE FIRST
+torch.jit.script = lambda f: f  # <-- MUST BE SECOND
 import spaces
 import gradio as gr
 import os
         preset_name = "Balanced Portrait"
     preset = PRESETS[preset_name]
+    # Re-added lora_scale
     return (
         preset["strength"],
         preset["guidance_scale"],
+        preset.get("ip_adapter_scale", 1.0),
         preset["lora_scale"],
         preset["depth_control_scale"],
+        preset.get("canny_control_scale", 0.5),
         f"[APPLIED] {preset_name}\n{preset['description']}"
     )
+@spaces.GPU(duration=35) # LCM is fast
 def process_image(
     image,
     prompt,
     steps,
     guidance_scale,
     depth_control_scale,
+    canny_control_scale,
+    lora_scale,             # Re-added lora_scale
+    ip_adapter_scale,
     strength,
     enable_color_matching,
     consistency_mode,
         return None, None
     try:
+        # Generate retro art (returns a list of 4 images)
+        result_images = converter.generate(
+            image=image,
             prompt=prompt,
             negative_prompt=negative_prompt,
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             depth_control_scale=depth_control_scale,
+            canny_control_scale=canny_control_scale,
+            lora_scale=lora_scale,         # Re-added lora_scale
+            ip_adapter_scale=ip_adapter_scale,
             strength=strength,
             enable_color_matching=enable_color_matching,
             consistency_mode=consistency_mode,
             seed=int(seed)
         )
+        # Generate captions if requested (from original image)
         caption_text = None
         if enable_captions:
             captions = []
                 captions.append(f"Input: {input_caption}")
                 print(f"[CAPTION] Input: {input_caption}")
+            # Output caption (from first generated image)
+            if result_images:
+                output_caption = converter.generate_caption(result_images[0])
+                if output_caption:
+                    captions.append(f"Output (Image 1): {output_caption}")
+                    print(f"[CAPTION] Output: {output_caption}")
             caption_text = "\n".join(captions) if captions else None
+        return result_images, caption_text
     except Exception as e:
         print(f"Error: {e}")
 def get_model_status():
     """Generate model status markdown"""
     if converter.models_loaded:
+        status_text = "**[OK] Loaded Models (FaceIDXL / LCM):**\n"
         status_text += f"- Custom Checkpoint (Horizon): {'[OK] Loaded' if converter.models_loaded['custom_checkpoint'] else '[OK] Using SDXL base'}\n"
         status_text += f"- LORA (RetroArt): {'[OK] Loaded' if converter.models_loaded['lora'] else ' Disabled'}\n"
+        status_text += f"- IP-Adapter (FaceIDXL): {'[OK] Loaded' if converter.models_loaded['ip_adapter'] else ' [ERROR] DISABLED'}\n"
+        status_text += f"- LeReS++ Depth: {'[OK] Loaded' if converter.models_loaded['leres_depth'] else ' [ERROR] DISABLED'}\n"
+        status_text += f"- Canny Detector: {'[OK] Loaded' if converter.models_loaded['canny'] else ' [ERROR] DISABLED'}\n"
+        status_text += f"- ControlNets: Depth + Canny\n"
         return status_text
     return "**Model status unavailable**"
     # App description
     gr.Markdown(f"""
+    <h2 class="app-title"> PIXAGRAM.IO | AI Pixel Art Generator (Img2Img + FaceIDXL + LCM)</h2>
     Transform your photos into retro pixel art style with **strong face preservation!**
+    This version uses **LCM**, IP-Adapter-FaceIDXL, LeReS++ Depth, and Canny ControlNets.
     """)
     # Model status
     # Scheduler info
     scheduler_info = f"""
     **[CONFIG] Advanced Configuration:**
+    - Pipeline: **IP-Adapter-FaceIDXL (Img2Img)**
+    - Face System: **InsightFace (buffalo_l)** (512D embeddings)
+    - **[FaceID] IP-Adapter:** `ip-adapter-faceid_sdxl.bin` (Optional, only if face is detected)
+    - **[CONTROL] Dual ControlNets:** LeReS++ Depth + Canny
+    - **[ADVANCED] Auto-Captioning:** Input image caption added to prompt
     - Scheduler: **LCM** (12 steps, fast generation)
+    - Recommended CFG: **1.0-2.0**
     - LORA Trigger: `{TRIGGER_WORD}` (auto-added)
     """
     gr.Markdown(scheduler_info)
             input_image = gr.Image(label="Input Image", type="pil")
             prompt = gr.Textbox(
+                label="Prompt (trigger word & caption auto-added)",
+                value="a person",
                 lines=3,
+                info=f"'{TRIGGER_WORD}' and an auto-generated caption will be added"
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
+                value="blurry, low quality, ugly, distorted, monochrome, lowres, bad anatomy, worst quality",
                 lines=2
             )
                 steps = gr.Slider(
                     minimum=4,
                     maximum=50,
+                    value=12, # LCM default
                     step=1,
+                    label=f" Inference Steps (LCM optimized for 8-12)"
                 )
                 with gr.Row():
                     guidance_scale = gr.Slider(
                         minimum=0.5,
+                        maximum=3.0,
+                        value=1.5, # LCM default
+                        step=0.1,
+                        label="Guidance Scale (CFG)"
                     )
                     strength = gr.Slider(
                 gr.Markdown("### Advanced Fine-Tuning")
                 with gr.Row():
+                    # --- LORA SCALE SLIDER MODIFIED ---
                     lora_scale = gr.Slider(
+                        minimum=0.0,
+                        maximum=4.0,  # Changed from 2.0 to 4.0
                         value=DEFAULT_PARAMS['lora_scale'],
                         step=0.05,
                         label="RetroArt LORA Scale\nLower = more realistic"
                     )
+                    depth_control_scale = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.2,
+                        value=DEFAULT_PARAMS['depth_control_scale'],
+                        step=0.05,
+                        label="Depth ControlNet Scale (LeReS++)"
+                    )
+            with gr.Accordion(" Face & Structure Settings", open=True):
+                ip_adapter_scale = gr.Slider(
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=1.0,
                     step=0.05,
+                    label="Identity Scale (IP-Adapter FaceID)"
                 )
+                canny_control_scale = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.2,
+                    value=0.5,
                     step=0.05,
+                    label="Canny ControlNet Scale (Structure)"
                 )
                 enable_color_matching = gr.Checkbox(
                     value=DEFAULT_PARAMS['enable_color_matching'],
+                    label="[DISABLED] Enable Color Matching",
+                    info="Disabled for multi-image output",
+                    interactive=False
                 )
                 consistency_mode = gr.Checkbox(
                     value=DEFAULT_PARAMS['consistency_mode'],
+                    label="[DISABLED] Auto-adjust parameters",
+                    info="Disabled for this pipeline",
+                    interactive=False
                 )
                 seed_input = gr.Number(
                 )
                 enable_captions = gr.Checkbox(
+                    value=True,
                     label="[CAPTIONS] Generate descriptive captions",
+                    info="Generate captions for input and output images"
                 )
+            generate_btn = gr.Button(">>> Generate 4 Retro Art Images", variant="primary", size="lg")
         with gr.Column():
+            output_image = gr.Gallery(
+                label="Retro Art Output (4 Images)",
+                columns=2,
+                object_fit="contain",
+                height="auto"
+            )
             caption_output = gr.Textbox(
                 label="Generated Captions",
             )
             gr.Markdown(f"""
+            ### 💡 How to Get Full Style (The "Battle of Scales")
+            Your problem is that **Identity** (realism) is fighting **LoRA** (style). To make the *whole scene* stylized, you MUST change the balance.
+            **[PARAMETERS] For "Max Style" (Try This!):**
+            - **`Img2Img Strength`:** **`0.8` - `0.85`**
+              (This is the *most important* slider. It gives the LoRA power over the whole scene.)
+            - **`Identity Scale`:** **`0.6` - `0.7`**
+              (You *must* lower this to let the LoRA pixelate the face.)
+            - **`RetroArt LORA Scale`:** **`1.2` - `1.4`**
+              (Now you can boost the LoRA's power.)
+            - **`Depth` / `Canny` Scales:** **`0.4` - `0.6`**
+              (Lower these to let the LoRA change the realistic structure.)
             **[WORKFLOW] Recommended Workflow:**
+            1.  Upload a clear portrait.
+            2.  Set the "Max Style" parameters above.
+            3.  Generate. This will be your new stylistic baseline.
+            4.  If the face is *too* stylized, slowly increase **`Identity Scale`** (e.g., to 0.75).
+            5.  If the background is *too* messy, slowly increase **`Depth Control`** (e.g., to 0.6).
             """)
     # Preset button click events
+    all_sliders = [strength, guidance_scale, ip_adapter_scale, lora_scale,
+                   depth_control_scale, canny_control_scale, preset_status]
+    preset_btn_1.click(fn=lambda: apply_preset("Ultra Fidelity"), inputs=[], outputs=all_sliders)
+    preset_btn_2.click(fn=lambda: apply_preset("Premium Portrait"), inputs=[], outputs=all_sliders)
+    preset_btn_3.click(fn=lambda: apply_preset("Balanced Portrait"), inputs=[], outputs=all_sliders)
+    preset_btn_4.click(fn=lambda: apply_preset("Artistic Excellence"), inputs=[], outputs=all_sliders)
+    preset_btn_5.click(fn=lambda: apply_preset("Style Focus"), inputs=[], outputs=all_sliders)
+    preset_btn_6.click(fn=lambda: apply_preset("Subtle Enhancement"), inputs=[], outputs=all_sliders)
     generate_btn.click(
         fn=process_image,
         inputs=[
             input_image, prompt, negative_prompt, steps, guidance_scale,
+            depth_control_scale, canny_control_scale, lora_scale,
+            ip_adapter_scale, strength, enable_color_matching,
             consistency_mode, seed_input, enable_captions
         ],
         outputs=[output_image, caption_output]
         server_port=7860,
         share=True,
         show_api=True
+    )