Spaces:

yfyangd
/

CritiFusion

Running on Zero

App Files Files Community

czq0719 committed 4 days ago

Commit

e571656

1 Parent(s): 6bffc9a

Add gradio app

Browse files

Files changed (1) hide show

app.py +264 -197

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # =========================
 # ONE-CELL: SDXL + CritiCore + SpecFusion + Gradio UI
-# - 3 variants only
-# - single-choice dropdown (no format_fn; compatible with older Gradio)
-# - generate ONE image per click
 # =========================
 import os, re, io, json, time, base64, asyncio, inspect, traceback
@@ -25,22 +26,30 @@ from diffusers import (
 os.environ["TOGETHER_NO_BANNER"] = "1"
 # =========================
-# 0) Variant labels (MUST be BEFORE Blocks)
 # =========================
-# Expose ONLY 3 variants in UI.
-# Map internal "criticore_on_multi_llm__specfusion" -> UI name "CritiFusion".
 VARIANT_LABELS = {
-    "base_original":  "Base (Original prompt)",
-    "base_multi_llm": "Base (Multi-LLM tag expansion)",
-    "CritiFusion":    "CritiFusion (Multi-LLM + VLM critique + SpecFusion)",
 }
-VARIANT_KEYS_UI = ["base_original", "base_multi_llm", "CritiFusion"]
-RHO_T_DEFAULT = 0.85  # fixed as requested
 TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "").strip()
 if not TOGETHER_API_KEY:
-    print("[Warn] TOGETHER_API_KEY is not set. Together steps will fail for base_multi_llm/CritiFusion.")
 # =========================
 # 1) SDXL init
@@ -105,7 +114,7 @@ def base_sample_latent(prompt: str, seed: int, H: int, W: int, neg: str):
     return z0, x0
 @torch.no_grad()
-def img2img_latent(prompt: str, image_or_latent, strength: float, guidance: float, steps: int, seed: int):
     g = torch.Generator(device=DEVICE).manual_seed(int(seed))
     out = SDXL_i2i(
         prompt=prompt,
@@ -115,7 +124,7 @@ def img2img_latent(prompt: str, image_or_latent, strength: float, guidance: floa
         num_inference_steps=int(steps),
         generator=g,
         output_type="latent",
-        negative_prompt=DEFAULT_NEG
     )
     return out.images
@@ -151,8 +160,7 @@ def clip77_strict(text: str, max_tok: int = 77) -> str:
         mid = (lo + hi) // 2
         cand = " ".join(words[:mid]) if mid > 0 else ""
         if _count_tokens(cand) <= max_tok:
-            best = cand
-            lo = mid + 1
         else:
             hi = mid - 1
     return best.strip()
@@ -165,8 +173,7 @@ def _dedup_keep_order(items: List[str]) -> List[str]:
     for t in items:
         key = re.sub(r"\s+", " ", t.lower()).strip()
         if key and key not in seen:
-            seen.add(key)
-            out.append(t.strip())
     return out
 def _order_tags(subject_first: List[str], rest: List[str]) -> List[str]:
@@ -188,10 +195,7 @@ def _order_tags(subject_first: List[str], rest: List[str]) -> List[str]:
         elif any(k in lt for k in detail_kw): buckets["detail"].append(t)
         else:                                 buckets["other"].append(t)
-    return (
-        buckets["subject"] + buckets["style"] + buckets["composition"] +
-        buckets["lighting"] + buckets["color"] + buckets["detail"] + buckets["other"]
-    )
 def pil_to_base64(img: Image.Image, fmt: str = "PNG") -> str:
     buf = io.BytesIO()
@@ -207,10 +211,8 @@ async def _maybe_close_async_together(client) -> None:
             if inspect.iscoroutinefunction(fn):
                 await fn()
             else:
-                try:
-                    fn()
-                except Exception:
-                    pass
     except Exception:
         pass
@@ -221,7 +223,7 @@ def _run_async(coro):
     try:
         loop = asyncio.get_event_loop()
         if loop.is_running():
-            return loop.run_until_complete(coro)
         return loop.run_until_complete(coro)
     except RuntimeError:
         return asyncio.run(coro)
@@ -278,10 +280,8 @@ def _TAG_RE(tag: str):
 def _extract_tag(text: str, tag: str, fallback: str = "") -> str:
     s = (text or "").strip()
-    r = _TAG_RE(tag)
-    m = r.search(s)
-    if m:
-        return m.group(1).strip()
     s2 = s.replace("&lt;","<").replace("&gt;",">")
     m2 = r.search(s2)
     return m2.group(1).strip() if m2 else fallback.strip()
@@ -303,22 +303,17 @@ class CritiCore:
     async def decompose_components(self, user_prompt: str) -> List[str]:
         client = AsyncTogether(api_key=os.environ["TOGETHER_API_KEY"])
         try:
-            tasks = [
-                client.chat.completions.create(
-                    model=m,
-                    messages=[{"role":"system","content": _DECOMP_SYS},
-                              {"role":"user","content": user_prompt}],
-                    temperature=0.4, max_tokens=256
-                )
-                for m in LLM_MULTI_CANDIDATES
-            ]
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             texts = []
             for r in rs:
-                try:
-                    texts.append(r.choices[0].message.content)
-                except Exception:
-                    pass
             if not texts:
                 return []
             joined = "\n\n---\n\n".join(texts)
@@ -332,8 +327,8 @@ class CritiCore:
             try:
                 obj = json.loads(txt)
             except Exception:
-                s, e = txt.find("{"), txt.rfind("}")
-                obj = json.loads(txt[s:e+1]) if (s != -1 and e != -1) else {"components":[]}
             comps = [c.strip() for c in obj.get("components", []) if isinstance(c, str) and c.strip()]
             return comps[:6]
         finally:
@@ -345,23 +340,18 @@ class CritiCore:
         seed_pos = _dedup_keep_order(seed["seed_pos"])
         seed_neg = seed["seed_neg"]
         try:
-            tasks = [
-                client.chat.completions.create(
-                    model=m,
-                    messages=[{"role":"system","content": _TXT_SYS},
-                              {"role":"user","content":
-                               f"Short idea: {user_prompt}\nSeed: {', '.join(seed_pos)}\nOutput: a single comma-separated tag list."}],
-                    temperature=0.7, max_tokens=220
-                )
-                for m in LLM_MULTI_CANDIDATES
-            ]
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             props = []
             for r in rs:
-                try:
-                    props.append(r.choices[0].message.content)
-                except Exception:
-                    pass
             if not props:
                 pos = ", ".join([user_prompt.strip()] + seed_pos)
@@ -382,6 +372,7 @@ class CritiCore:
                 ordered = _order_tags([tags[0]], tags[1:])
                 pos = ", ".join(_dedup_keep_order(ordered))
             for q in ["high detailed","sharp focus","8k","UHD"]:
                 if q.lower() not in {t.lower() for t in _split_tags(pos)}:
                     pos += ", " + q
@@ -409,7 +400,6 @@ class CritiCore:
                 "Output EXACTLY two tags:\n"
                 "<issues>...</issues>\n<refined>...</refined>"
             )
         try:
             tasks = []
             for m in VLM_CANDIDATES:
@@ -425,10 +415,8 @@ class CritiCore:
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             ok = []
             for m, r in zip(VLM_CANDIDATES, rs):
-                try:
-                    ok.append((m, r.choices[0].message.content))
-                except Exception:
-                    pass
             if not ok:
                 return {"refined": original_prompt, "issues_merged": ""}
@@ -437,13 +425,11 @@ class CritiCore:
             for m, raw in ok:
                 issues = _extract_tag(raw, "issues", "")
                 refined = _extract_tag(raw, "refined", original_prompt)
-                if refined.strip():
-                    refined_items.append((m, refined.strip()))
-                if issues.strip():
-                    per_vlm_issues[m] = _summarize_issues_lines(issues, 5)
-            joined_issues  = "\n".join(f"[{m}] {t}" for m, t in per_vlm_issues.items())
-            joined_refined = "\n".join(f"[{m}] {t}" for m, t in refined_items) if refined_items else original_prompt
             merged = await client.chat.completions.create(
                 model=self.aggregator,
@@ -472,7 +458,7 @@ class CritiCore:
         return text
 # =========================
-# 5) SpecFusion
 # =========================
 @torch.no_grad()
 def frequency_fusion(
@@ -505,9 +491,13 @@ def frequency_fusion(
     x = x + torch.randn_like(x) * 0.001
     return x.to(dtype=x_hi_latent.dtype)
-def _decode_to_pil(latents, pipe):
-    out = decode_image_sdxl(latents, pipe)
-    return out if isinstance(out, Image.Image) else out.images[0]
 def _guidance_for_k(k: int) -> float:
     if k >= 20: return 12.0
@@ -515,116 +505,145 @@ def _guidance_for_k(k: int) -> float:
     return 5.2
 # =========================
-# 6) Shared + one-variant generator
 # =========================
-async def _shared_materials(user_prompt: str, seed: int, H: int, W: int, preset: str):
-    critic = CritiCore(preset=preset)
-    pos_tags_77, neg_tags = await critic.make_tags(user_prompt, clip77=True)
-    comps = await critic.decompose_components(user_prompt)
-    z0_og,  base_og  = base_sample_latent(user_prompt, seed=seed, H=H, W=W, neg=DEFAULT_NEG)
-    z0_enh, base_enh = base_sample_latent(pos_tags_77, seed=seed, H=H, W=W, neg=neg_tags)
-    vlm_out = await critic.vlm_refine(base_enh, pos_tags_77, comps or [])
-    vlm_agg_77 = vlm_out.get("refined") or pos_tags_77
-    return dict(
-        pos_tags_77=pos_tags_77, neg_tags=neg_tags, comps=comps,
-        z0_og=z0_og, base_og=base_og,
-        z0_enh=z0_enh, base_enh=base_enh,
-        vlm_agg_77=vlm_agg_77,
-        vlm_issues=vlm_out.get("issues_merged",""),
-    )
 async def generate_one_variant(
     user_prompt: str,
     seed: int,
-    H: int, W: int,
-    preset: str,
-    variant_key: str,
     total_steps_refine: int,
     last_k: int,
     guidance: float,
-    save_outputs: bool,
-    out_dir: str,
-):
-    shared = await _shared_materials(user_prompt, seed, H, W, preset)
-    pos_tags_77 = shared["pos_tags_77"]
-    neg_tags    = shared["neg_tags"]
-    comps       = shared["comps"]
-    z0_og       = shared["z0_og"]
-    base_og     = shared["base_og"]
-    z0_enh      = shared["z0_enh"]
-    base_enh    = shared["base_enh"]
-    vlm_agg_77  = shared["vlm_agg_77"]
-    vlm_issues  = shared["vlm_issues"]
-    out_path = Path(out_dir) if (save_outputs and out_dir) else None
-    if out_path is not None:
-        out_path.mkdir(parents=True, exist_ok=True)
-    meta = {
         "user_prompt": user_prompt,
-        "preset": preset,
         "variant_key": variant_key,
-        "variant_label": VARIANT_LABELS.get(variant_key, variant_key),
-        "seed": int(seed),
-        "H": int(H),
-        "W": int(W),
-        "pos_tags_77": pos_tags_77,
-        "neg_tags": neg_tags,
-        "components": comps,
-        "vlm_agg_77_on_multi_llm": vlm_agg_77,
-        "vlm_issues": vlm_issues,
-        "params": {
-            "total_steps_refine": int(total_steps_refine),
-            "last_k": int(last_k),
-            "guidance": float(guidance),
-            "rho_t": float(RHO_T_DEFAULT),
-        }
     }
     if variant_key == "base_original":
-        img = base_og
-        if out_path is not None:
-            img.save(out_path / "base_original.png")
-        return img, meta
-    if variant_key == "base_multi_llm":
-        img = base_enh
-        if out_path is not None:
-            img.save(out_path / "base_multi_llm.png")
-        return img, meta
     if variant_key == "CritiFusion":
-        lk = int(last_k)
-        strength = float(strength_for_last_k(lk, int(total_steps_refine)))
-        gscale   = float(guidance) if float(guidance) > 0 else float(_guidance_for_k(lk))
-        steps    = int(total_steps_refine)
         refined_on_enh = CritiCore.merge_vlm_multi_text(vlm_agg_77, pos_tags_77)
-        meta["refined_prompt_77"] = refined_on_enh
-        meta["img2img"] = {"strength": strength, "guidance_scale": gscale, "steps": steps}
         z_ref = img2img_latent(
             refined_on_enh, z0_enh,
-            strength=strength, guidance=gscale, steps=steps,
-            seed=int(seed) + 2100 + lk
         )
         fused_lat = frequency_fusion(z_ref, z0_enh, base_c=0.5, rho_t=RHO_T_DEFAULT, device=DEVICE)
-        img = _decode_to_pil(fused_lat, SDXL_i2i)
-        if out_path is not None:
-            img.save(out_path / "CritiFusion.png")
-            (out_path / "meta.json").write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
-        return img, meta
     raise ValueError(f"Unknown variant_key: {variant_key}")
 # =========================
-# 7) UI callback
 # =========================
 def ui_run_once(
     user_prompt: str,
@@ -632,55 +651,95 @@ def ui_run_once(
     H: int,
     W: int,
     preset: str,
-    variant_key: str,
     total_steps_refine: int,
     last_k: int,
     guidance: float,
     save_outputs: bool,
     out_dir: str,
 ):
     t0 = time.time()
     try:
         if not user_prompt or not user_prompt.strip():
-            return None, "Empty prompt."
-        # Only these require Together
-        if variant_key in ("base_multi_llm", "CritiFusion") and not TOGETHER_API_KEY:
-            return None, "ERROR: TOGETHER_API_KEY not set (required for this variant)."
-        img, meta = _run_async(generate_one_variant(
             user_prompt=user_prompt.strip(),
             seed=int(seed),
             H=int(H), W=int(W),
-            preset=preset,
-            variant_key=variant_key,
             total_steps_refine=int(total_steps_refine),
             last_k=int(last_k),
             guidance=float(guidance),
-            save_outputs=bool(save_outputs),
-            out_dir=str(out_dir or ""),
         ))
         meta["elapsed_sec"] = round(time.time() - t0, 3)
-        return img, json.dumps(meta, ensure_ascii=False, indent=2)
     except Exception:
-        return None, traceback.format_exc()
 @spaces.GPU
 def ui_run_once_gpu(*args, **kwargs):
     return ui_run_once(*args, **kwargs)
 # =========================
-# 8) Gradio UI (Dropdown: single choice; 3 variants only)
 # =========================
-VARIANT_CHOICES_DISPLAY = [VARIANT_LABELS[k] for k in VARIANT_KEYS_UI]
-DISPLAY_TO_KEY = {VARIANT_LABELS[k]: k for k in VARIANT_KEYS_UI}
 with gr.Blocks(title="CritiFusion (SDXL) Demo") as demo:
     gr.Markdown(
         "## CritiFusion Demo (SDXL)\n"
-        "- Choose **one** variant and generate **one** image per click.\n"
         f"- Device: **{DEVICE_STR}**, DType: **{DTYPE}**\n"
         f"- Together API: {'✅ set' if TOGETHER_API_KEY else '❌ missing (set TOGETHER_API_KEY)'}"
     )
@@ -689,7 +748,7 @@ with gr.Blocks(title="CritiFusion (SDXL) Demo") as demo:
         with gr.Column(scale=7):
             user_prompt = gr.Textbox(
                 label="Prompt",
-                value="A fluffy orange cat lying on a window ledge, front-facing, stylized in 3D Pixar look, soft indoor lighting",
                 lines=3,
             )
             with gr.Row():
@@ -698,42 +757,50 @@ with gr.Blocks(title="CritiFusion (SDXL) Demo") as demo:
             with gr.Row():
                 H = gr.Number(label="H", value=1024, precision=0)
                 W = gr.Number(label="W", value=1024, precision=0)
-            variant_display = gr.Dropdown(
-                label="Variant (select ONE)",
-                choices=VARIANT_CHOICES_DISPLAY,
-                value=VARIANT_LABELS["CritiFusion"],
-            )
             with gr.Row():
                 total_steps_refine = gr.Slider(label="total_steps_refine", minimum=10, maximum=80, step=1, value=50)
                 last_k = gr.Slider(label="last_k", minimum=1, maximum=50, step=1, value=37)
-            guidance = gr.Slider(label="Guidance (0 => fallback rule)", minimum=0.0, maximum=15.0, step=0.1, value=0.0)
             with gr.Row():
-                save_outputs = gr.Checkbox(label="Save outputs to disk", value=False)
                 out_dir = gr.Textbox(label="Output dir (only if save enabled)", value="./variants_demo_gradio")
-            run_btn = gr.Button("Generate", variant="primary")
         with gr.Column(scale=8):
-            out_img = gr.Image(label="Result", type="pil")
             meta_json = gr.Code(label="Meta / Debug (JSON)", language="json")
-    def _map_variant_display_to_key(vdisp: str) -> str:
-        return DISPLAY_TO_KEY.get(vdisp, "CritiFusion")
     run_btn.click(
-        fn=lambda p, s, h, w, pre, vdisp, ts, lk, g, sv, od: ui_run_once_gpu(
-            p, s, h, w, pre, _map_variant_display_to_key(vdisp), ts, lk, g, sv, od
-        ),
-        inputs=[user_prompt, seed, H, W, preset, variant_display, total_steps_refine, last_k, guidance, save_outputs, out_dir],
-        outputs=[out_img, meta_json],
-        api_name=False,
     )
 demo.queue().launch(
     debug=True,
-    share=True,
-    theme=gr.themes.Soft(),
 )

 # =========================
 # ONE-CELL: SDXL + CritiCore + SpecFusion + Gradio UI
+# - Keep original "Enabled Variants" pills UI (CheckboxGroup)
+# - Enforce: ONLY ONE can be selected at a time (auto-fix on change)
+# - 4 variants (but names are clearer)
+# - No Radio.format_fn (older gradio safe)
 # =========================
 import os, re, io, json, time, base64, asyncio, inspect, traceback
 os.environ["TOGETHER_NO_BANNER"] = "1"
 # =========================
+# 0) Variants (MUST be BEFORE Blocks)
 # =========================
+# internal_key -> UI display label
 VARIANT_LABELS = {
+    "base_original":                     "Base (Original Prompt)",
+    "base_multi_llm":                    "Base (MoA Tags)",
+    "CritiFusion":                       "CritiFusion (MoA+VLM+SpecFusion)",
+    "criticore_on_original__specfusion": "CritiFusion (Original+VLM+SpecFusion)",
 }
+# order for gallery display
+VARIANT_ORDER = [
+    VARIANT_LABELS["base_original"],
+    VARIANT_LABELS["base_multi_llm"],
+    VARIANT_LABELS["CritiFusion"],
+    VARIANT_LABELS["criticore_on_original__specfusion"],
+]
+RHO_T_DEFAULT = 0.85  # fixed
+# ---- SAFETY: do NOT hardcode API keys ----
 TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "").strip()
 if not TOGETHER_API_KEY:
+    print("[Warn] TOGETHER_API_KEY is not set. Together-based variants will error if selected.")
 # =========================
 # 1) SDXL init
     return z0, x0
 @torch.no_grad()
+def img2img_latent(prompt: str, image_or_latent, strength: float, guidance: float, steps: int, seed: int, neg: str):
     g = torch.Generator(device=DEVICE).manual_seed(int(seed))
     out = SDXL_i2i(
         prompt=prompt,
         num_inference_steps=int(steps),
         generator=g,
         output_type="latent",
+        negative_prompt=neg
     )
     return out.images
         mid = (lo + hi) // 2
         cand = " ".join(words[:mid]) if mid > 0 else ""
         if _count_tokens(cand) <= max_tok:
+            best = cand; lo = mid + 1
         else:
             hi = mid - 1
     return best.strip()
     for t in items:
         key = re.sub(r"\s+", " ", t.lower()).strip()
         if key and key not in seen:
+            seen.add(key); out.append(t.strip())
     return out
 def _order_tags(subject_first: List[str], rest: List[str]) -> List[str]:
         elif any(k in lt for k in detail_kw): buckets["detail"].append(t)
         else:                                 buckets["other"].append(t)
+    return buckets["subject"] + buckets["style"] + buckets["composition"] + buckets["lighting"] + buckets["color"] + buckets["detail"] + buckets["other"]
 def pil_to_base64(img: Image.Image, fmt: str = "PNG") -> str:
     buf = io.BytesIO()
             if inspect.iscoroutinefunction(fn):
                 await fn()
             else:
+                try: fn()
+                except Exception: pass
     except Exception:
         pass
     try:
         loop = asyncio.get_event_loop()
         if loop.is_running():
+            return loop.run_until_complete(coro)  # nest_asyncio enabled
         return loop.run_until_complete(coro)
     except RuntimeError:
         return asyncio.run(coro)
 def _extract_tag(text: str, tag: str, fallback: str = "") -> str:
     s = (text or "").strip()
+    r = _TAG_RE(tag); m = r.search(s)
+    if m: return m.group(1).strip()
     s2 = s.replace("&lt;","<").replace("&gt;",">")
     m2 = r.search(s2)
     return m2.group(1).strip() if m2 else fallback.strip()
     async def decompose_components(self, user_prompt: str) -> List[str]:
         client = AsyncTogether(api_key=os.environ["TOGETHER_API_KEY"])
         try:
+            tasks = [client.chat.completions.create(
+                model=m,
+                messages=[{"role":"system","content": _DECOMP_SYS},
+                          {"role":"user","content": user_prompt}],
+                temperature=0.4, max_tokens=256
+            ) for m in LLM_MULTI_CANDIDATES]
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             texts = []
             for r in rs:
+                try: texts.append(r.choices[0].message.content)
+                except Exception: pass
             if not texts:
                 return []
             joined = "\n\n---\n\n".join(texts)
             try:
                 obj = json.loads(txt)
             except Exception:
+                s,e = txt.find("{"), txt.rfind("}")
+                obj = json.loads(txt[s:e+1]) if (s!=-1 and e!=-1) else {"components":[]}
             comps = [c.strip() for c in obj.get("components", []) if isinstance(c, str) and c.strip()]
             return comps[:6]
         finally:
         seed_pos = _dedup_keep_order(seed["seed_pos"])
         seed_neg = seed["seed_neg"]
         try:
+            tasks = [client.chat.completions.create(
+                model=m,
+                messages=[{"role":"system","content": _TXT_SYS},
+                          {"role":"user","content":
+                           f"Short idea: {user_prompt}\nSeed: {', '.join(seed_pos)}\nOutput: a single comma-separated tag list."}],
+                temperature=0.7, max_tokens=220
+            ) for m in LLM_MULTI_CANDIDATES]
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             props = []
             for r in rs:
+                try: props.append(r.choices[0].message.content)
+                except Exception: pass
             if not props:
                 pos = ", ".join([user_prompt.strip()] + seed_pos)
                 ordered = _order_tags([tags[0]], tags[1:])
                 pos = ", ".join(_dedup_keep_order(ordered))
+            # quality floor
             for q in ["high detailed","sharp focus","8k","UHD"]:
                 if q.lower() not in {t.lower() for t in _split_tags(pos)}:
                     pos += ", " + q
                 "Output EXACTLY two tags:\n"
                 "<issues>...</issues>\n<refined>...</refined>"
             )
         try:
             tasks = []
             for m in VLM_CANDIDATES:
             rs = await asyncio.gather(*tasks, return_exceptions=True)
             ok = []
             for m, r in zip(VLM_CANDIDATES, rs):
+                try: ok.append((m, r.choices[0].message.content))
+                except Exception: pass
             if not ok:
                 return {"refined": original_prompt, "issues_merged": ""}
             for m, raw in ok:
                 issues = _extract_tag(raw, "issues", "")
                 refined = _extract_tag(raw, "refined", original_prompt)
+                if refined.strip(): refined_items.append((m, refined.strip()))
+                if issues.strip():  per_vlm_issues[m] = _summarize_issues_lines(issues, 5)
+            joined_issues  = "\n".join(f"[{m}] {t}" for m,t in per_vlm_issues.items())
+            joined_refined = "\n".join(f"[{m}] {t}" for m,t in refined_items) if refined_items else original_prompt
             merged = await client.chat.completions.create(
                 model=self.aggregator,
         return text
 # =========================
+# 5) SpecFusion (latent FFT gate)
 # =========================
 @torch.no_grad()
 def frequency_fusion(
     x = x + torch.randn_like(x) * 0.001
     return x.to(dtype=x_hi_latent.dtype)
def _decode_to_pil(latents):
    """Decode ``latents`` with ``decode_image_sdxl`` on the img2img pipeline.

    A PIL image is returned as-is. A result exposing ``.images`` yields its
    first entry. Anything else is handed back unchanged.
    """
    decoded = decode_image_sdxl(latents, SDXL_i2i)
    has_image_list = not isinstance(decoded, Image.Image) and hasattr(decoded, "images")
    return decoded.images[0] if has_image_list else decoded
 def _guidance_for_k(k: int) -> float:
     if k >= 20: return 12.0
     return 5.2
 # =========================
+# 6) ONE-variant generator (because UI enforces single selection)
 # =========================
 async def generate_one_variant(
     user_prompt: str,
     seed: int,
+    H: int,
+    W: int,
     total_steps_refine: int,
     last_k: int,
     guidance: float,
+    preset: str,
+    variant_key: str,
+    out_dir: Optional[Path] = None,
+) -> Tuple[Image.Image, str, Dict[str, object]]:
+    """
+    Returns:
+      img, display_name, meta_dict
+    """
+    meta: Dict[str, object] = {
         "user_prompt": user_prompt,
         "variant_key": variant_key,
     }
+    def _save(im: Image.Image, display_name: str):
+        if out_dir is None:
+            return
+        out_dir.mkdir(parents=True, exist_ok=True)
+        safe = re.sub(r"[^a-zA-Z0-9_\\-]+", "_", display_name)[:120]
+        im.save(out_dir / f"{safe}.png")
+    # ----------------------------------------------------------
+    # Variant 1: Base (Original Prompt)  [NO Together needed]
+    # ----------------------------------------------------------
     if variant_key == "base_original":
+        z0_og, base_og = base_sample_latent(user_prompt, seed=seed, H=H, W=W, neg=DEFAULT_NEG)
+        meta.update({"note": "SDXL base generation from original prompt."})
+        _save(base_og, VARIANT_LABELS[variant_key])
+        return base_og, VARIANT_LABELS[variant_key], meta
+    # The rest need Together
+    if not TOGETHER_API_KEY:
+        raise RuntimeError("TOGETHER_API_KEY not set, but selected variant requires Together.")
+    critic = CritiCore(preset=preset)
+    # Common refine params
+    lk = int(last_k)
+    strength = float(strength_for_last_k(lk, total_steps_refine))
+    use_guidance = float(guidance) if float(guidance) > 0 else float(_guidance_for_k(lk))
+    steps = int(total_steps_refine)
+    meta.update({"strength": strength, "guidance": use_guidance, "steps": steps, "last_k": lk})
+    # ----------------------------------------------------------
+    # Variant 2: Base (MoA Tags)
+    # ----------------------------------------------------------
+    if variant_key == "base_multi_llm":
+        pos_tags_77, neg_tags = await critic.make_tags(user_prompt, clip77=True)
+        z0_enh, base_enh = base_sample_latent(pos_tags_77, seed=seed, H=H, W=W, neg=neg_tags)
+        meta.update({
+            "pos_tags_77": pos_tags_77,
+            "neg_tags": neg_tags,
+            "note": "SDXL base generation from MoA-generated tags."
+        })
+        _save(base_enh, VARIANT_LABELS[variant_key])
+        return base_enh, VARIANT_LABELS[variant_key], meta
+    # ----------------------------------------------------------
+    # Variant 3: CritiFusion (MoA+VLM+SpecFusion)
+    # ----------------------------------------------------------
     if variant_key == "CritiFusion":
+        pos_tags_77, neg_tags = await critic.make_tags(user_prompt, clip77=True)
+        comps = await critic.decompose_components(user_prompt)
+        z0_enh, base_enh = base_sample_latent(pos_tags_77, seed=seed, H=H, W=W, neg=neg_tags)
+        vlm_out = await critic.vlm_refine(base_enh, pos_tags_77, comps or [])
+        vlm_agg_77 = vlm_out.get("refined") or pos_tags_77
         refined_on_enh = CritiCore.merge_vlm_multi_text(vlm_agg_77, pos_tags_77)
         z_ref = img2img_latent(
             refined_on_enh, z0_enh,
+            strength=strength, guidance=use_guidance, steps=steps,
+            seed=seed + 2100 + lk,
+            neg=DEFAULT_NEG
         )
         fused_lat = frequency_fusion(z_ref, z0_enh, base_c=0.5, rho_t=RHO_T_DEFAULT, device=DEVICE)
+        img_sf = _decode_to_pil(fused_lat)
+        meta.update({
+            "pos_tags_77": pos_tags_77,
+            "neg_tags": neg_tags,
+            "components": comps,
+            "vlm_refined_77": vlm_agg_77,
+            "enhanced_prompt_77": refined_on_enh,
+            "vlm_issues": vlm_out.get("issues_merged", ""),
+            "note": "MoA tags + VLM critique prompt + img2img + SpecFusion."
+        })
+        _save(img_sf, VARIANT_LABELS[variant_key])
+        return img_sf, VARIANT_LABELS[variant_key], meta
+    # ----------------------------------------------------------
+    # Variant 4: CritiFusion (Original+VLM+SpecFusion)
+    # ----------------------------------------------------------
+    if variant_key == "criticore_on_original__specfusion":
+        pos_tags_77, neg_tags = await critic.make_tags(user_prompt, clip77=True)
+        comps = await critic.decompose_components(user_prompt)
+        z0_og, base_og = base_sample_latent(user_prompt, seed=seed, H=H, W=W, neg=DEFAULT_NEG)
+        vlm_on_og = await critic.vlm_refine(base_og, user_prompt, comps or [])
+        refined_og_77 = clip77_strict(vlm_on_og.get("refined") or user_prompt, 77)
+        refined_merge = CritiCore.merge_vlm_multi_text(refined_og_77, pos_tags_77)
+        z_ref = img2img_latent(
+            refined_merge, z0_og,
+            strength=strength, guidance=use_guidance, steps=steps,
+            seed=seed + 2400 + lk,
+            neg=DEFAULT_NEG
+        )
+        fused_lat = frequency_fusion(z_ref, z0_og, base_c=0.5, rho_t=RHO_T_DEFAULT, device=DEVICE)
+        img_sf = _decode_to_pil(fused_lat)
+        meta.update({
+            "pos_tags_77": pos_tags_77,
+            "neg_tags": neg_tags,
+            "components": comps,
+            "vlm_refined_77": refined_og_77,
+            "enhanced_prompt_77": refined_merge,
+            "vlm_issues": vlm_on_og.get("issues_merged", ""),
+            "note": "Original prompt + VLM critique prompt + img2img + SpecFusion."
+        })
+        _save(img_sf, VARIANT_LABELS[variant_key])
+        return img_sf, VARIANT_LABELS[variant_key], meta
     raise ValueError(f"Unknown variant_key: {variant_key}")
 # =========================
+# 7) UI callbacks
 # =========================
 def ui_run_once(
     user_prompt: str,
     H: int,
     W: int,
     preset: str,
     total_steps_refine: int,
     last_k: int,
     guidance: float,
+    enabled_variants_display: List[str],
     save_outputs: bool,
     out_dir: str,
 ):
     t0 = time.time()
     try:
         if not user_prompt or not user_prompt.strip():
+            return [], "Empty prompt."
+        # display -> internal
+        display_to_internal = {v: k for k, v in VARIANT_LABELS.items()}
+        chosen_display = (enabled_variants_display or [])[-1:]  # enforce single here too
+        if not chosen_display:
+            return [], "Please select ONE variant."
+        chosen_display = chosen_display[0]
+        variant_key = display_to_internal.get(chosen_display)
+        if variant_key is None:
+            return [], f"Unknown selected variant: {chosen_display}"
+        out_path = Path(out_dir) if (save_outputs and out_dir) else None
+        img, disp_name, meta = _run_async(generate_one_variant(
             user_prompt=user_prompt.strip(),
             seed=int(seed),
             H=int(H), W=int(W),
             total_steps_refine=int(total_steps_refine),
             last_k=int(last_k),
             guidance=float(guidance),
+            preset=preset,
+            variant_key=variant_key,
+            out_dir=out_path,
         ))
+        meta["ui"] = {
+            "seed": int(seed),
+            "H": int(H),
+            "W": int(W),
+            "preset": preset,
+            "total_steps_refine": int(total_steps_refine),
+            "last_k": int(last_k),
+            "guidance": float(guidance),
+            "selected_variant": chosen_display,
+            "save_outputs": bool(save_outputs),
+            "out_dir": out_dir if save_outputs else None,
+        }
         meta["elapsed_sec"] = round(time.time() - t0, 3)
+        gallery = [(img, disp_name)]
+        return gallery, json.dumps(meta, ensure_ascii=False, indent=2)
     except Exception:
+        return [], traceback.format_exc()
 @spaces.GPU
 def ui_run_once_gpu(*args, **kwargs):
     return ui_run_once(*args, **kwargs)
 # =========================
+# 8) Single-select enforcement for CheckboxGroup
 # =========================
def enforce_single_variant(new_list: List[str], prev_list: List[str]):
    """Collapse a CheckboxGroup selection to at most one entry.

    Args:
        new_list: Selection reported after the change (``None`` is treated
            as empty).
        prev_list: Selection stored before the change (``None`` is treated
            as empty).

    Returns:
        A ``(checkbox_value, state_value)`` pair holding the same single-item
        (or empty) selection, as two separate list objects so the two outputs
        never alias each other.
    """
    current = list(new_list or [])
    previous = set(prev_list or [])

    # Walk `current` in its own order instead of diffing two sets: set
    # iteration order is arbitrary for strings, so picking "the last added"
    # from a set difference is not deterministic when several items are new.
    newly_added = [label for label in current if label not in previous]

    if newly_added:
        # Keep the most recently added item.
        selection = newly_added[-1:]
    else:
        # Nothing new (an item was removed or nothing changed): keep the last
        # remaining item, or stay empty.
        selection = current[-1:]

    return selection, list(selection)  # update checkbox value + state
+# =========================
+# 9) Gradio UI
+# =========================
 # Build the demo layout; the Blocks context is bound to `demo`, which is launched afterwards.
 with gr.Blocks(title="CritiFusion (SDXL) Demo") as demo:
     gr.Markdown(
         "## CritiFusion Demo (SDXL)\n"
+        "- Keep **Enabled Variants** pills UI, but **only one** can be selected.\n"
         f"- Device: **{DEVICE_STR}**, DType: **{DTYPE}**\n"
         f"- Together API: {'✅ set' if TOGETHER_API_KEY else '❌ missing (set TOGETHER_API_KEY)'}"
     )
     # NOTE(review): the container that encloses the two columns below is not visible in this view — confirm against the full file.
         with gr.Column(scale=7):
             user_prompt = gr.Textbox(
                 label="Prompt",
+                value="A fluffy orange cat lying on a window ledge, front-facing, stylized 3D, soft indoor lighting",
                 lines=3,
             )
             # NOTE(review): `seed` and `preset` are passed to run_btn.click below, but their widgets are not
             # defined in the visible lines; presumably they belong to the first (empty) row here — confirm.
             with gr.Row():
             with gr.Row():
                 H = gr.Number(label="H", value=1024, precision=0)
                 W = gr.Number(label="W", value=1024, precision=0)
             with gr.Row():
                 total_steps_refine = gr.Slider(label="total_steps_refine", minimum=10, maximum=80, step=1, value=50)
                 last_k = gr.Slider(label="last_k", minimum=1, maximum=50, step=1, value=37)
             # Defaults to 0.0, which the label describes as "fallback rule"; the rule itself is not defined in this block.
+            guidance = gr.Slider(
+                label="Guidance (0 => fallback rule)",
+                minimum=0.0, maximum=15.0, step=0.1, value=0.0
+            )
+            # --- pills UI, but single-select enforced ---
             # Holds the previous selection so enforce_single_variant can tell which entry was just added.
+            selected_state = gr.State([VARIANT_LABELS["base_original"]])
+            enabled_variants = gr.CheckboxGroup(
+                label="Enabled Variants (select ONE)",
+                choices=[VARIANT_LABELS[k] for k in VARIANT_LABELS.keys()],
+                value=[VARIANT_LABELS["base_original"]],
+            )
+            # enforce single selection on change
             # The handler's two return values are written back to the checkbox group and to the state.
+            enabled_variants.change(
+                fn=enforce_single_variant,
+                inputs=[enabled_variants, selected_state],
+                outputs=[enabled_variants, selected_state],
+            )
             with gr.Row():
+                save_outputs = gr.Checkbox(label="Save output to disk", value=False)
                 out_dir = gr.Textbox(label="Output dir (only if save enabled)", value="./variants_demo_gradio")
+            run_btn = gr.Button("Run", variant="primary")
         with gr.Column(scale=8):
+            gallery = gr.Gallery(label="Result", columns=1, height=600)
             meta_json = gr.Code(label="Meta / Debug (JSON)", language="json")
     # Run button: the listed widget values go to ui_run_once_gpu in this order; its two return values
     # fill the gallery and the JSON panel.
     run_btn.click(
+        fn=ui_run_once_gpu,
+        inputs=[user_prompt, seed, H, W, preset, total_steps_refine, last_k, guidance, enabled_variants, save_outputs, out_dir],
+        outputs=[gallery, meta_json],
+        api_name=False,   # gradio-safe (avoid schema issues)
     )
 demo.queue().launch(
     debug=True,
+    share=True,      # optional; helps if you run outside Spaces
+    show_api=False,
 )