import dataclasses
import json
import base64
import io
from pathlib import Path

import gradio as gr
import torch
import spaces
from PIL import Image as PILImage
from fastapi import FastAPI, Body
from fastapi.middleware.cors import CORSMiddleware

from uno.flux.pipeline import UNOPipeline
|
|
app = FastAPI()

# Wildcard CORS keeps this public demo easy to call from other origins;
# tighten allow_origins before deploying anywhere sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialized in create_demo() and shared with the /api/generate endpoint.
pipeline = None
|
|
def get_examples(examples_dir: str = "assets/examples") -> list:
    examples = Path(examples_dir)
    ans = []
    for example in examples.iterdir():
        if not example.is_dir():
            continue
        with open(example / "config.json") as f:
            example_dict = json.load(f)

        example_list = []
        # NOTE: "useage" is the (misspelled) key actually used in the example
        # config files, so it is kept as-is here.
        example_list.append(example_dict["useage"])
        example_list.append(example_dict["prompt"])

        for key in ["image_ref1", "image_ref2", "image_ref3", "image_ref4"]:
            if key in example_dict:
                example_list.append(str(example / example_dict[key]))
            else:
                example_list.append(None)

        example_list.append(example_dict["seed"])

        ans.append(example_list)
    return ans
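
# Sketch of the per-example config.json that get_examples() expects; the key
# names come from the lookups above, while the values are illustrative:
#
# {
#     "useage": "subject-driven generation",
#     "prompt": "a dog sitting on a bench",
#     "image_ref1": "ref1.png",
#     "seed": 42
# }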
|
|
def create_demo(
    model_type: str,
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    offload: bool = False,
):
    global pipeline
    pipeline = UNOPipeline(model_type, device, offload, only_lora=True, lora_rank=512)
    # spaces.GPU allocates a ZeroGPU slot for up to 120 s per call.
    pipeline.gradio_generate = spaces.GPU(duration=120)(pipeline.gradio_generate)
|
css = """ |
|
.gradio-container { |
|
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; |
|
} |
|
|
|
.main-header { |
|
text-align: center; |
|
margin-bottom: 2rem; |
|
background: linear-gradient(to right, #4776E6, #8E54E9); |
|
-webkit-background-clip: text; |
|
-webkit-text-fill-color: transparent; |
|
font-weight: 700; |
|
padding: 1rem 0; |
|
} |
|
|
|
.container { |
|
border-radius: 12px; |
|
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); |
|
padding: 20px; |
|
background: white; |
|
margin-bottom: 1.5rem; |
|
} |
|
|
|
.input-container { |
|
background: rgba(245, 247, 250, 0.7); |
|
border-radius: 10px; |
|
padding: 1rem; |
|
margin-bottom: 1rem; |
|
} |
|
|
|
.image-grid { |
|
display: grid; |
|
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); |
|
gap: 10px; |
|
} |
|
|
|
.generate-btn { |
|
background: linear-gradient(90deg, #4776E6, #8E54E9); |
|
border: none; |
|
color: white; |
|
padding: 10px 20px; |
|
border-radius: 50px; |
|
font-weight: 600; |
|
box-shadow: 0 4px 10px rgba(0,0,0,0.1); |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.generate-btn:hover { |
|
transform: translateY(-2px); |
|
box-shadow: 0 6px 15px rgba(0,0,0,0.15); |
|
} |
|
|
|
.badge-container { |
|
display: flex; |
|
justify-content: center; |
|
align-items: center; |
|
gap: 8px; |
|
flex-wrap: wrap; |
|
margin-bottom: 1rem; |
|
} |
|
|
|
.badge { |
|
display: inline-block; |
|
padding: 0.25rem 0.75rem; |
|
font-size: 0.875rem; |
|
font-weight: 500; |
|
line-height: 1.5; |
|
text-align: center; |
|
white-space: nowrap; |
|
vertical-align: middle; |
|
border-radius: 30px; |
|
color: white; |
|
background: #6c5ce7; |
|
text-decoration: none; |
|
} |
|
|
|
.output-container { |
|
background: rgba(243, 244, 246, 0.7); |
|
border-radius: 10px; |
|
padding: 1.5rem; |
|
} |
|
|
|
.slider-container label { |
|
font-weight: 600; |
|
margin-bottom: 0.5rem; |
|
color: #4a5568; |
|
} |
|
""" |
|
|
|
    badges_text = r"""
    <div class="badge-container">
        <a href="https://github.com/bytedance/UNO" class="badge" style="background: #24292e;"><img alt="GitHub Stars" src="https://img.shields.io/github/stars/bytedance/UNO" style="vertical-align: middle;"></a>
        <a href="https://bytedance.github.io/UNO/" class="badge" style="background: #f1c40f; color: #333;"><img alt="Project Page" src="https://img.shields.io/badge/Project%20Page-UNO-yellow" style="vertical-align: middle;"></a>
        <a href="https://arxiv.org/abs/2504.02160" class="badge" style="background: #b31b1b;"><img alt="arXiv" src="https://img.shields.io/badge/arXiv%20paper-UNO-b31b1b.svg" style="vertical-align: middle;"></a>
        <a href="https://huggingface.co./bytedance-research/UNO" class="badge" style="background: #FF9D00;"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=Model&color=orange" style="vertical-align: middle;"></a>
        <a href="https://huggingface.co./spaces/bytedance-research/UNO-FLUX" class="badge" style="background: #FF9D00;"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=demo&color=orange" style="vertical-align: middle;"></a>
    </div>
    """.strip()
|
    with gr.Blocks(css=css) as demo:
        gr.Markdown("# <div class='main-header'>UNO-FLUX Image Generator</div>")
        gr.Markdown(badges_text)

        with gr.Row():
            with gr.Column(scale=3):
                with gr.Group(elem_classes="container"):
                    prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Describe the image you want to generate...",
                        value="handsome woman in the city",
                        elem_classes="input-container"
                    )

                    gr.Markdown("### Reference Images")
                    with gr.Row(elem_classes="image-grid"):
                        image_prompt1 = gr.Image(label="Ref Img 1", visible=True, interactive=True, type="pil")
                        image_prompt2 = gr.Image(label="Ref Img 2", visible=True, interactive=True, type="pil")
                        image_prompt3 = gr.Image(label="Ref Img 3", visible=True, interactive=True, type="pil")
                        image_prompt4 = gr.Image(label="Ref Img 4", visible=True, interactive=True, type="pil")
|
                    with gr.Row():
                        with gr.Column(scale=2):
                            with gr.Group(elem_classes="slider-container"):
                                width = gr.Slider(512, 2048, 512, step=16, label="Generation Width")
                                height = gr.Slider(512, 2048, 512, step=16, label="Generation Height")

                        with gr.Column(scale=1):
                            gr.Markdown("<div style='background: #f8f9fa; padding: 10px; border-radius: 8px; border-left: 4px solid #4776E6;'>📌 The model was trained at 512x512 resolution.<br>Sizes close to 512 are the most stable; larger sizes can give better visual quality but are less stable.</div>")

                    with gr.Accordion("Advanced Options", open=False):
                        with gr.Row():
                            with gr.Column():
                                num_steps = gr.Slider(1, 50, 25, step=1, label="Number of steps")
                            with gr.Column():
                                guidance = gr.Slider(1.0, 5.0, 4.0, step=0.1, label="Guidance", interactive=True)
                            with gr.Column():
                                seed = gr.Number(-1, label="Seed (-1 for random)")

                    generate_btn = gr.Button("Generate", elem_classes="generate-btn")
|
            with gr.Column(scale=2):
                with gr.Group(elem_classes="output-container"):
                    gr.Markdown("### Generated Result")
                    output_image = gr.Image(label="Generated Image")
                    download_btn = gr.File(label="Download full-resolution", type="filepath", interactive=False)

        # Components are passed positionally, so this order must match the
        # signature of pipeline.gradio_generate.
        inputs = [
            prompt, width, height, guidance, num_steps,
            seed, image_prompt1, image_prompt2, image_prompt3, image_prompt4
        ]
        generate_btn.click(
            fn=pipeline.gradio_generate,
            inputs=inputs,
            outputs=[output_image, download_btn],
        )
|
        example_text = gr.Text("", visible=False, label="Case For:")
        examples = get_examples("./assets/examples")

        with gr.Group(elem_classes="container"):
            gr.Markdown("### <div style='text-align: center; margin-bottom: 1rem;'>Examples</div>")
            gr.Examples(
                examples=examples,
                inputs=[
                    example_text, prompt,
                    image_prompt1, image_prompt2, image_prompt3, image_prompt4,
                    seed, output_image
                ],
            )
|
with gr.Accordion("API Documentation", open=False): |
|
gr.Markdown(""" |
|
### API Usage |
|
|
|
You can use the following endpoint to generate images programmatically: |
|
|
|
**Endpoint:** `/api/generate` |
|
|
|
**Method:** POST |
|
|
|
**Request Body:** |
|
```json |
|
{ |
|
"prompt": "your text prompt", |
|
"image_refs": ["base64_encoded_image1", "base64_encoded_image2", ...], |
|
"width": 512, |
|
"height": 512, |
|
"guidance": 4.0, |
|
"num_steps": 25, |
|
"seed": -1 |
|
} |
|
``` |
|
|
|
**Response:** |
|
```json |
|
{ |
|
"image": "base64_encoded_generated_image" |
|
} |
|
``` |
|
|
|
**Example JavaScript Usage:** |
|
```javascript |
|
async function generateImage() { |
|
const response = await fetch('/api/generate', { |
|
method: 'POST', |
|
headers: { |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ |
|
prompt: "handsome woman in the city", |
|
image_refs: [], |
|
width: 512, |
|
height: 512 |
|
}), |
|
}); |
|
|
|
const data = await response.json(); |
|
const imgElement = document.getElementById('generatedImage'); |
|
imgElement.src = `data:image/png;base64,${data.image}`; |
|
} |
|
``` |
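
            **Example Python Usage** (a minimal sketch using the `requests`
            package; the URL assumes the default host/port, and the helper
            name and file paths are illustrative):
            ```python
            import base64
            import requests

            def generate(prompt, ref_paths=()):
                # Base64-encode each local reference image file.
                image_refs = []
                for path in ref_paths:
                    with open(path, "rb") as f:
                        image_refs.append(base64.b64encode(f.read()).decode())

                resp = requests.post(
                    "http://localhost:7860/api/generate",
                    json={"prompt": prompt, "image_refs": image_refs,
                          "width": 512, "height": 512},
                )
                resp.raise_for_status()
                # The response carries the generated image as a base64-encoded PNG.
                return base64.b64decode(resp.json()["image"])

            with open("result.png", "wb") as f:
                f.write(generate("handsome woman in the city"))
            ```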
            """)

    return demo
|
|
@app.post("/api/generate") |
|
async def generate_image( |
|
prompt: str = Body(...), |
|
width: int = Body(512), |
|
height: int = Body(512), |
|
guidance: float = Body(4.0), |
|
num_steps: int = Body(25), |
|
seed: int = Body(-1), |
|
image_refs: list = Body([]) |
|
): |
|
global pipeline |
|
|
|
ref_images = [] |
|
for i in range(min(4, len(image_refs))): |
|
if image_refs[i]: |
|
try: |
|
|
|
if isinstance(image_refs[i], str) and "base64" in image_refs[i]: |
|
|
|
if "," in image_refs[i]: |
|
img_data = image_refs[i].split(",")[1] |
|
else: |
|
img_data = image_refs[i] |
|
|
|
img_data = base64.b64decode(img_data) |
|
ref_img = PILImage.open(io.BytesIO(img_data)) |
|
ref_images.append(ref_img) |
|
else: |
|
ref_images.append(None) |
|
except: |
|
ref_images.append(None) |
|
else: |
|
ref_images.append(None) |
|
|
|
|
|
while len(ref_images) < 4: |
|
ref_images.append(None) |
|
|
|
|
|
result_image, _ = pipeline.gradio_generate( |
|
prompt, width, height, guidance, num_steps, seed, |
|
ref_images[0], ref_images[1], ref_images[2], ref_images[3] |
|
) |
|
|
|
|
|
buffered = io.BytesIO() |
|
result_image.save(buffered, format="PNG") |
|
img_str = base64.b64encode(buffered.getvalue()).decode() |
|
|
|
return {"image": img_str} |
|
|
|
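# Typical launch (flags are parsed from AppArgs by HfArgumentParser; the
# values shown are illustrative):
#   python app.py --name flux-dev --port 7860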
if __name__ == "__main__":
    from typing import Literal
    import uvicorn
    from transformers import HfArgumentParser

    @dataclasses.dataclass
    class AppArgs:
        name: Literal["flux-dev", "flux-dev-fp8", "flux-schnell"] = "flux-dev"
        device: Literal["cuda", "cpu"] = "cuda" if torch.cuda.is_available() else "cpu"
        offload: bool = dataclasses.field(
            default=False,
            metadata={"help": "If True, sequentially offload the models (ae, dit, text encoder) to CPU when not in use."}
        )
        port: int = 7860
        host: str = "0.0.0.0"

    parser = HfArgumentParser([AppArgs])
    args_tuple = parser.parse_args_into_dataclasses()
    args = args_tuple[0]

    demo = create_demo(args.name, args.device, args.offload)

    # Serve the Gradio UI and the JSON API from the same FastAPI app.
    app = gr.mount_gradio_app(app, demo, path="/")

    uvicorn.run(app, host=args.host, port=args.port)