阿灰 committed
Commit 9b9835e · 1 Parent(s): 5b31657
.gitattributes CHANGED
@@ -1,3 +1,5 @@
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,443 @@
+import gradio as gr
+
+import requests
+import os
+import time
+import json
+from datetime import datetime
+import oss2
+import cv2
+from pathlib import Path
+import decord
+from gradio.utils import get_cache_folder
+
+cache_version = 20250325
+dashscope_api_key = os.getenv("API_KEY", "")
+
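+# Subclass of gr.helpers.Examples that allows pinning the cache directory
+# name (used below for the example videos) instead of the auto-generated one.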
+class Examples(gr.helpers.Examples):
+    def __init__(self, *args, directory_name=None, **kwargs):
+        super().__init__(*args, **kwargs, _initiated_directly=False)
+        if directory_name is not None:
+            self.cached_folder = get_cache_folder() / directory_name
+            self.cached_file = Path(self.cached_folder) / "log.csv"
+        self.create()
+
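+# oss2's EnvironmentVariableCredentialsProvider reads OSS_ACCESS_KEY_ID and
+# OSS_ACCESS_KEY_SECRET from the environment; both must be set for uploads.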
+def upload_to_oss(local_file_path, remote_file_path, expire_time=3600):
+    remote_url = "motionshop/%s/%s" % (datetime.now().strftime("%Y%m%d"), remote_file_path)
+    for i in range(5):
+        try:
+            from oss2.credentials import EnvironmentVariableCredentialsProvider
+            auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
+            bucket = oss2.Bucket(auth, 'oss-cn-hangzhou.aliyuncs.com', 'virtualbuy-devo')
+            bucket.put_object_from_file(key=remote_url, filename=local_file_path)
+            break
+        except Exception as e:
+            if i < 4:  # not the last retry
+                time.sleep(2)  # wait 2 seconds before the next retry
+                continue
+            else:  # last retry failed; give up
+                raise e
+    return bucket.sign_url('GET', remote_url, expire_time)
+
+def get_url(filepath):
+    filename = os.path.basename(filepath)
+    remote_file_path = "test/%s" % filename
+    return upload_to_oss(filepath, remote_file_path)
+
+def online_detect(filepath):
+    url = "https://poc-dashscope.aliyuncs.com/api/v1/services/default/default/default"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer {}".format(dashscope_api_key)
+    }
+    data = {
+        "model": "pre-motionshop-detect-gradio",
+        "input": {
+            "video_url": filepath
+        },
+        "parameters": {
+            "threshold": 0.4,
+            "min_area_ratio": 0.001
+        }
+    }
+
+    print("Call detect api, params: " + json.dumps(data))
+    query_result_request = requests.post(
+        url,
+        json=data,
+        headers=headers
+    )
+    print("Detect api returned: " + query_result_request.text)
+    return json.loads(query_result_request.text)
+
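+# As consumed in detect_human below, the detect response looks roughly like
+# {"output": {"frame_index": <int>, "bbox": [[cx, cy, w, h], ...]}}, with the
+# boxes given in normalized center/size coordinates.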
+def online_render(filepath, frame_id, bbox, replacement_ids, cache_url=None, model="pre-motionshop-render-gradio"):
+    url = "https://poc-dashscope.aliyuncs.com/api/v1/services/async-default/async-default/async-default"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer {}".format(dashscope_api_key),
+        "X-DashScope-Async": "enable"
+    }
+    data = {
+        "model": model,
+        "input": {
+            "video_url": filepath,
+            "frame_index": frame_id,
+            "bbox": bbox,
+            "replacement_id": replacement_ids
+        },
+        "parameters": {}
+    }
+
+    if cache_url is not None:
+        data["input"]["cache_url"] = cache_url
+
+    print("Call render video api with params: " + json.dumps(data))
+    query_result_request = requests.post(
+        url,
+        json=data,
+        headers=headers
+    )
+    print("Render video api returned: " + query_result_request.text)
+    return json.loads(query_result_request.text)
+
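+# The "X-DashScope-Async: enable" header makes the service return a task_id
+# immediately instead of blocking; the result is fetched via get_async_result.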
+def get_async_result(task_id):
+    while True:
+        result = requests.post(
+            "https://poc-dashscope.aliyuncs.com/api/v1/tasks/%s" % task_id,
+            headers={
+                "Authorization": "Bearer {}".format(dashscope_api_key),
+            }
+        )
+        result = json.loads(result.text)
+        if "output" in result and result["output"]["task_status"] in ["SUCCEEDED", "FAILED"]:
+            break
+        time.sleep(1)
+    return result
+
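+# Note: get_async_result polls once per second with no client-side timeout,
+# so a task stuck in a non-terminal state blocks its caller indefinitely.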
+def save_video_cv2(vid, resize_video_input, resize_h, resize_w, fps):
+    fourcc = cv2.VideoWriter_fourcc(*'XVID')
+    out = cv2.VideoWriter(resize_video_input, fourcc, fps, (resize_w, resize_h))
+    for idx in range(len(vid)):
+        frame = vid[idx].asnumpy()[:, :, ::-1]  # decord yields RGB; flip to BGR for cv2
+        frame = cv2.resize(frame, (resize_w, resize_h))
+        out.write(frame)
+    out.release()
+
+def detect_human(video_input):
+    video_input_basename = os.path.basename(video_input)
+    resize_video_input = os.path.join(
+        os.path.dirname(video_input),
+        video_input_basename.split(".")[0] + "_resize." + video_input_basename.split(".")[-1])
+    vid = decord.VideoReader(video_input)
+    fps = vid.get_avg_fps()
+    H, W, C = vid[0].shape
+    if H > 1280 or W > 1280:
+        # Downscale so the longer side is 1280 while keeping the aspect ratio.
+        if H > W:
+            resize_h, resize_w = 1280, int(W * 1280 / H)
+        else:
+            resize_h, resize_w = int(H * 1280 / W), 1280
+        save_video_cv2(vid, resize_video_input, resize_h, resize_w, fps)
+        new_video_input = resize_video_input
+    else:
+        new_video_input = video_input
+    video_url = get_url(new_video_input)
+    detect_result = online_detect(video_url)
+    check_result = "output" in detect_result
+    if not check_result:
+        # Fail fast with a readable message instead of a KeyError below.
+        raise gr.Error("Human detection failed: " + json.dumps(detect_result))
+    select_frame_index = detect_result["output"]["frame_index"]
+    boxes = detect_result["output"]["bbox"][:3]
+    print("Detected %d characters" % len(boxes))
+
+    cap = cv2.VideoCapture(new_video_input)
+    cap.set(cv2.CAP_PROP_POS_FRAMES, select_frame_index)
+    _, box_image = cap.read()
+    box_image = cv2.cvtColor(box_image, cv2.COLOR_BGR2RGB)
+
+    width, height = box_image.shape[1], box_image.shape[0]
+
+    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # red, green, blue boxes
+    for i, box in enumerate(boxes):
+        # Convert normalized center/size coordinates to pixel corners.
+        box = [
+            (box[0] - box[2] / 2) * width, (box[1] - box[3] / 2) * height,
+            (box[0] + box[2] / 2) * width, (box[1] + box[3] / 2) * height]
+        box_image = cv2.rectangle(
+            box_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), colors[i], 2)
+
+    video_state = {
+        "check_result": check_result,
+        "select_frame_index": select_frame_index,
+        "box": boxes,
+        "replace_ids": [],
+        "image_to_3d_tasks": {},
+        "video_url": video_url,
+        "video_path": new_video_input
+    }
+    return video_state, box_image, gr.update(visible=True), gr.update(visible=False)
+
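+# detect_human returns (state, annotated frame, show template_frame,
+# hide video_input): the annotated detection frame replaces the uploaded
+# video in the UI until it is cleared.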
+def predict(video_state, first_image, second_image, third_image):
+    if len(video_state["box"]) == 0:
+        return None, "No human detected, please use a video with a clearly visible person"
+
+    print("images:", first_image, second_image, third_image)
+
+    tasks = []
+    boxes = []
+    if first_image is not None and len(video_state["box"]) >= 1:
+        tasks.append(image_to_3d(first_image))
+        boxes.append(video_state["box"][0])
+
+    if second_image is not None and len(video_state["box"]) >= 2:
+        tasks.append(image_to_3d(second_image))
+        boxes.append(video_state["box"][1])
+
+    if third_image is not None and len(video_state["box"]) >= 3:
+        tasks.append(image_to_3d(third_image))
+        boxes.append(video_state["box"][2])
+
+    if len(tasks) == 0:
+        return None, "Please upload at least one character photo for replacement."
+
+    ids = []
+    for t in tasks:
+        try:
+            image_to_3d_result = get_async_result(t)
+            print("image to 3d finished", image_to_3d_result)
+            ids.append(image_to_3d_result["output"]["ply_url"])
+        except Exception as e:
+            print(e)
+            return None, "Error in 3D model generation, please check the uploaded image"
+
+    if video_state["check_result"]:
+        try:
+            taskid = online_render(video_state["video_url"], video_state["select_frame_index"], boxes, ids, None)["output"]["task_id"]
+            task_output = get_async_result(taskid)
+            print("Video synthesis completed, api returned: " + json.dumps(task_output))
+            video_url = task_output["output"]["synthesis_video_url"]
+            return video_url, "Processing Success"
+        except Exception as e:
+            print(e)
+            return None, "Error in video synthesis, please change the material and try again"
+    else:
+        return None, "Error in human detection, please use a video with a clearly visible person"
+
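+# image_to_3d (below) only submits the async reconstruction task, so predict
+# effectively launches all replacements concurrently, then waits on each.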
+def online_img_to_3d(img_url):
+    url = "https://poc-dashscope.aliyuncs.com/api/v1/services/async-default/async-default/async-default"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer {}".format(dashscope_api_key),
+        "X-DashScope-Async": "enable"
+    }
+    data = {
+        "model": "pre-image-to-3d-gradio",
+        "input": {
+            "image_url": img_url,
+        },
+        "parameters": {}
+    }
+
+    print("Call image to 3d api, params: " + json.dumps(data))
+    query_result_request = requests.post(
+        url,
+        json=data,
+        headers=headers
+    )
+    return json.loads(query_result_request.text)
+
+def image_to_3d(image_path):
+    url = get_url(image_path)
+    task_send_result = online_img_to_3d(url)
+    image_to_3d_task_id = task_send_result["output"]["task_id"]
+    return image_to_3d_task_id
+
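+# End-to-end flow per character: local image -> OSS signed URL -> async
+# image-to-3d task; the task's ply_url later becomes a replacement_id for
+# online_render.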
+def gradio_demo():
+    with gr.Blocks() as iface:
+        # Per-session UI state.
+        video_state = gr.State(
+            {
+                "check_result": False,
+                "select_frame_index": 0,
+                "box": [],
+                "replace_ids": [],
+                "image_to_3d_tasks": {},
+                "video_url": "",
+                "video_path": ""
+            }
+        )
+
+        gr.HTML(
+            """
+            <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+                <div>
+                    <h1>Motionshop2</h1>
+                    <div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;">
+                        <a class="flex-item" href="https://aigc3d.github.io/motionshop-2" target="_blank">
+                            <img src="https://img.shields.io/badge/Project_Page-Motionshop2-green.svg" alt="Project Page">
+                        </a>
+                        <a class="flex-item" href="https://lingtengqiu.github.io/LHM/" target="_blank">
+                            <img src="https://img.shields.io/badge/Project_Page-LHM-green.svg" alt="Project Page">
+                        </a>
+                        <a class="flex-item" href="https://github.com/aigc3d/LHM" target="_blank">
+                            <img src="https://img.shields.io/badge/Github-LHM-blue.svg" alt="GitHub Code">
+                        </a>
+                        <a class="flex-item" href="https://arxiv.org/abs/2503.10625" target="_blank">
+                            <img src="https://img.shields.io/badge/Paper-LHM-darkred.svg" alt="arXiv Paper">
+                        </a>
+                    </div>
+                </div>
+            </div>
+            """
+        )
+
+        gr.Markdown("""<h4 style="color: green;"> 1. Choose or upload a video (duration <= 15s, resolution <= 720p)</h4>""")
+        with gr.Row():
+            with gr.Column():
+                gr.HTML("""
+                <style>
+                #input_video video, #output_video video {
+                    height: 480px !important;
+                    object-fit: contain;
+                }
+                #template_frame img {
+                    height: 480px !important;
+                    object-fit: contain;
+                }
+                </style>
+                """)
+                video_input = gr.Video(elem_id="input_video")
+                template_frame = gr.Image(type="pil", interactive=True, elem_id="template_frame", visible=False)
+                Examples(
+                    fn=detect_human,
+                    examples=sorted([
+                        os.path.join("files", "example_videos", name)
+                        for name in os.listdir(os.path.join("files", "example_videos"))
+                    ]),
+                    run_on_click=True,
+                    inputs=[video_input],
+                    outputs=[video_state, template_frame, template_frame, video_input],
+                    directory_name="examples_videos",
+                    cache_examples=False,
+                )
+
+        gr.Markdown("""<h4 style="color: green;"> 2. Choose or upload images to replace </h4>""")
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("Replace the character in the red box with...")
+                with gr.Row():
+                    first_image = gr.Image(type="filepath", interactive=True, elem_id="first_image", visible=True, height=480, width=270)
+                    first_example = gr.Examples(
+                        examples=sorted([os.path.join("files", "example_images", name) for name in os.listdir(os.path.join("files", "example_images"))]),
+                        inputs=[first_image],
+                        examples_per_page=6
+                    )
+            with gr.Column():
+                gr.Markdown("Replace the character in the green box with...")
+                with gr.Row():
+                    second_image = gr.Image(type="filepath", interactive=True, elem_id="second_image", visible=True, height=480, width=270)
+                    second_example = gr.Examples(
+                        examples=sorted([os.path.join("files", "example_images", name) for name in os.listdir(os.path.join("files", "example_images"))]),
+                        inputs=[second_image],
+                        examples_per_page=6
+                    )
+            with gr.Column():
+                gr.Markdown("Replace the character in the blue box with...")
+                with gr.Row():
+                    third_image = gr.Image(type="filepath", interactive=True, elem_id="third_image", visible=True, height=480, width=270)
+                    third_example = gr.Examples(
+                        examples=sorted([os.path.join("files", "example_images", name) for name in os.listdir(os.path.join("files", "example_images"))]),
+                        inputs=[third_image],
+                        examples_per_page=6
+                    )
+
+        gr.Markdown("""<h4 style="color: green;"> 3. Click Start; each generation may take about 5 minutes. </h4>""")
+        with gr.Row():
+            with gr.Column():
+                motion_shop_predict_button = gr.Button(value="Start", variant="primary")
+                video_output = gr.Video(elem_id="output_video")
+                error_message = gr.Textbox(label="Processing Status", visible=True, interactive=False)
+
+        video_input.upload(
+            fn=detect_human,
+            inputs=[video_input],
+            outputs=[video_state, template_frame, template_frame, video_input],
+        )
+
+        motion_shop_predict_button.click(
+            fn=predict,
+            inputs=[video_state, first_image, second_image, third_image],
+            outputs=[video_output, error_message]
+        )
+
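+        # Uploads and example clicks both route through detect_human with the
+        # same outputs, so example videos behave exactly like user uploads.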
+        # Reset everything when the annotated frame is cleared.
+        template_frame.clear(
+            lambda: (
+                {
+                    "check_result": False,
+                    "select_frame_index": 0,
+                    "box": [],
+                    "replace_ids": [],
+                    "image_to_3d_tasks": {},
+                    "video_url": "",
+                    "video_path": ""
+                },
+                None,
+                None,
+                None,
+                gr.update(visible=True),
+                gr.update(visible=False),
+                gr.update(value=None),
+                gr.update(value=None),
+                gr.update(value=None),
+                gr.update(value="")
+            ),
+            [],
+            [
+                video_state,
+                video_output,
+                template_frame,
+                video_input,
+                video_input,
+                template_frame,
+                first_image,
+                second_image,
+                third_image,
+                error_message
+            ],
+            queue=False,
+            show_progress=False)
+
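+        # The 10 values returned by the lambda map 1:1 onto the outputs list;
+        # video_input and template_frame appear twice so that both their values
+        # and visibility are reset.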
+        # Example wiring kept for reference (run_example is not defined in this file):
+        # gr.Examples(
+        #     examples=[os.path.join(os.path.dirname(__file__), "./test_sample/", test_sample)
+        #               for test_sample in ["test-sample8.mp4", "test-sample4.mp4",
+        #                                   "test-sample2.mp4", "test-sample13.mp4"]],
+        #     fn=run_example,
+        #     inputs=[video_input],
+        #     outputs=[video_input],
+        #     # cache_examples=True,
+        # )
+
+    iface.queue(default_concurrency_limit=200)
+    iface.launch(debug=False, max_threads=10, server_name="0.0.0.0")
+
+if __name__ == "__main__":
+    gradio_demo()
files/example_images/ai_woman1.jpg ADDED

Git LFS Details

  • SHA256: 42e1bdeeafe337427fa9a952d5f5ed6a1b5dd5c7ecd5e4d7b7aa52029639bab2
  • Pointer size: 131 Bytes
  • Size of remote file: 337 kB
files/example_images/ai_woman2.jpg ADDED

Git LFS Details

  • SHA256: 2d94a62844c95eddef4f34f9915ae90af04619df4bf9f287774bc5317f7ae800
  • Pointer size: 131 Bytes
  • Size of remote file: 925 kB
files/example_images/anime.jpg ADDED

Git LFS Details

  • SHA256: 07b1102db133110e12556a7697fa3d40e15c36ebc86ae4745e46560fefc34539
  • Pointer size: 131 Bytes
  • Size of remote file: 504 kB
files/example_images/anime2.jpg ADDED

Git LFS Details

  • SHA256: 35f92682908a5681a8eb16ea3c1c443f7da7f649e0183e89f9ff43ba89a084a9
  • Pointer size: 130 Bytes
  • Size of remote file: 77.9 kB
files/example_images/basket.jpg ADDED

Git LFS Details

  • SHA256: 45cebed3ac4aa1dcc74395eb2f19159f5df2c8e3d11e2dc749eeea7a68b7958c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.35 MB
files/example_images/eins.jpg ADDED

Git LFS Details

  • SHA256: 0fea73d77b89b8b8c1a6698669fc3270bcda3bcf6f74a19f6230e80a3597ae15
  • Pointer size: 131 Bytes
  • Size of remote file: 253 kB
files/example_images/girl1.jpg ADDED

Git LFS Details

  • SHA256: b523c75156cebfd91b70b309faf7cc07b4a8f4cf9fc63c9bbeb481a7e858af9f
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
files/example_images/joker.jpg ADDED

Git LFS Details

  • SHA256: 0f81508988c7d8e5c4467c4e23162fd9df21859c6ef04d6105c86d9f9e88f23b
  • Pointer size: 131 Bytes
  • Size of remote file: 410 kB
files/example_images/robot.jpg ADDED

Git LFS Details

  • SHA256: 19d4aeeeb6a2e0b6383cfe332c92c5a3dbbd119a715facbecf539de13411f295
  • Pointer size: 131 Bytes
  • Size of remote file: 245 kB
files/example_images/zz.jpg ADDED

Git LFS Details

  • SHA256: f73d2accf3bb65f30d89c38b1ee0a0dd50a6bb3a68d74117541e870efabcd40b
  • Pointer size: 130 Bytes
  • Size of remote file: 19.7 kB
files/example_videos/cai.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31bdf441813a78b4f0f259044ad6726d9dbeb608fb319c86a705799f5c750baf
+size 1080169
files/example_videos/girl.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89c05e6d68eebf7c87b282f4bc8bbef5b35069753c437f6c83a388d3e961f05d
+size 2137986
files/example_videos/lee.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4775fafc1dc2da3c4cd65df764f3dae43b95f8c5ff1ab3c30faea0070108e53
+size 617387
files/example_videos/ma.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e31c0cc0897859cb6ea4c226287005b6e424511998a3a3549562afacabacbb5
+size 1423057
files/example_videos/mimo1_origin.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83d140279bfca2e649a7c84141361f88b9d2c899d66ca7e33f917692cf21c381
+size 577767
files/example_videos/mimo2_origin.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aecea22a348a584d2240f9565019ad36bcf959101456251e8990e48ca604b09e
+size 440988
files/example_videos/play_basketball.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a3c99291456ea39efa1adbb4a86bc119a6f9def86c6b2d638a76e52c8247fc9
+size 545834
files/example_videos/wushu.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1665e613af247ff59ee53250ab2bca981993f6250e21b3210539d2f32f57eee7
+size 2319594
requirements.txt ADDED
@@ -0,0 +1,7 @@
+gradio==5.1.0
+opencv-python
+ffmpeg
+decord
+numpy
+oss2
+wget