"""Gradio user study: compare ground-truth videos against SARF ablation outputs.

Shows NUM_SAMPLES randomized (ground-truth, ablation) video pairs, asks which
one sounds more realistic, and pushes each session's answers as JSON to a
Hugging Face dataset repository.
"""

import json
import os
import random
import subprocess
import time
from datetime import datetime

import gradio as gr
from huggingface_hub import Repository

# Configure the Git identity used by the Repository pushes below.
subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])

hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Single study question (earlier study variants asked about audio/image match).
question = ["Which one sounds more realistic"]

# Number of video pairs shown per session. Every other count in this file
# (UI pairs, output components, gr.Markdown text) is derived from this.
NUM_SAMPLES = 3

# Ablation variants compared against ground truth, assigned round-robin to the
# sampled videos.
SARF_DIRS = [
    "./sarf_no_cavp/",
    "./sarf_no_cavp_no_clip/",
    "./sarf_no_cavp_no_hand/",
    "./sarf_no_cavp_single_view/",
]


def load_random_pairs():
    """Sample NUM_SAMPLES ground-truth videos and pair each with one ablation.

    Returns:
        (video_pairs, video_mappings):
            video_pairs: list of (left_path, right_path) tuples in randomized
                display order, with each pair's sides randomly swapped so the
                ground truth is not always "Video 1".
            video_mappings: {gt_filename: {"Video 1": source, "Video 2": source}}
                where source is "gt" or the ablation directory name, recording
                which variant ended up on which side.
    """
    random.seed(time.time())

    video_pairs = []
    video_mappings = {}

    gt_videos = random.sample(os.listdir("./gt/"), k=NUM_SAMPLES)

    # Assign ablation directories round-robin so every sampled video gets
    # exactly one variant. (The previous gt_videos[i::4] grouping left one
    # variant unused whenever NUM_SAMPLES < len(SARF_DIRS); cycling yields the
    # identical assignment for NUM_SAMPLES=3 and generalizes correctly.)
    for idx, fn in enumerate(gt_videos):
        sarf_dir = SARF_DIRS[idx % len(SARF_DIRS)]
        # "./sarf_no_cavp/".split('/')[1] -> "sarf_no_cavp"
        sarf_dir_name = sarf_dir.split('/')[1]

        pair = (f"./gt/{fn}", f"{sarf_dir}{fn}")
        mapping = ("gt", sarf_dir_name)
        # Randomize which side the ground truth appears on.
        if random.random() > 0.5:
            pair = (pair[1], pair[0])
            mapping = (mapping[1], mapping[0])

        video_pairs.append(pair)
        video_mappings[fn] = {"Video 1": mapping[0], "Video 2": mapping[1]}

    random.shuffle(video_pairs)
    print("load pairs after shuffle:")
    print(video_pairs)
    print("load mapping:")
    print(video_mappings)
    return video_pairs, video_mappings


# Clone the responses dataset repo once at startup; responses are pushed there.
# NOTE(review): `use_auth_token` is deprecated in newer huggingface_hub in
# favor of `token` — confirm the pinned version before switching.
repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)

# Populated by load_and_update() when the page loads. Initialized here so that
# save_responses() does not raise NameError if it ever runs before the load
# callback has completed.
video_pairs = []
video_mappings = {}


def save_responses(*responses):
    """Persist one user's selections as JSON and push them to the dataset repo.

    Args:
        *responses: one "Video 1" / "Video 2" choice per pair, in the order
            the pairs were displayed.

    Returns:
        A confirmation message for the UI.
    """
    session_id = f"session_{int(time.time())}"
    data = {
        "user_id": session_id,
        "timestamp": datetime.now().isoformat(),
        "video_responses": [],
    }

    for i, (video1_path, video2_path) in enumerate(video_pairs):
        # Exactly one side of every pair is the ground-truth video; its
        # filename keys the mapping table built in load_random_pairs().
        gt_path = video1_path if "gt" in video1_path else video2_path
        gt_name = gt_path.split('/')[-1]

        selected_video = responses[i]
        # Resolve the displayed label back to the underlying variant name.
        original_video_index = video_mappings[gt_name][selected_video]
        data["video_responses"].append({
            "video_1": video1_path,
            "video_2": video2_path,
            "selected_video": f"{original_video_index}",
        })

    print("save response:")
    print(data)

    response_file = f"user_responses/{session_id}_responses.json"
    os.makedirs("user_responses", exist_ok=True)
    with open(response_file, "w") as f:
        json.dump(data, f, indent=4)

    # Pull first to avoid non-fast-forward pushes when sessions overlap.
    repo.git_pull()
    repo.push_to_hub()
    return "All responses saved! Thank you."


def load_and_update():
    """Resample the study pairs and return a flat list of video paths.

    Returns:
        [pair1_left, pair1_right, pair2_left, ...] matching the flat list of
        gr.Video components in display order.
    """
    global video_mappings
    global video_pairs
    video_pairs, video_mappings = load_random_pairs()

    flat_output = []
    for video1, video2 in video_pairs:
        flat_output.extend([video1, video2])
    return flat_output


def create_interface():
    """Build the Gradio Blocks UI for the study and wire up its callbacks."""
    with gr.Blocks() as demo:
        gr.Markdown("## Sound Generation User Study")
        # Pair count is derived from NUM_SAMPLES (the previous hard-coded "32"
        # disagreed with the 3 pairs actually rendered).
        gr.Markdown(
            f"Below are {NUM_SAMPLES} pairs of videos with virtual hand "
            "interacting with the scene, making different sounds."
        )
        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
        gr.Markdown("* It may take some time to load all the videos. We appreciate your time and patience!")

        video_components = []
        video_responses = []

        for idx in range(NUM_SAMPLES):
            gr.Markdown(f"### Pair {idx + 1}")
            with gr.Row():
                video1 = gr.Video(label="Video 1")
                video2 = gr.Video(label="Video 2")
            radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
            gr.Markdown("---")
            video_components.extend([video1, video2])
            video_responses.append(radio)

        submit_btn = gr.Button("Submit All")
        result_message = gr.Textbox(label="Message", interactive=False)

        def validate_and_save(*responses):
            # Require an answer for every pair before persisting anything.
            if any(response is None for response in responses):
                return "Please answer all questions before submitting."
            return save_responses(*responses)

        demo.load(
            fn=load_and_update,
            inputs=None,
            outputs=video_components,  # two gr.Video components per pair
        )

        submit_btn.click(
            fn=validate_and_save,
            inputs=video_responses,
            outputs=result_message,
        )

    return demo


demo = create_interface()
demo.launch(share=True)