"""Gradio user study: compare ground-truth videos against SARF ablation outputs.

Shows NUM_SAMPLES randomized (ground-truth, ablation) video pairs, asks which
one sounds more realistic, and pushes each session's answers as JSON to a
Hugging Face dataset repository.
"""

import json
import os
import random
import subprocess
import time
from datetime import datetime

import gradio as gr
from huggingface_hub import Repository

# Configure the Git identity used by the Repository pushes below.
subprocess.run(["git", "config", "--global", "user.email", "yuqingll@umich.edu"])
subprocess.run(["git", "config", "--global", "user.name", "yuqingluo0509"])

hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Single study question (earlier study variants asked about audio/image match).
question = ["Which one sounds more realistic"]

# Number of video pairs shown per session. Every other count in this file
# (UI pairs, output components, gr.Markdown text) is derived from this.
NUM_SAMPLES = 3

# Ablation variants compared against ground truth, assigned round-robin to the
# sampled videos.
SARF_DIRS = [
    "./sarf_no_cavp/",
    "./sarf_no_cavp_no_clip/",
    "./sarf_no_cavp_no_hand/",
    "./sarf_no_cavp_single_view/",
]


def load_random_pairs():
    """Sample NUM_SAMPLES ground-truth videos and pair each with one ablation.

    Returns:
        (video_pairs, video_mappings):
            video_pairs: list of (left_path, right_path) tuples in randomized
                display order, with each pair's sides randomly swapped so the
                ground truth is not always "Video 1".
            video_mappings: {gt_filename: {"Video 1": source, "Video 2": source}}
                where source is "gt" or the ablation directory name, recording
                which variant ended up on which side.
    """
    random.seed(time.time())

    video_pairs = []
    video_mappings = {}

    gt_videos = random.sample(os.listdir("./gt/"), k=NUM_SAMPLES)

    # Assign ablation directories round-robin so every sampled video gets
    # exactly one variant. (The previous gt_videos[i::4] grouping left one
    # variant unused whenever NUM_SAMPLES < len(SARF_DIRS); cycling yields the
    # identical assignment for NUM_SAMPLES=3 and generalizes correctly.)
    for idx, fn in enumerate(gt_videos):
        sarf_dir = SARF_DIRS[idx % len(SARF_DIRS)]
        # "./sarf_no_cavp/".split('/')[1] -> "sarf_no_cavp"
        sarf_dir_name = sarf_dir.split('/')[1]

        pair = (f"./gt/{fn}", f"{sarf_dir}{fn}")
        mapping = ("gt", sarf_dir_name)
        # Randomize which side the ground truth appears on.
        if random.random() > 0.5:
            pair = (pair[1], pair[0])
            mapping = (mapping[1], mapping[0])

        video_pairs.append(pair)
        video_mappings[fn] = {"Video 1": mapping[0], "Video 2": mapping[1]}

    random.shuffle(video_pairs)
    print("load pairs after shuffle:")
    print(video_pairs)
    print("load mapping:")
    print(video_mappings)
    return video_pairs, video_mappings


# Clone the responses dataset repo once at startup; responses are pushed there.
# NOTE(review): `use_auth_token` is deprecated in newer huggingface_hub in
# favor of `token` — confirm the pinned version before switching.
repo_url = "https://huggingface.co/datasets/yuqingluo0509/sound_generation_response"
repo = Repository(local_dir="user_responses", clone_from=repo_url, use_auth_token=hf_token)

# Populated by load_and_update() when the page loads. Initialized here so that
# save_responses() does not raise NameError if it ever runs before the load
# callback has completed.
video_pairs = []
video_mappings = {}


def save_responses(*responses):
    """Persist one user's selections as JSON and push them to the dataset repo.

    Args:
        *responses: one "Video 1" / "Video 2" choice per pair, in the order
            the pairs were displayed.

    Returns:
        A confirmation message for the UI.
    """
    session_id = f"session_{int(time.time())}"
    data = {
        "user_id": session_id,
        "timestamp": datetime.now().isoformat(),
        "video_responses": [],
    }

    for i, (video1_path, video2_path) in enumerate(video_pairs):
        # Exactly one side of every pair is the ground-truth video; its
        # filename keys the mapping table built in load_random_pairs().
        gt_path = video1_path if "gt" in video1_path else video2_path
        gt_name = gt_path.split('/')[-1]

        selected_video = responses[i]
        # Resolve the displayed label back to the underlying variant name.
        original_video_index = video_mappings[gt_name][selected_video]
        data["video_responses"].append({
            "video_1": video1_path,
            "video_2": video2_path,
            "selected_video": f"{original_video_index}",
        })

    print("save response:")
    print(data)

    response_file = f"user_responses/{session_id}_responses.json"
    os.makedirs("user_responses", exist_ok=True)
    with open(response_file, "w") as f:
        json.dump(data, f, indent=4)

    # Pull first to avoid non-fast-forward pushes when sessions overlap.
    repo.git_pull()
    repo.push_to_hub()
    return "All responses saved! Thank you."


def load_and_update():
    """Resample the study pairs and return a flat list of video paths.

    Returns:
        [pair1_left, pair1_right, pair2_left, ...] matching the flat list of
        gr.Video components in display order.
    """
    global video_mappings
    global video_pairs
    video_pairs, video_mappings = load_random_pairs()

    flat_output = []
    for video1, video2 in video_pairs:
        flat_output.extend([video1, video2])
    return flat_output


def create_interface():
    """Build the Gradio Blocks UI for the study and wire up its callbacks."""
    with gr.Blocks() as demo:
        gr.Markdown("## Sound Generation User Study")
        # Pair count is derived from NUM_SAMPLES (the previous hard-coded "32"
        # disagreed with the 3 pairs actually rendered).
        gr.Markdown(
            f"Below are {NUM_SAMPLES} pairs of videos with virtual hand "
            "interacting with the scene, making different sounds."
        )
        gr.Markdown("Please listen to each pair of videos and select which one sounds more realistic.")
        gr.Markdown("* It may take some time to load all the videos. We appreciate your time and patience!")

        video_components = []
        video_responses = []

        for idx in range(NUM_SAMPLES):
            gr.Markdown(f"### Pair {idx + 1}")
            with gr.Row():
                video1 = gr.Video(label="Video 1")
                video2 = gr.Video(label="Video 2")
            radio = gr.Radio(["Video 1", "Video 2"], label="Which one sounds more realistic?")
            gr.Markdown("---")
            video_components.extend([video1, video2])
            video_responses.append(radio)

        submit_btn = gr.Button("Submit All")
        result_message = gr.Textbox(label="Message", interactive=False)

        def validate_and_save(*responses):
            # Require an answer for every pair before persisting anything.
            if any(response is None for response in responses):
                return "Please answer all questions before submitting."
            return save_responses(*responses)

        demo.load(
            fn=load_and_update,
            inputs=None,
            outputs=video_components,  # two gr.Video components per pair
        )

        submit_btn.click(
            fn=validate_and_save,
            inputs=video_responses,
            outputs=result_message,
        )

    return demo


demo = create_interface()
demo.launch(share=True)