Matrix Game Image to Action

A custom block that generates action inputs for the Matrix Game interactive world model.

How to use

import torch
from diffusers import ModularPipelineBlocks
from diffusers.utils import export_to_video, load_image
from diffusers.modular_pipelines import WanModularPipeline

class MatrixGameWanModularPipeline(WanModularPipeline):
    """
    Modular pipeline for MatrixGameWan, derived from `WanModularPipeline`.

    The subclass itself only defines the two default sample-size properties
    below; everything else comes from the parent class.

    <Tip warning={true}>

        This is an experimental feature and is likely to change in the future.

    </Tip>
    """

    @property
    def default_sample_width(self) -> int:
        """Return the default sample width for this pipeline (80)."""
        # NOTE(review): presumably expressed in latent units rather than
        # pixels -- confirm against the parent pipeline.
        return 80

    @property
    def default_sample_height(self) -> int:
        """Return the default sample height for this pipeline (44)."""
        # NOTE(review): presumably expressed in latent units rather than
        # pixels -- confirm against the parent pipeline.
        return 44


# Fetch the base Matrix Game 2 blocks and the image-to-action custom block.
blocks = ModularPipelineBlocks.from_pretrained(
    "diffusers/matrix-game-2-modular",
    trust_remote_code=True,
)
image_to_action_block = ModularPipelineBlocks.from_pretrained(
    "dn6/matrix-game-image-to-action",
    trust_remote_code=True,
)

# Register the custom block under the "image_to_action" key.
# NOTE(review): the trailing 0 is presumably the insertion position (i.e. the
# block runs first) -- confirm against the sub_blocks.insert signature.
blocks.sub_blocks.insert("image_to_action", image_to_action_block, 0)

# NOTE(review): this repo id differs from the one the blocks were loaded from
# above ("diffusers/matrix-game-2-modular") -- confirm which one is intended.
pipe = MatrixGameWanModularPipeline(
    blocks,
    "diffusers-internal-dev/matrix-game-2-modular",
)

# Per-component dtypes: bfloat16 by default, float32 for the "vae" entry.
component_dtypes = {"default": torch.bfloat16, "vae": torch.float32}
pipe.load_components(
    trust_remote_code=True,
    device_map="cuda",
    torch_dtype=component_dtypes,
)

# Inputs for the run: a starting image and a natural-language instruction.
image_url = "https://images.unsplash.com/photo-1730652201845-095193ddb555?q=80&w=1322&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
image = load_image(image_url)
prompt = "Turn around and look at what's behind you"

output = pipe(
    image=image,
    prompt=prompt,
    num_frames=141,
    num_inference_steps=8,
)

# Write the first generated video to disk.
export_to_video(output.values["videos"][0], "matrix-game-vlm.mp4")

Downloads last month: -; Downloads are not tracked for this model. How to track

Inference Providers NEW

This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

dn6
/

matrix-game-image-to-action

Matrix Game Image to Action

How to use

Space using dn6/matrix-game-image-to-action 1