# Training-loop settings: optimizer, learning-rate scheduler, logging and
# checkpoint cadence, and the checkpoint path given in `from_pretrained`.
training:
  optimizer:
    type: adam
    # Key names match the keyword arguments of torch.optim.Adam
    # (presumably forwarded as-is -- confirm against the optimizer factory).
    args:
      lr: 1.0e-05
      betas: [0.9, 0.999]
      eps: 1.0e-08
      weight_decay: 0.0
      amsgrad: false
  scheduler:
    # Presumably a step decay: scale the learning rate by `gamma` every
    # `step_size` iterations -- confirm against the scheduler factory.
    type: step
    step_size: 100000
    gamma: 0.1
  num_epochs: 1000
  continue: true
  # Cadences below are presumably counted in trainer iterations -- TODO confirm.
  checkpoint_every: 2500
  log_every_iters: 100
  n_saved: 4
  # NOTE(review): `from_pretrained` references iteration 247500 while
  # `stop_iteration` is 250000. Confirm whether the iteration counter is
  # restored from that checkpoint or restarts at zero.
  stop_iteration: 250000
  from_pretrained: out/post_submission/anycam_baseline_fc2_backend-nccl-2_1311123/training_checkpoint_247500.pt
# Named dataset configurations. The names listed under the top-level `dataset`
# and `val_dataset` keys match entries of this mapping (presumably they are
# looked up here by name -- confirm against the data-loading code).
# Every entry declares `image_size: 384` and `frame_count: 2`; the top-level
# `dataset_params` mapping carries different values for those keys.
dataset_cfgs:
  # Sintel: the training and testing paths both point at the `training`
  # directory, for the data as well as the preprocessed files.
  sintel:
    type: sintel
    data_path_training: data/Sintel/training
    data_path_testing: data/Sintel/training
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/sintel/training
    preprocessed_path_testing: data/unimatch_flows/sintel/training
    return_depth: false
    return_flow: true
  # Same paths as `sintel`, but with type `sintel-gt` and `return_depth: true`.
  sintel_gt:
    type: sintel-gt
    data_path_training: data/Sintel/training
    data_path_testing: data/Sintel/training
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/sintel/training
    preprocessed_path_testing: data/unimatch_flows/sintel/training
    return_depth: true
    return_flow: true
  # Waymo without a split file; training and testing use separate directories.
  waymo:
    type: waymo
    data_path_training: data/waymo/training
    data_path_testing: data/waymo/testing
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/waymo/training
    preprocessed_path_testing: data/unimatch_flows/waymo/testing
    return_depth: false
    return_flow: true
  # Evaluation-sequence entries: unlike the entries with `split: null`, each of
  # these sets `split` to a path under anycam/datasets/<dataset>/splits/.
  # `${data_root}` interpolates the top-level `data_root` key.
  re10k_eval_seqs:
    type: re10k
    data_path_training: ${data_root}/re10k/data/test.pickle
    data_path_testing: ${data_root}/re10k/data/test.pickle
    split: anycam/datasets/realestate10k/splits/eval_seqs
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/re10k/flows_1
    preprocessed_path_testing: ${data_root}/re10k/flows_1
    return_depth: false
    return_flow: true
  # Identical to `re10k_eval_seqs` except for the `eval_seqs_clean` split file.
  re10k_eval_seqs_clean:
    type: re10k
    data_path_training: ${data_root}/re10k/data/test.pickle
    data_path_testing: ${data_root}/re10k/data/test.pickle
    split: anycam/datasets/realestate10k/splits/eval_seqs_clean
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/re10k/flows_1
    preprocessed_path_testing: ${data_root}/re10k/flows_1
    return_depth: false
    return_flow: true
  # TUM RGB-D entries declare no preprocessed paths, disable `return_flow`,
  # and set `dilation: 10`.
  tumrgbd_eval_seqs:
    type: tumrgbd
    data_path_training: data/TUM_RGBD
    data_path_testing: data/TUM_RGBD
    split: anycam/datasets/tum_rgbd/splits/dynamic_seqs
    image_size: 384
    frame_count: 2
    dilation: 10
    return_depth: false
    return_flow: false
  # Identical to `tumrgbd_eval_seqs` except for the `dynamic_seqs_64` split file.
  tumrgbd_eval_seqs_64:
    type: tumrgbd
    data_path_training: data/TUM_RGBD
    data_path_testing: data/TUM_RGBD
    split: anycam/datasets/tum_rgbd/splits/dynamic_seqs_64
    image_size: 384
    frame_count: 2
    dilation: 10
    return_depth: false
    return_flow: false
  # Same paths as the plain `waymo` entry, restricted by a split file.
  waymo_eval_seqs_2_64:
    type: waymo
    data_path_training: data/waymo/training
    data_path_testing: data/waymo/testing
    split: anycam/datasets/waymo/splits/eval_seqs_2_64
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/waymo/training
    preprocessed_path_testing: data/unimatch_flows/waymo/testing
    return_depth: false
    return_flow: true
  # RealEstate10K family. The three variants differ only in `dilation` (1/2/3)
  # and the matching preprocessed directory (`flows_1`/`flows_2`/`flows_3`).
  # NOTE(review): `data_path_training` points at `test.pickle`, the same file
  # as `data_path_testing` -- confirm this is intended for a training entry.
  re10k:
    type: re10k
    data_path_training: ${data_root}/re10k/data/test.pickle
    data_path_testing: ${data_root}/re10k/data/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 1
    preprocessed_path_training: ${data_root}/re10k/flows_1
    preprocessed_path_testing: ${data_root}/re10k/flows_1
    return_depth: false
    return_flow: true
  # Dilation 2 variant, paired with `flows_2`.
  re10k_n2:
    type: re10k
    data_path_training: ${data_root}/re10k/data/test.pickle
    data_path_testing: ${data_root}/re10k/data/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 2
    preprocessed_path_training: ${data_root}/re10k/flows_2
    preprocessed_path_testing: ${data_root}/re10k/flows_2
    return_depth: false
    return_flow: true
  # Dilation 3 variant, paired with `flows_3`.
  re10k_n3:
    type: re10k
    data_path_training: ${data_root}/re10k/data/test.pickle
    data_path_testing: ${data_root}/re10k/data/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 3
    preprocessed_path_training: ${data_root}/re10k/flows_3
    preprocessed_path_testing: ${data_root}/re10k/flows_3
    return_depth: false
    return_flow: true
  # YouTube-VOS family. All three entries set `return_flow: false` even though
  # preprocessed paths are declared; the top-level `dataset_params` sets
  # `return_flow: true` (presumably overriding this -- confirm).
  # The base entry has no `dilation` key; it uses train/valid data directories
  # and `flows_1/train` / `flows_1/valid`.
  youtube_vos:
    type: youtubevos
    data_path_training: ${data_root}/youtubevos/data/train
    data_path_testing: ${data_root}/youtubevos/data/valid
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/youtubevos/flows_1/train
    preprocessed_path_testing: ${data_root}/youtubevos/flows_1/valid
    return_depth: false
    return_flow: false
  # NOTE(review): unlike `youtube_vos`, the testing data path points at `train`
  # and the preprocessed paths have no `train`/`valid` sub-directory -- confirm
  # the on-disk layout of `flows_2`.
  youtube_vos_n2:
    type: youtubevos
    data_path_training: ${data_root}/youtubevos/data/train
    data_path_testing: ${data_root}/youtubevos/data/train
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/youtubevos/flows_2
    preprocessed_path_testing: ${data_root}/youtubevos/flows_2
    return_depth: false
    return_flow: false
    dilation: 2
  # Same layout as `youtube_vos_n2`, with dilation 3 and `flows_3`.
  youtube_vos_n3:
    type: youtubevos
    data_path_training: ${data_root}/youtubevos/data/train
    data_path_testing: ${data_root}/youtubevos/data/train
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/youtubevos/flows_3
    preprocessed_path_testing: ${data_root}/youtubevos/flows_3
    return_depth: false
    return_flow: false
    dilation: 3
  # OpenDV family. Training and testing share the same data directory; the
  # variants differ in `dilation` (absent/2/3) and the matching `flows_k`
  # directory (`flows_1`/`flows_2`/`flows_3`).
  opendv:
    type: opendv
    data_path_training: ${data_root}/opendv/data
    data_path_testing: ${data_root}/opendv/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/opendv/flows_1
    preprocessed_path_testing: ${data_root}/opendv/flows_1
    return_depth: false
    return_flow: true
  # Dilation 2 variant, paired with `flows_2`.
  opendv_n2:
    type: opendv
    data_path_training: ${data_root}/opendv/data
    data_path_testing: ${data_root}/opendv/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/opendv/flows_2
    preprocessed_path_testing: ${data_root}/opendv/flows_2
    return_depth: false
    return_flow: true
    dilation: 2
  # Dilation 3 variant, paired with `flows_3`.
  opendv_n3:
    type: opendv
    data_path_training: ${data_root}/opendv/data
    data_path_testing: ${data_root}/opendv/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/opendv/flows_3
    preprocessed_path_testing: ${data_root}/opendv/flows_3
    return_depth: false
    return_flow: true
    dilation: 3
  # WalkingTours family. Training and testing share the same data directory;
  # the variants differ in `dilation` (absent/2/3) and the matching `flows_k`
  # directory (`flows_1`/`flows_2`/`flows_3`).
  walkingtours:
    type: walkingtours
    data_path_training: ${data_root}/walkingtours/data
    data_path_testing: ${data_root}/walkingtours/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/walkingtours/flows_1
    preprocessed_path_testing: ${data_root}/walkingtours/flows_1
    return_depth: false
    return_flow: true
  # Dilation 2 variant, paired with `flows_2`.
  walkingtours_n2:
    type: walkingtours
    data_path_training: ${data_root}/walkingtours/data
    data_path_testing: ${data_root}/walkingtours/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/walkingtours/flows_2
    preprocessed_path_testing: ${data_root}/walkingtours/flows_2
    return_depth: false
    return_flow: true
    dilation: 2
  # Dilation 3 variant, paired with `flows_3`.
  walkingtours_n3:
    type: walkingtours
    data_path_training: ${data_root}/walkingtours/data
    data_path_testing: ${data_root}/walkingtours/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/walkingtours/flows_3
    preprocessed_path_testing: ${data_root}/walkingtours/flows_3
    return_depth: false
    return_flow: true
    dilation: 3
  # EPIC-Kitchens family. Training and testing share the same data directory;
  # the variants differ in `dilation` (absent/2/3) and the matching `flows_k`
  # directory (`flows_1`/`flows_2`/`flows_3`).
  epickitchens:
    type: epickitchens
    data_path_training: ${data_root}/epickitchens/data
    data_path_testing: ${data_root}/epickitchens/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/epickitchens/flows_1
    preprocessed_path_testing: ${data_root}/epickitchens/flows_1
    return_depth: false
    return_flow: true
  # Dilation 2 variant, paired with `flows_2`.
  epickitchens_n2:
    type: epickitchens
    data_path_training: ${data_root}/epickitchens/data
    data_path_testing: ${data_root}/epickitchens/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/epickitchens/flows_2
    preprocessed_path_testing: ${data_root}/epickitchens/flows_2
    return_depth: false
    return_flow: true
    dilation: 2
  # Dilation 3 variant, paired with `flows_3`.
  epickitchens_n3:
    type: epickitchens
    data_path_training: ${data_root}/epickitchens/data
    data_path_testing: ${data_root}/epickitchens/data
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: ${data_root}/epickitchens/flows_3
    preprocessed_path_testing: ${data_root}/epickitchens/flows_3
    return_depth: false
    return_flow: true
    dilation: 3
# Root directory referenced as `${data_root}` by the dataset entries above.
# Uses OmegaConf's `oc.env` resolver: the ANYCAM_DATA environment variable,
# with `data_new` as the fallback when the variable is unset.
data_root: ${oc.env:ANYCAM_DATA, data_new}
training_type: anycam_training
seed: 0
# Distributed launch settings: communication backend and processes per node.
backend: nccl
nproc_per_node: 2
with_amp: true
name: anycam_seq8
# NOTE(review): whether `batch_size` is per process or global is not visible
# here -- confirm against the launcher.
batch_size: 4
num_workers: 8
# Training datasets; each name matches a key under `dataset_cfgs`.
# Only the base (non-`_n2`/`_n3`) variants are listed.
dataset:
- re10k
- youtube_vos
- opendv
- walkingtours
- epickitchens
# Validation datasets; both are RealEstate10K entries with a split file.
val_dataset:
- re10k_eval_seqs
- re10k_eval_seqs_clean
# Dataset parameters whose values differ from the per-entry ones in
# `dataset_cfgs` (frame_count 8 vs 2, image_size 336 vs 384). Presumably
# applied as overrides on top of the selected entries -- confirm against the
# dataset factory.
dataset_params:
  frame_count: 8
  return_flow: true
  image_size: 336
  sequential: true
dataloading:
  # NOTE(review): unit (samples vs. iterations) is not visible here -- confirm.
  epoch_length: 80000
# Output location for this run, plus a numeric identifier for the run.
output:
  path: pretrained_models/anycam_seq8
  unique_id: 5051794
# List of loss terms; a single `pose_loss` entry is configured.
# `lambda_dist` is 0, so that term presumably contributes nothing -- confirm
# how the loss implementation uses the lambda values.
loss:
- type: pose_loss
  lambda_dist: 0
  pose_token_weight_decay: 0.01
  lambda_fwd_bwd_consistency: 1
  lambda_label_scale: 100
# Model composition: a depth predictor, a pose predictor, and a depth aligner,
# followed by flow and training-pass options.
model:
  depth_predictor:
    type: unidepth
  pose_predictor:
    type: anycam
    # Focal-length settings: the parameterization name plus min/max bounds
    # (units are not visible here -- TODO confirm).
    focal_parameterization: linlog-candidates
    focal_min: 0.2
    focal_max: 7
    rotation_parameterization: axis-angle
    separate_pose_candidates: true
    separate_uncertainty_candidates: true
    pose_token_partial_dropout: 0.5
  depth_aligner:
    type: identity
  flow_model: unimatch
  # Flow is taken from the dataset (`use_provided_flow: true`); the provided
  # projection is not used (`use_provided_proj: false`).
  use_provided_flow: true
  use_provided_proj: false
  train_directions: both
  perform_subsampled_pose_pass: false
  subsampling_drop_n: 1
  single_focal_warmup_iters: 0
  # Near/far depth bounds (units are not visible here -- TODO confirm).
  z_near: 0.1
  z_far: 100
# Evaluation hooks. Two named hooks are configured: `validation` (custom video
# validator on indices 0..512) and `visualization` (indices 0..1).
validation:
  validation:
    batch_size: 1
    # Range subset of the validation data (whether `end` is inclusive is not
    # visible here -- TODO confirm).
    subset:
      type: range
      args:
        start: 0
        end: 512
    custom_validator: anycam.video_validator.video_validator
    fit_video_config: anycam/configs/eval_cfgs/train_eval.yaml
    log_loss: false
    global_step:
      type: trainer iteration
    # Triggers: every 5000 completed iterations, and once on completion.
    events:
    - type: ITERATION_COMPLETED
      args:
        every: 5000
    - type: COMPLETED
      args: null
  visualization:
    # No metrics are computed for this hook.
    metrics: []
    subset:
      type: range
      args:
        start: 0
        end: 1
    # Visualizations to produce; every entry has a null value (presumably
    # meaning default options -- confirm against the visualizer).
    visualize:
      input_imgs: null
      depth: null
      occlusions: null
      rendered_flow: null
      gt_flow: null
      predicted_occlusions: null
      uncertainty: null
    log_loss: false
    global_step:
      type: trainer iteration
    # Triggers: every 2500 completed iterations, and once on completion.
    events:
    - type: ITERATION_COMPLETED
      args:
        every: 2500
    - type: COMPLETED
      args: null
# Port number for the distributed launch (presumably the rendezvous port used
# together with `backend` / `nproc_per_node` -- confirm against the launcher).
master_port: 2222
# Name of the CUDA device. The key contains spaces, so it is quoted to make
# the key boundary explicit; the parsed key is unchanged.
'cuda device name': NVIDIA A100-SXM4-40GB