File size: 1,269 Bytes
03e05c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0d952
 
 
03e05c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
    "sil_token": 69,
    "bos_token": 71,
    "eos_token": 72,
    "end_pad": 5,
    "num_codebooks": 12,
    "num_phones_per_frame": 2,
    "audio_delay_frames": 1,
    "temperature": 0.9,
    "topk": 5,
    "max_audio_length_ms": 60000,
    "model_repo": "herimor/voxtream",
    "model_name": "model.safetensors",
    "model_config_name": "config.json",
    "mimi_sr": 24000,
    "mimi_vocab_size": 2048,
    "mimi_frame_ms": 80,
    "mimi_repo": "kyutai/moshiko-pytorch-bf16",
    "mimi_name": "tokenizer-e351c8d8-checkpoint125.safetensors",
    "spk_enc_sr": 16000,
    "spk_enc_repo": "IDRnD/ReDimNet",
    "spk_enc_model": "ReDimNet",
    "spk_enc_model_name": "M",
    "spk_enc_train_type": "ft_mix",
    "spk_enc_dataset": "vb2+vox2+cnc",
    "phoneme_dict_name": "phoneme_to_token.json",
    "nltk_resource": "taggers/averaged_perceptron_tagger_eng",
    "aligner": "charsiu/en_w2v2_fc_10ms",
    "max_prompt_sec": 10,
    "max_prompt_chars": 250,
    "max_phone_tokens": 1000,
    "cache_prompt": false,
    "phoneme_index_map": {
        "0": [
            0,
            1
        ],
        "1": [
            0,
            2
        ],
        "2": [
            1,
            1
        ],
        "3": [
            1,
            2
        ]
    }
}