# Python dependencies, one requirement per line (pip requirements format).
# Install with: pip install -r <this file>

# Core tensor / array libraries
torch
numpy

# Audio processing
librosa
soundfile

# Model
s3prl
pyannote.audio
funasr
transformers

# WER
# Pinned to an exact version; the other packages are left unpinned.
jiwer==3.1.0

# Normalization
zhconv
zhon