Spaces:

Vgjkmhf
/

cctvoi

Runtime error

App Files Files Community

Vgjkmhf committed 11 days ago

Commit

f3dd9e4

verified ·

1 Parent(s): d0c0da1

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -61

app.py CHANGED Viewed

@@ -1,100 +1,166 @@
 import os
-import imageio_ffmpeg
 import inspect
 import gradio as gr
-from rvc_python.infer import RVCInference
-import static_ffmpeg
-# تنظیمات سیستمی و FFmpeg
-static_ffmpeg.add_paths()
-ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
-os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
-def rvc_process(audio_path, model_file, index_file, pitch_change, f0_method, index_rate, protect_val, filter_radius):
     if not audio_path or not model_file:
-        return None, "⚠️ فایل‌ها کامل نیستند."
     try:
-        model_path = model_file.name
-        index_path = index_file.name if index_file else None
-        print(f"Processing Model: {model_path}")
         rvc = RVCInference(device="cpu")
-        rvc.load_model(model_path)
-        out_path = "/tmp/output_persian_fix.wav"
-        if os.path.exists(out_path): os.remove(out_path)
-        # کشف پارامترهای تابع (برای سازگاری با نسخه‌های مختلف)
         sig = inspect.signature(rvc.infer_file)
         params = sig.parameters
-        kwargs = {
-            "input_path": audio_path,
-            "output_path": out_path,
-        }
-        # 1. پیچ (Pitch)
         if "pitch" in params: kwargs["pitch"] = int(pitch_change)
         elif "f0_up_key" in params: kwargs["f0_up_key"] = int(pitch_change)
-        # 2. متد (Method) - حتما RMVPE
         if "method" in params: kwargs["method"] = f0_method
         elif "f0_method" in params: kwargs["f0_method"] = f0_method
-        # 3. ایندکس (Index)
-        if "index_path" in params: kwargs["index_path"] = index_path
         if "index_rate" in params: kwargs["index_rate"] = float(index_rate)
-        # 4. محافظت (Protect) - حیاتی برای فارسی
         if "protect" in params: kwargs["protect"] = float(protect_val)
-        # 5. فیلتر (Filter Radius) - برای رفع لرزش صدا
         if "filter_radius" in params: kwargs["filter_radius"] = int(filter_radius)
-        # 6. دقت زمانی (Hop Length) - برای کیفیت بهتر
-        if "hop_length" in params: kwargs["hop_length"] = 64  # دقت بالاتر (پیشفرض 128 است)
-        print(f"Running with: {kwargs}")
         rvc.infer_file(**kwargs)
-        return out_path, "✅ تبدیل انجام شد (تنظیمات فارسی اعمال شد)"
     except Exception as e:
-        return None, f"❌ خطا: {str(e)}"
-# رابط کاربری پیشرفته فارسی
-with gr.Blocks(title="Persian RVC", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🇮🇷 مبدل صدای RVC (بهینه برای فارسی)")
-    gr.Markdown("این نسخه برای رفع مشکل خش‌دار بودن صدا و تداخل لهجه تنظیم شده است.")
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### 1. فایل‌های ورودی")
-            audio_in = gr.Audio(label="صدای ورودی (واضح و بدون نویز)", type="filepath")
-            model_in = gr.File(label="فایل مدل (.pth)", file_types=[".pth"])
-            index_in = gr.File(label="فایل ایندکس (.index)", file_types=[".index"])
         with gr.Column():
-            gr.Markdown("### 2. تنظیمات حیاتی")
-            pitch = gr.Slider(-12, 12, value=0, step=1, label="تغییر گام (Pitch)", info="مرد به زن: +12 | زن به مرد: -12")
-            method = gr.Dropdown(["rmvpe", "pm"], value="rmvpe", label="الگوریتم", info="فقط RMVPE کیفیت مناسب دارد.")
-            gr.Markdown("### 3. تنظیمات رفع خش و تداخل")
-            index_rate = gr.Slider(0, 1, value=0.3, step=0.05, label="شدت ایندکس (Index Rate)", info="پیشنهاد فارسی: 0.3 تا 0.4 (کمتر = تداخل کمتر)")
-            protect = gr.Slider(0, 0.5, value=0.5, step=0.01, label="محافظت (Protect)", info="پیشنهاد فارسی: 0.5 (حداکثر) برای جلوگیری از خش‌دار شدن")
-            filter_radius = gr.Slider(0, 7, value=3, step=1, label="فیلتر نرم‌کننده (Filter Radius)", info="برای حذف لرزش‌های اضافه (پیشنهاد: 3)")
-            btn = gr.Button("✨ شروع پردازش", variant="primary")
-    with gr.Row():
-        audio_out = gr.Audio(label="خروجی نهایی")
-        status = gr.Textbox(label="پیام سیستم")
     btn.click(
-        rvc_process,
-        [audio_in, model_in, index_in, pitch, method, index_rate, protect, filter_radius],
         [audio_out, status]
     )

 import os
+import sys
+import time
 import inspect
+import numpy as np
+import soundfile as sf
+import librosa
 import gradio as gr
+from scipy.signal import butter, lfilter
+# ==========================================
+# 1. Initial setup
+# ==========================================
+# The ffmpeg helpers and the RVC engine are imported inside a try block so
+# that a missing package prints the message below and exits with status 1.
+try:
+    import imageio_ffmpeg
+    import static_ffmpeg
+    from rvc_python.infer import RVCInference
+    static_ffmpeg.add_paths()
+    ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
+    # Append the directory containing that ffmpeg executable to PATH.
+    os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
+    print("✅ سیستم آماده است.")
+except ImportError as e:
+    print(f"❌ خطای ایمپورت: {e}")
+    sys.exit(1)
+# Working directory for the WAV files written by the processing functions
+# below; created at import time if it does not exist yet.
+TEMP_DIR = "/tmp/rvc_studio"
+os.makedirs(TEMP_DIR, exist_ok=True)
+# ==========================================
+# 2. Frequency-shaping helpers (to reduce muffled sound)
+# ==========================================
+def _peaking_eq(y, sr, center_hz, gain_db, q):
+    """Apply one peaking-EQ biquad (RBJ Audio EQ Cookbook form) to a signal.
+
+    Args:
+        y: 1-D array of audio samples.
+        sr: Sample rate of ``y`` in Hz.
+        center_hz: Centre frequency of the bell in Hz.
+        gain_db: Gain at the centre frequency in dB (negative values cut).
+        q: Quality factor; larger values give a narrower bell.
+
+    Returns:
+        The filtered samples, or ``y`` unchanged when the filter cannot be
+        applied (empty input, centre frequency not strictly between 0 Hz and
+        Nyquist, non-positive ``q``, or input that ``lfilter`` rejects).
+    """
+    if np.size(y) == 0:
+        return y
+    # A peaking filter only exists strictly below Nyquist, and a
+    # non-positive Q would divide by zero in alpha below.
+    if sr <= 0 or q <= 0 or not 0 < center_hz < sr / 2:
+        return y
+    amp = 10.0 ** (gain_db / 40.0)
+    omega = 2.0 * np.pi * center_hz / sr
+    alpha = np.sin(omega) / (2.0 * q)
+    cos_w = np.cos(omega)
+    b = np.array([1.0 + alpha * amp, -2.0 * cos_w, 1.0 - alpha * amp])
+    a = np.array([1.0 + alpha / amp, -2.0 * cos_w, 1.0 - alpha / amp])
+    try:
+        return lfilter(b / a[0], a / a[0], y)
+    except (ValueError, TypeError) as err:
+        # Best effort: keep the pipeline running with the unfiltered signal,
+        # but report the failure instead of hiding it.
+        print(f"EQ filter skipped: {err}")
+        return y
+def anti_muffle_filter(y, sr):
+    """Cut 3 dB around 600 Hz (Q=1.5) to reduce muffled, nasal resonance.
+
+    Returns the filtered samples, or ``y`` unchanged if filtering is not
+    possible at this sample rate.
+    """
+    return _peaking_eq(y, sr, center_hz=600.0, gain_db=-3.0, q=1.5)
+def clarity_boost(y, sr):
+    """Boost 2 dB around 8000 Hz (Q=1.0) for extra clarity.
+
+    Returns the filtered samples, or ``y`` unchanged when 8000 Hz is not
+    below the Nyquist frequency of ``sr``.
+    """
+    return _peaking_eq(y, sr, center_hz=8000.0, gain_db=2.0, q=1.0)
+def preprocess_audio(input_path):
+    """Prepare the input recording for conversion.
+
+    Loads ``input_path`` as mono without requesting a resample, normalizes
+    it and scales the result by 0.95, runs ``anti_muffle_filter`` and writes
+    the outcome to ``preprocessed.wav`` inside ``TEMP_DIR``.
+
+    Returns:
+        A ``(path, message)`` tuple: the written file and a status line that
+        includes the sample rate.
+    """
+    target = os.path.join(TEMP_DIR, "preprocessed.wav")
+    samples, sr = librosa.load(input_path, sr=None, mono=True)
+    # Normalize first, then leave a little headroom before filtering.
+    scaled = librosa.util.normalize(samples) * 0.95
+    sf.write(target, anti_muffle_filter(scaled, sr), sr)
+    report = f"✅ پیش‌پردازش انجام شد (SR: {sr}Hz, Anti-Muffle)"
+    return target, report
+def postprocess_audio(input_path):
+    """Apply the clarity boost to converted audio and write the result.
+
+    Loads ``input_path`` as mono without requesting a resample, runs
+    ``clarity_boost``, rescales the signal if its peak exceeds 1.0 and
+    writes it to ``postprocessed.wav`` inside ``TEMP_DIR``.
+
+    Returns:
+        A ``(path, message)`` tuple: the written file and a status line.
+    """
+    y, sr = librosa.load(input_path, sr=None, mono=True)
+    y = clarity_boost(y, sr)
+    # An EQ boost can push peaks past full scale; scale down instead of
+    # letting the WAV writer hard-clip them.
+    peak = float(np.max(np.abs(y))) if np.size(y) else 0.0
+    if peak > 1.0:
+        y = y / peak
+    post_path = os.path.join(TEMP_DIR, "postprocessed.wav")
+    sf.write(post_path, y, sr)
+    return post_path, "✅ پس‌پردازش انجام شد (Clarity Boost)"
+# ==========================================
+# 3. Main conversion engine
+# ==========================================
+def rvc_process_pipeline(
+    audio_path, model_file, index_file, pitch_change, f0_method,
+    index_rate, protect_val, filter_radius, resample_sr, envelope_mix, hop_length
+):
+    """Run pre-processing, RVC inference and post-processing on one file.
+
+    Args:
+        audio_path: Path of the input recording.
+        model_file: Uploaded model, either an object exposing ``.name`` or a
+            plain path string.
+        index_file: Optional uploaded index file, same forms as
+            ``model_file``.
+        pitch_change, f0_method, index_rate, protect_val, filter_radius,
+        resample_sr, envelope_mix, hop_length: Inference options. Each one
+            is forwarded only when ``infer_file`` declares a matching
+            parameter.
+
+    Returns:
+        ``(output_path, log)`` on success, or ``(None, message)`` when the
+        inputs are incomplete or any step raises.
+    """
+    # Imported locally: the error handler below needs traceback, which the
+    # module-level imports do not provide.
+    import traceback
     if not audio_path or not model_file:
+        return None, "❌ ورودی‌ها کامل نیست."
     try:
+        # Pre-processing
+        clean_audio, log1 = preprocess_audio(audio_path)
+        # RVC conversion
         rvc = RVCInference(device="cpu")
+        # Uploads may be objects with a .name path or plain path strings.
+        rvc.load_model(getattr(model_file, "name", model_file))
+        rvc_out_path = os.path.join(TEMP_DIR, "rvc_output.wav")
+        # Forward only the options that infer_file's signature declares.
         sig = inspect.signature(rvc.infer_file)
         params = sig.parameters
+        kwargs = {"input_path": clean_audio, "output_path": rvc_out_path}
         if "pitch" in params: kwargs["pitch"] = int(pitch_change)
         elif "f0_up_key" in params: kwargs["f0_up_key"] = int(pitch_change)
         if "method" in params: kwargs["method"] = f0_method
         elif "f0_method" in params: kwargs["f0_method"] = f0_method
+        if "index_path" in params and index_file: kwargs["index_path"] = getattr(index_file, "name", index_file)
         if "index_rate" in params: kwargs["index_rate"] = float(index_rate)
         if "protect" in params: kwargs["protect"] = float(protect_val)
         if "filter_radius" in params: kwargs["filter_radius"] = int(filter_radius)
+        if "resample_sr" in params: kwargs["resample_sr"] = int(resample_sr)
+        if "rms_mix_rate" in params: kwargs["rms_mix_rate"] = float(envelope_mix)
+        if "hop_length" in params: kwargs["hop_length"] = int(hop_length)
         rvc.infer_file(**kwargs)
+        log2 = "✅ تبدیل RVC انجام شد."
+        # Post-processing
+        final_output, log3 = postprocess_audio(rvc_out_path)
+        return final_output, f"{log1}\n{log2}\n{log3}"
     except Exception as e:
+        return None, f"❌ خطا: {traceback.format_exc()}"
+# ==========================================
+# 4. User interface (UI)
+# ==========================================
+# Layout: an input column and an output column side by side, followed by
+# three settings tabs; the button runs rvc_process_pipeline.
+with gr.Blocks(title="RVC Studio Pro", theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("# 🎙️ RVC Studio Pro - Edition")
     with gr.Row():
+        # Input column
         with gr.Column():
+            audio_in = gr.Audio(label="صدای ورودی", type="filepath")
+            with gr.Row():
+                model_in = gr.File(label="فایل مدل (.pth)", file_types=[".pth"])
+                index_in = gr.File(label="فایل ایندکس (.index)", file_types=[".index"])
+            btn = gr.Button("✨ شروع پردازش", variant="primary")
+        # Output column
         with gr.Column():
+            audio_out = gr.Audio(label="صدای نهایی")
+            status = gr.Textbox(label="گزارش عملیات", lines=4)
+    # Settings tabs
+    with gr.Tabs():
+        with gr.TabItem("تنظیمات اصلی"):
+            pitch = gr.Slider(-24, 24, value=0, step=1, label="Pitch (تغییر گام)")
+            method = gr.Dropdown(
+                ["rmvpe", "harvest", "crepe", "pm"],
+                value="harvest",
+                label="الگوریتم",
+                info="برای فارسی Harvest یا RMVPE بهترین هستند"
+            )
+        with gr.TabItem("تنظیمات کیفیت (رفع خش و تداخل)"):
+            index_rate = gr.Slider(0, 1, value=0.3, label="Index Rate (شدت لهجه مدل)")
+            protect = gr.Slider(0, 0.5, value=0.4, label="Protect (محافظت از حروف بی‌صدا)")
+            filter_radius = gr.Slider(3, 7, value=3, label="Filter Radius (نرم‌کننده لرزش)")
+        with gr.TabItem("تنظیمات فرکانس (رفع گنگی صدا)"):
+            hop_length = gr.Slider(32, 256, value=64, step=32, label="Hop Length (دقت زمانی)")
+            envelope_mix = gr.Slider(0, 1, value=0.2, label="Volume Envelope Mix (میکس حجم صدا)")
+            resample_sr = gr.Slider(0, 48000, value=40000, step=8000, label="Resample SR (قفل فرکانس)")
+    # The input list below must stay in the same order as the parameters of
+    # rvc_process_pipeline; its two return values fill audio_out and status.
     btn.click(
+        rvc_process_pipeline,
+        [audio_in, model_in, index_in, pitch, method, index_rate, protect, filter_radius, resample_sr, envelope_mix, hop_length],
         [audio_out, status]
     )