Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ import scipy.signal as signal
|
|
| 14 |
from datetime import datetime
|
| 15 |
|
| 16 |
# ==========================================
|
| 17 |
-
# 1.
|
| 18 |
# ==========================================
|
| 19 |
print(">>> System Startup: RVC Pro Max...")
|
| 20 |
|
|
@@ -27,7 +27,7 @@ except ImportError as e:
|
|
| 27 |
print(f"Import Error: {e}")
|
| 28 |
sys.exit(1)
|
| 29 |
|
| 30 |
-
#
|
| 31 |
try:
|
| 32 |
static_ffmpeg.add_paths()
|
| 33 |
ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
|
|
@@ -41,7 +41,7 @@ os.environ["TEMP"] = TEMP_DIR
|
|
| 41 |
os.environ["TMPDIR"] = TEMP_DIR
|
| 42 |
|
| 43 |
# ==========================================
|
| 44 |
-
# 2.
|
| 45 |
# ==========================================
|
| 46 |
|
| 47 |
def log_message(message):
|
|
@@ -49,18 +49,19 @@ def log_message(message):
|
|
| 49 |
return f"[{timestamp}] {message}"
|
| 50 |
|
| 51 |
def apply_clarity_eq(y, sr):
|
| 52 |
-
|
| 53 |
try:
|
| 54 |
-
# 1. Low-Cut (
|
| 55 |
sos_hp = signal.butter(4, 60, 'hp', fs=sr, output='sos')
|
| 56 |
y = signal.sosfilt(sos_hp, y)
|
| 57 |
|
| 58 |
-
# 2.
|
| 59 |
sos_mid = signal.butter(2, [800, 1200], 'bandstop', fs=sr, output='sos')
|
| 60 |
y_filtered = signal.sosfilt(sos_mid, y)
|
|
|
|
| 61 |
y = (y * 0.7) + (y_filtered * 0.3)
|
| 62 |
|
| 63 |
-
# 3. High Boost (
|
| 64 |
sos_high = signal.butter(2, 5000, 'hp', fs=sr, output='sos')
|
| 65 |
y_high = signal.sosfilt(sos_high, y)
|
| 66 |
y = y + (y_high * 0.15)
|
|
@@ -102,7 +103,7 @@ def cleanup_temp():
|
|
| 102 |
pass
|
| 103 |
|
| 104 |
# ==========================================
|
| 105 |
-
# 3.
|
| 106 |
# ==========================================
|
| 107 |
|
| 108 |
def rvc_process_pipeline(
|
|
@@ -125,18 +126,18 @@ def rvc_process_pipeline(
|
|
| 125 |
model_path = model_file.name
|
| 126 |
index_path = index_file.name if index_file else None
|
| 127 |
|
| 128 |
-
#
|
| 129 |
clean_audio, msg = preprocess_audio(audio_path)
|
| 130 |
logs.append(log_message(msg))
|
| 131 |
|
| 132 |
-
#
|
| 133 |
logs.append(log_message(f"Model: {os.path.basename(model_path)}"))
|
| 134 |
rvc = RVCInference(device="cpu")
|
| 135 |
rvc.load_model(model_path)
|
| 136 |
|
| 137 |
output_temp = os.path.join(TEMP_DIR, f"rvc_out_{int(time.time())}.wav")
|
| 138 |
|
| 139 |
-
#
|
| 140 |
kwargs = {
|
| 141 |
"input_path": clean_audio,
|
| 142 |
"output_path": output_temp,
|
|
@@ -151,7 +152,7 @@ def rvc_process_pipeline(
|
|
| 151 |
"hop_length": int(hop_length)
|
| 152 |
}
|
| 153 |
|
| 154 |
-
#
|
| 155 |
sig = inspect.signature(rvc.infer_file)
|
| 156 |
valid_keys = sig.parameters.keys()
|
| 157 |
|
|
@@ -169,7 +170,7 @@ def rvc_process_pipeline(
|
|
| 169 |
start_time = time.time()
|
| 170 |
rvc.infer_file(**final_kwargs)
|
| 171 |
|
| 172 |
-
#
|
| 173 |
final_output = output_temp
|
| 174 |
if enable_clarity and os.path.exists(output_temp):
|
| 175 |
logs.append(log_message("Applying clarity filter..."))
|
|
@@ -178,9 +179,7 @@ def rvc_process_pipeline(
|
|
| 178 |
duration = time.time() - start_time
|
| 179 |
logs.append(log_message(f"Done! ({duration:.2f}s)"))
|
| 180 |
|
| 181 |
-
#
|
| 182 |
-
# استفاده از chr(10) به جای "
|
| 183 |
-
" برای جلوگیری از شکستن خط در کپی
|
| 184 |
separator = chr(10)
|
| 185 |
log_text = separator.join(logs)
|
| 186 |
|
|
@@ -193,7 +192,7 @@ def rvc_process_pipeline(
|
|
| 193 |
return None, err_msg
|
| 194 |
|
| 195 |
# ==========================================
|
| 196 |
-
# 4.
|
| 197 |
# ==========================================
|
| 198 |
|
| 199 |
custom_css = """
|
|
@@ -201,35 +200,35 @@ custom_css = """
|
|
| 201 |
"""
|
| 202 |
|
| 203 |
with gr.Blocks(title="RVC Pro Persian", theme=gr.themes.Soft(), css=custom_css) as demo:
|
| 204 |
-
gr.Markdown("##
|
| 205 |
|
| 206 |
with gr.Row():
|
| 207 |
with gr.Column():
|
| 208 |
-
audio_input = gr.Audio(label="
|
| 209 |
with gr.Row():
|
| 210 |
-
model_input = gr.File(label="
|
| 211 |
-
index_input = gr.File(label="
|
| 212 |
|
| 213 |
algo_dropdown = gr.Dropdown(
|
| 214 |
choices=["rmvpe", "fcpe", "crepe", "harvest", "pm"],
|
| 215 |
value="rmvpe",
|
| 216 |
-
label="
|
| 217 |
)
|
| 218 |
-
pitch_slider = gr.Slider(-24, 24, value=0, step=1, label="
|
| 219 |
-
btn_run = gr.Button("
|
| 220 |
|
| 221 |
with gr.Column():
|
| 222 |
-
with gr.Accordion("
|
| 223 |
-
enable_clarity = gr.Checkbox(value=True, label="
|
| 224 |
-
index_rate = gr.Slider(0, 1, value=0.4, step=0.05, label="
|
| 225 |
envelope_mix = gr.Slider(0, 1, value=0.25, step=0.05, label="Volume Mix")
|
| 226 |
protect_val = gr.Slider(0, 0.5, value=0.33, step=0.01, label="Protect")
|
| 227 |
filter_radius = gr.Slider(0, 7, value=3, step=1, label="Filter Radius")
|
| 228 |
resample_sr = gr.Slider(0, 48000, value=0, step=1000, label="Resample SR")
|
| 229 |
hop_len = gr.Slider(1, 512, value=128, step=1, label="Hop Length")
|
| 230 |
|
| 231 |
-
output_audio = gr.Audio(label="
|
| 232 |
-
logs = gr.Textbox(label="
|
| 233 |
|
| 234 |
btn_run.click(
|
| 235 |
rvc_process_pipeline,
|
|
|
|
| 14 |
from datetime import datetime
|
| 15 |
|
| 16 |
# ==========================================
|
| 17 |
+
# 1. SETUP & IMPORTS
|
| 18 |
# ==========================================
|
| 19 |
print(">>> System Startup: RVC Pro Max...")
|
| 20 |
|
|
|
|
| 27 |
print(f"Import Error: {e}")
|
| 28 |
sys.exit(1)
|
| 29 |
|
| 30 |
+
# Setup FFmpeg
|
| 31 |
try:
|
| 32 |
static_ffmpeg.add_paths()
|
| 33 |
ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
|
|
|
|
| 41 |
os.environ["TMPDIR"] = TEMP_DIR
|
| 42 |
|
| 43 |
# ==========================================
|
| 44 |
+
# 2. AUDIO PROCESSING (DSP)
|
| 45 |
# ==========================================
|
| 46 |
|
| 47 |
def log_message(message):
|
|
|
|
| 49 |
return f"[{timestamp}] {message}"
|
| 50 |
|
| 51 |
def apply_clarity_eq(y, sr):
|
| 52 |
+
# Apply EQ to fix nasal sound and boost clarity
|
| 53 |
try:
|
| 54 |
+
# 1. Low-Cut (remove rumble < 60Hz)
|
| 55 |
sos_hp = signal.butter(4, 60, 'hp', fs=sr, output='sos')
|
| 56 |
y = signal.sosfilt(sos_hp, y)
|
| 57 |
|
| 58 |
+
# 2. Cut Nasal Frequencies (around 1000Hz)
|
| 59 |
sos_mid = signal.butter(2, [800, 1200], 'bandstop', fs=sr, output='sos')
|
| 60 |
y_filtered = signal.sosfilt(sos_mid, y)
|
| 61 |
+
# Mix: 70% original, 30% filtered
|
| 62 |
y = (y * 0.7) + (y_filtered * 0.3)
|
| 63 |
|
| 64 |
+
# 3. High Boost (Air/Clarity > 5000Hz)
|
| 65 |
sos_high = signal.butter(2, 5000, 'hp', fs=sr, output='sos')
|
| 66 |
y_high = signal.sosfilt(sos_high, y)
|
| 67 |
y = y + (y_high * 0.15)
|
|
|
|
| 103 |
pass
|
| 104 |
|
| 105 |
# ==========================================
|
| 106 |
+
# 3. CORE INFERENCE LOGIC
|
| 107 |
# ==========================================
|
| 108 |
|
| 109 |
def rvc_process_pipeline(
|
|
|
|
| 126 |
model_path = model_file.name
|
| 127 |
index_path = index_file.name if index_file else None
|
| 128 |
|
| 129 |
+
# Pre-process
|
| 130 |
clean_audio, msg = preprocess_audio(audio_path)
|
| 131 |
logs.append(log_message(msg))
|
| 132 |
|
| 133 |
+
# Load Model
|
| 134 |
logs.append(log_message(f"Model: {os.path.basename(model_path)}"))
|
| 135 |
rvc = RVCInference(device="cpu")
|
| 136 |
rvc.load_model(model_path)
|
| 137 |
|
| 138 |
output_temp = os.path.join(TEMP_DIR, f"rvc_out_{int(time.time())}.wav")
|
| 139 |
|
| 140 |
+
# Params
|
| 141 |
kwargs = {
|
| 142 |
"input_path": clean_audio,
|
| 143 |
"output_path": output_temp,
|
|
|
|
| 152 |
"hop_length": int(hop_length)
|
| 153 |
}
|
| 154 |
|
| 155 |
+
# Filter invalid params based on installed library version
|
| 156 |
sig = inspect.signature(rvc.infer_file)
|
| 157 |
valid_keys = sig.parameters.keys()
|
| 158 |
|
|
|
|
| 170 |
start_time = time.time()
|
| 171 |
rvc.infer_file(**final_kwargs)
|
| 172 |
|
| 173 |
+
# Post-process
|
| 174 |
final_output = output_temp
|
| 175 |
if enable_clarity and os.path.exists(output_temp):
|
| 176 |
logs.append(log_message("Applying clarity filter..."))
|
|
|
|
| 179 |
duration = time.time() - start_time
|
| 180 |
logs.append(log_message(f"Done! ({duration:.2f}s)"))
|
| 181 |
|
| 182 |
+
# SAFE STRING JOINING
|
|
|
|
|
|
|
| 183 |
separator = chr(10)
|
| 184 |
log_text = separator.join(logs)
|
| 185 |
|
|
|
|
| 192 |
return None, err_msg
|
| 193 |
|
| 194 |
# ==========================================
|
| 195 |
+
# 4. GRADIO UI
|
| 196 |
# ==========================================
|
| 197 |
|
| 198 |
custom_css = """
|
|
|
|
| 200 |
"""
|
| 201 |
|
| 202 |
with gr.Blocks(title="RVC Pro Persian", theme=gr.themes.Soft(), css=custom_css) as demo:
|
| 203 |
+
gr.Markdown("## RVC Pro: Professional Voice Converter")
|
| 204 |
|
| 205 |
with gr.Row():
|
| 206 |
with gr.Column():
|
| 207 |
+
audio_input = gr.Audio(label="Input Audio", type="filepath")
|
| 208 |
with gr.Row():
|
| 209 |
+
model_input = gr.File(label="Model (.pth)", file_types=[".pth"])
|
| 210 |
+
index_input = gr.File(label="Index (.index)", file_types=[".index"])
|
| 211 |
|
| 212 |
algo_dropdown = gr.Dropdown(
|
| 213 |
choices=["rmvpe", "fcpe", "crepe", "harvest", "pm"],
|
| 214 |
value="rmvpe",
|
| 215 |
+
label="Algorithm"
|
| 216 |
)
|
| 217 |
+
pitch_slider = gr.Slider(-24, 24, value=0, step=1, label="Pitch Change")
|
| 218 |
+
btn_run = gr.Button("Start Conversion", elem_id="run_btn", variant="primary")
|
| 219 |
|
| 220 |
with gr.Column():
|
| 221 |
+
with gr.Accordion("Quality Settings", open=True):
|
| 222 |
+
enable_clarity = gr.Checkbox(value=True, label="Fix Nasal Sound (Clarity)")
|
| 223 |
+
index_rate = gr.Slider(0, 1, value=0.4, step=0.05, label="Index Rate")
|
| 224 |
envelope_mix = gr.Slider(0, 1, value=0.25, step=0.05, label="Volume Mix")
|
| 225 |
protect_val = gr.Slider(0, 0.5, value=0.33, step=0.01, label="Protect")
|
| 226 |
filter_radius = gr.Slider(0, 7, value=3, step=1, label="Filter Radius")
|
| 227 |
resample_sr = gr.Slider(0, 48000, value=0, step=1000, label="Resample SR")
|
| 228 |
hop_len = gr.Slider(1, 512, value=128, step=1, label="Hop Length")
|
| 229 |
|
| 230 |
+
output_audio = gr.Audio(label="Final Output", type="filepath")
|
| 231 |
+
logs = gr.Textbox(label="Logs", lines=5)
|
| 232 |
|
| 233 |
btn_run.click(
|
| 234 |
rvc_process_pipeline,
|