Spaces:
Sleeping
Sleeping
| """ | |
| Main entry point for the Audio Translation Web Application | |
| Handles file upload, processing pipeline, and UI rendering | |
| """ | |
# Configure logging first, ahead of the remaining imports below.
import logging

logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    # Records go both to the app.log file and to a stream handler.
    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
| import streamlit as st | |
| import os | |
| import time | |
| import subprocess | |
| from utils.stt import transcribe_audio | |
| from utils.translation import translate_text | |
| from utils.tts_dummy import generate_speech | |
| # Hugging Face Spaces Setup Automation | |
def setup_huggingface_space():
    """Check that the Space has the runtime pieces the app relies on.

    Two checks run in order, each reporting in the Streamlit sidebar:

    1. ``espeak-ng --version`` must run successfully.
    2. The Kokoro checkpoint and the ``af_bella`` voice file must exist
       under ``./kokoro``.

    When a check fails, the problem is logged, a hint is shown in the
    sidebar and the script run is halted with ``st.stop()``.
    """
    logger.info("Running Hugging Face space setup")
    st.sidebar.header("Space Configuration")

    # --- System dependency: espeak-ng -------------------------------------
    probe_command = ["espeak-ng", "--version"]
    try:
        subprocess.run(probe_command, check=True, capture_output=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        # Binary missing, or present but exiting with a non-zero status.
        logger.error("Missing espeak-ng dependency")
        st.sidebar.error(
            "\n"
            "**Missing System Dependencies!** Add this to your Space settings:\n"
            "```txt\n"
            "apt-get update && apt-get install -y espeak-ng\n"
            "```\n"
        )
        st.stop()
    else:
        logger.info("espeak-ng verification successful")

    # --- Model files --------------------------------------------------------
    model_dir = "./kokoro"
    expected_paths = (
        f"{model_dir}/kokoro-v0_19.pth",
        f"{model_dir}/voices/af_bella.pt",
    )
    if any(not os.path.exists(path) for path in expected_paths):
        logger.error("Missing model files in %s", model_dir)
        st.sidebar.warning(
            "\n"
            "**Missing Model Files!** Add this to your Space settings:\n"
            "```txt\n"
            "git clone https://huggingface.co/hexgrad/Kokoro-82M ./kokoro\n"
            "```\n"
        )
        st.stop()
# Initialize environment configurations: make sure the temp/uploads and
# temp/outputs working directories exist (no error if they already do).
for _workdir in ("temp/uploads", "temp/outputs"):
    os.makedirs(_workdir, exist_ok=True)
def configure_page():
    """Set up the Streamlit page configuration and inject CSS overrides.

    Calls ``st.set_page_config`` (title, icon, wide layout, expanded
    sidebar) and then injects a small ``<style>`` block through
    ``st.markdown(..., unsafe_allow_html=True)``.
    """
    logger.info("Configuring Streamlit page")
    st.set_page_config(
        page_title="Audio Translator",
        # The previous literal was mis-decoded UTF-8 ("π§"); restored to the
        # headphones emoji it encodes.
        page_icon="🎧",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # CSS overrides. The selectors target markup generated by Streamlit, so
    # whether each rule still matches depends on the Streamlit version in use.
    css_overrides = (
        "\n"
        "<style>\n"
        ".reportview-container {margin-top: -2em;}\n"
        "#MainMenu {visibility: hidden;}\n"
        ".stDeployButton {display:none;}\n"
        ".stAlert {padding: 20px !important;}\n"
        "</style>\n"
    )
    st.markdown(css_overrides, unsafe_allow_html=True)
def handle_file_processing(upload_path):
    """
    Execute the complete processing pipeline:
    1. Speech-to-Text (STT)
    2. Machine Translation
    3. Text-to-Speech (TTS)

    Progress is reported through a Streamlit progress bar (30, 60, 100) and
    a status placeholder that is updated before each stage.

    Args:
        upload_path: Path of the audio file, passed to ``transcribe_audio``.

    Returns:
        Tuple ``(english_text, chinese_text, output_path)``: the
        transcription, its translation, and the value returned by
        ``generate_speech``.

    Raises:
        Exception: Any error raised by a stage is logged with its traceback,
            shown in the UI, and then re-raised unchanged.
    """
    logger.info("Starting processing for: %s", upload_path)
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # STT Phase
        logger.info("Beginning STT processing")
        # NOTE(review): the leading glyph of this message and of the
        # translation message below was lost to mis-encoding (only a stray
        # "π" survived); the emoji used here are plausible stand-ins - confirm.
        status_text.markdown("🔍 **Performing Speech Recognition...**")
        with st.spinner("Initializing Whisper model..."):
            english_text = transcribe_audio(upload_path)
        progress_bar.progress(30)
        logger.info(
            "STT completed. Text length: %d characters", len(english_text)
        )

        # Translation Phase
        logger.info("Beginning translation")
        status_text.markdown("🌐 **Translating Content...**")
        with st.spinner("Loading translation model..."):
            chinese_text = translate_text(english_text)
        progress_bar.progress(60)
        logger.info(
            "Translation completed. Translated length: %d characters",
            len(chinese_text),
        )

        # TTS Phase
        logger.info("Beginning TTS generation")
        status_text.markdown("🎵 **Generating Chinese Speech...**")
        with st.spinner("Initializing TTS engine..."):
            output_path = generate_speech(chinese_text, language="zh")
        progress_bar.progress(100)
        logger.info("TTS completed. Output file: %s", output_path)

        # Display results
        status_text.success("✅ Processing Complete!")
        return english_text, chinese_text, output_path
    except Exception as e:
        # Top-level boundary for the pipeline: record the traceback, surface
        # the failure in the UI, then propagate so callers still see it.
        logger.exception("Processing failed: %s", e)
        status_text.error(f"❌ Processing Failed: {e}")
        st.exception(e)
        raise
def render_results(english_text, chinese_text, output_path):
    """Show the pipeline output in two columns.

    The first (wider) column lists the recognised text and its translation
    as code blocks; the second column holds an audio player and a download
    button for the file at ``output_path``.
    """
    logger.info("Rendering results")
    st.divider()

    text_column, audio_column = st.columns([2, 1])

    with text_column:
        sections = (
            ("Recognition Results", english_text),
            ("Translation Results", chinese_text),
        )
        for heading, body in sections:
            st.subheader(heading)
            st.code(body, language="text")

    with audio_column:
        st.subheader("Audio Output")
        st.audio(output_path)
        # The file handle itself is handed to the download button.
        with open(output_path, "rb") as audio_file:
            st.download_button(
                label="Download Audio",
                data=audio_file,
                file_name="translated_audio.wav",
                mime="audio/wav",
            )
def main():
    """Main application workflow.

    Configures the page, renders the title and the file uploader, and, once
    a file has been uploaded, stores it under ``temp/uploads``, runs the
    processing pipeline on it and renders the results.
    """
    logger.info("Starting application")
    # setup_huggingface_space()  # First-run configuration checks
    # NOTE: the check above is currently disabled. If it is re-enabled, call
    # it after configure_page(): it writes to the sidebar, and Streamlit
    # versions that require st.set_page_config to be the first Streamlit
    # command would otherwise raise.
    configure_page()

    st.title("🎧 High-Quality Audio Translation System")
    st.markdown("Upload English Audio → Get Chinese Speech Output")

    uploaded_file = st.file_uploader(
        "Select Audio File (MP3/WAV)",
        type=["mp3", "wav"],
        accept_multiple_files=False,
    )

    if uploaded_file is not None:
        logger.info("File uploaded: %s", uploaded_file.name)

        # Do not rely on the directory having been created elsewhere.
        upload_dir = "temp/uploads"
        os.makedirs(upload_dir, exist_ok=True)

        # The file name comes from the client; keep only its final component
        # so the upload cannot be written outside upload_dir.
        safe_name = os.path.basename(uploaded_file.name)
        upload_path = os.path.join(upload_dir, safe_name)
        with open(upload_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        results = handle_file_processing(upload_path)
        if results:
            render_results(*results)


if __name__ == "__main__":
    main()