| |
| |
| |
| |
|
|
| import cv2 |
| import subprocess |
| import numpy as np |
| import soundfile |
| import docx |
|
|
| import urllib |
| from pathlib import Path |
| from moviepy.editor import * |
|
|
# Module-level configuration and state for the audiobook script.

# Presumably the audio sample rate in Hz; the same value (16000) is what the
# .wav files below are written with.
FS = 16000

# Root output directory; each voice gets its own sub-directory beneath it.
ROOT_DIR = './tts_audiobooks/voices/'
Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)

# Voices to render; the main loop runs once per entry.
voices = ['en_US_m-ailabs_mary_ann']

# Manuscript whose paragraphs are turned into speech.
d = docx.Document('assets/audiobook_TTS.docx')

# Incremented at every paragraph starting with 'CHAPTER:' and used to number
# the per-chapter .wav files.
chapter_counter = 0

# Set to False after a synthesized paragraph and to True after a skipped one.
last_paragraph_was_silence = False
|
|
# Render one complete audiobook per voice: a .wav per chapter, a .wav of the
# whole book, and an .mp4 that pairs the full audio with a static title frame.
for vox in voices:

    # Every voice is numbered and tracked on its own, so restart the
    # bookkeeping instead of carrying it over from the previous voice.
    chapter_counter = 0
    last_paragraph_was_silence = False

    # Turn the voice identifier into a string usable in file names.
    vox_str = vox.replace(
        '/', '_').replace(
        '#', '_').replace(
        'cmu-arctic', 'cmu_arctic').replace(
        '_low', '').replace('-', '')

    # Output directory and final artefacts of this voice.
    voice_dir = ROOT_DIR + vox_str + '/'
    Path(voice_dir).mkdir(parents=True, exist_ok=True)
    full_audiobook_wav = voice_dir + f'{vox_str}_full_audiobook.wav'
    full_audiobook_mp4 = voice_dir + f'{vox_str}_full_audiobook.mp4'

    print(vox)

    total = []         # audio of every finished chapter, in order
    chapter = []       # synthesized paragraphs of the chapter in progress
    total_samples = 0  # running length of `total`; avoids re-concatenating
                       # the whole book just to print a timestamp

    # A trailing marker flushes the last chapter. It is kept as plain text:
    # assigning it to the `.text` of the last paragraph would overwrite the
    # manuscript's real final paragraph.
    paragraph_texts = [para.text for para in d.paragraphs]
    paragraph_texts.append('CHAPTER: END OF AUDIOBOOK')
    last_index = len(paragraph_texts) - 1

    for index, t in enumerate(paragraph_texts):

        # A heading closes the chapter in progress and opens the next one.
        if t.startswith('CHAPTER:'):

            # Nothing to save when the heading is not preceded by any
            # synthesized text (np.concatenate rejects an empty list).
            if chapter:
                audio = np.concatenate(chapter)
                soundfile.write(
                    voice_dir + f'{vox_str}_chapter_{chapter_counter}.wav',
                    audio,
                    FS)
                total.append(audio)
                total_samples += audio.shape[0]

            chapter = []
            chapter_counter += 1

            # Position (minutes:seconds) in the full audiobook at which the
            # new chapter begins.
            elapsed = int(total_samples / FS)
            print(f'Start Chapter {chapter_counter}, timestamp:{elapsed // 60}:{elapsed % 60}')

        # The end marker only flushes the last chapter; it is not spoken.
        if index == last_index:
            break

        # Synthesize paragraphs that carry text, skipping very short ones,
        # ones starting with '{' or ending with '}', and figure captions.
        if len(t) > 2 and t[0] != '{' and t[-1] != '}' and 'Figure' not in t:

            # tts.py receives the (lower-cased) text through a file.
            with open('_tmp.txt', 'w') as f:
                f.write(t.lower())

            # check=True: if synthesis fails, stop rather than silently
            # re-reading a stale out/_tmp.wav from an earlier paragraph.
            subprocess.run(
                [
                    "python",
                    "tts.py",
                    "--text",
                    "_tmp.txt",
                    '--soundscape', 'birds formig' if chapter_counter < 2 else '',
                    '--voice', vox,
                    '--out_file', '_tmp'
                ],
                check=True)

            # NOTE(review): the sample rate returned here is ignored and the
            # audio is later written at FS - confirm tts.py outputs FS Hz.
            audio, _fs = soundfile.read('out/_tmp.wav')
            chapter.append(audio)

            last_paragraph_was_silence = False

        else:
            last_paragraph_was_silence = True

    # Without any synthesized audio there is nothing to write or to mux.
    if not total:
        print(f'No audio was synthesized for {vox}, skipping audiobook and video')
        continue

    # Full audiobook for this voice.
    soundfile.write(full_audiobook_wav, np.concatenate(total), FS)

    # Static title frame: black image, 1920 rows by 1080 columns.
    voice_pic = np.zeros((1920, 1080, 3), dtype=np.uint8)

    font = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (0, 640)
    fontScale = 2
    fontColor = (69, 74, 74)
    thickness = 4
    lineType = 2

    # (text, bottom-left corner, font scale) of each caption, drawn in order.
    for caption, origin, scale in (
            (vox, bottomLeftCornerOfText, fontScale),
            ('AUDIOBOOK', (170, 170), 4),
            ('TTS voice =', (0, 500), fontScale)):
        cv2.putText(voice_pic, caption,
                    origin,
                    font,
                    scale,
                    fontColor,
                    thickness,
                    lineType)

    STATIC_FRAME = '_tmp.png'
    cv2.imwrite(STATIC_FRAME, voice_pic)

    # Silent video showing the title frame.
    # NOTE(review): the clip lasts 5 seconds and its video stream is copied
    # unchanged below, while the audio is the whole book - confirm intended.
    SILENT_VIDEO = '_tmp.mp4'
    clip_silent = ImageClip(STATIC_FRAME).set_duration(5)
    clip_silent.fps = 24
    clip_silent.write_videofile(SILENT_VIDEO)

    # Mux the silent video (input 0) with the full audiobook (input 1).
    # The exit status of ffmpeg is not checked.
    subprocess.call(
        ["ffmpeg",
         "-y",
         "-i",
         SILENT_VIDEO,
         "-i",
         full_audiobook_wav,
         "-c:v",
         "copy",
         "-map",
         "0:v:0",
         "-map",
         "1:a:0",
         full_audiobook_mp4,
         ])
|
|