#!/usr/bin/env python3
"""
ULTIMATE INTRO VOICE GENERATOR
Multilingual (SLO/ENG) intro voice-over generator built on edge-tts.

Each intro clip is synthesised once per language. Mood is shaped with the
per-clip ``rate`` and ``pitch`` offsets handed to ``edge_tts.Communicate``;
the script sends plain text, not SSML markup.
"""

import asyncio
from pathlib import Path

import edge_tts

# NOTE: absolute, machine-specific locations; adjust when running elsewhere.
OUTPUT_DIR_EN = Path("/Users/davidkotnik/repos/novafarma/assets/audio/voiceover/intro_final/en")
OUTPUT_DIR_SL = Path("/Users/davidkotnik/repos/novafarma/assets/audio/voiceover/intro_final/sl")

# BEST VOICES - Film Quality
VOICES = {
    "kai_en": "en-US-JennyNeural",       # Warm, emotional female
    "narrator_en": "en-GB-RyanNeural",   # Deep, mysterious British male
    "kai_sl": "sl-SI-PetraNeural",       # Slovenian female
    "narrator_sl": "sl-SI-RokNeural",    # Slovenian male
}

# edge-tts requests "audio-24khz-48kbitrate-mono-mp3" from the service, i.e. a
# constant 48 kbit/s stream, so one second of audio is 48000 / 8 bytes.
_MP3_BYTES_PER_SECOND = 48_000 // 8

# One entry per intro clip: (file name, speaker role, rate, pitch).
# Prosody is shared by both languages so the two tracks stay in sync.
_CLIPS = (
    ("01_breathing.mp3", "kai", "-20%", "-3Hz"),        # Black screen - breathing & confusion
    ("02_flyover.mp3", "narrator", "-15%", "-10Hz"),    # Narrator - the flyover
    ("03_awakening.mp3", "kai", "-25%", "-5Hz"),        # Kai - awakening
    ("04_id_card.mp3", "kai", "-10%", "-3Hz"),          # Kai - reading the ID card
    ("05_determination.mp3", "kai", "-5%", "+2Hz"),     # Kai - determination
)

# Spoken text per language, in the same order as _CLIPS.
_SCRIPTS = {
    "en": (
        "Everything is dark. Why do I only hear silence?",
        "They say the world didn't die with a bang, but with a quiet whisper. "
        "The Valley of Death is not just a place. "
        "It's a memory that no one wants to have anymore.",
        "My head. It hurts. Where am I? Who am I?",
        "Kai Marković. Fourteen years old. That's me. "
        "But this other girl. Why do I feel so empty when I see her? "
        "Like I'm missing half of my heart.",
        "Someone is waiting for me out there. "
        "I can't remember the face, but I feel the promise. "
        "I'm coming to find you, Ana.",
    ),
    "sl": (
        "Vse je temno. Zakaj slišim samo tišino?",
        "Pravijo, da svet ni umrl s pokom, ampak s tihim šepetom. "
        "Dolina smrti ni le kraj. "
        "Je spomin, ki ga nihče več ne želi imeti.",
        "Glava. Boli me. Kje sem? Kdo sem?",
        "Kai Marković. Štirinajst let. To sem jaz. "
        "Ampak ta druga deklica. Zakaj se ob njej počutim tako prazno? "
        "Kot da mi manjka polovica srca.",
        "Nekdo me čaka tam zunaj. "
        "Ne spomnim se obraza, čutim pa obljubo. "
        "Grem te poiskat, Ana.",
    ),
}

# Generation order: (language code, output directory, section banner).
_LANGUAGES = (
    ("en", OUTPUT_DIR_EN, "🇬🇧 GENERATING ENGLISH VOICES"),
    ("sl", OUTPUT_DIR_SL, "🇸🇮 GENERATING SLOVENIAN VOICES"),
)


async def generate_multilingual_intro():
    """Generate every intro clip in English and Slovenian.

    Creates both output directories if needed, then synthesises the clips
    one after another (English first, then Slovenian) and prints a summary.
    Any error raised by ``generate_voice`` propagates and aborts the run.
    """
    OUTPUT_DIR_EN.mkdir(parents=True, exist_ok=True)
    OUTPUT_DIR_SL.mkdir(parents=True, exist_ok=True)

    # NOTE: the banner still advertises SSML; only rate/pitch prosody is
    # actually applied by this script.
    print("="*70)
    print("🎬 ULTIMATE MULTILINGUAL INTRO VOICE GENERATOR")
    print("="*70)
    print("\n✨ Features:")
    print(" - Real SSML (pauses, whispers, emphasis)")
    print(" - Dual language (English + Slovenian)")
    print(" - Film-quality voices")
    print(" - Perfectly timed for subtitles\n")

    for lang, out_dir, banner in _LANGUAGES:
        print("\n" + "="*70)
        print(banner)
        print("="*70)

        # Clips are generated sequentially so the log stays in script order.
        for (filename, speaker, rate, pitch), text in zip(_CLIPS, _SCRIPTS[lang]):
            await generate_voice(
                text=text,
                voice=VOICES[f"{speaker}_{lang}"],
                output=out_dir / filename,
                rate=rate,
                pitch=pitch,
            )

    per_lang = len(_CLIPS)
    total = per_lang * len(_LANGUAGES)

    print("\n" + "="*70)
    print("✅ ALL VOICES GENERATED!")
    print("="*70)
    print("\n📊 SUMMARY:")
    print(f" English: {OUTPUT_DIR_EN}")
    print(f" Slovenian: {OUTPUT_DIR_SL}")
    print(f"\n Total files: {total} ({per_lang} EN + {per_lang} SL)")
    print("\n🎬 VOICE PROFILES:")
    print(" EN - JennyNeural (Kai): Warm, emotional")
    print(" EN - RyanNeural (Narrator): Deep, mysterious")
    print(" SL - PetraNeural (Kai): Slovenian female")
    print(" SL - RokNeural (Narrator): Slovenian male")
    print("\n🎯 TIMING REFERENCE (for subtitle sync):")
    print(" 01_breathing.mp3: ~5-7 seconds")
    print(" 02_flyover.mp3: ~15-18 seconds")
    print(" 03_awakening.mp3: ~6-8 seconds")
    print(" 04_id_card.mp3: ~12-15 seconds")
    print(" 05_determination.mp3: ~10-12 seconds")
    print("\n Total intro duration: ~48-60 seconds")


async def generate_voice(text, voice, output, rate="+0%", pitch="+0Hz"):
    """Synthesise one clip with edge-tts and report what was written.

    Args:
        text: Plain text to speak.
        voice: edge-tts voice name, e.g. ``"en-US-JennyNeural"``.
        output: Destination file (``Path`` or path string).
        rate: Speaking-rate offset passed to edge-tts, e.g. ``"-20%"``.
        pitch: Pitch offset passed to edge-tts, e.g. ``"-3Hz"``.

    The printed duration is derived from the file size and the assumed
    48 kbit/s bitrate, so it is an estimate, not a measurement.
    """
    output = Path(output)  # tolerate plain string paths as well

    preview = f"{text[:50]}..." if len(text) > 50 else text
    print(f"\n🎙️ {output.name}")
    print(f" Voice: {voice}")
    print(f" Rate: {rate}, Pitch: {pitch}")
    print(f" Text: \"{preview}\"")

    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await communicate.save(str(output))

    size = output.stat().st_size
    duration_est = size / _MP3_BYTES_PER_SECOND
    print(f" ✅ Saved: {size:,} bytes (~{duration_est:.1f}s)")


if __name__ == "__main__":
    asyncio.run(generate_multilingual_intro())