#!/usr/bin/env python3
|
|
"""
|
|
CINEMATIC VOICE GENERATOR - Natural Human Voice
|
|
Uses edge-tts with SSML markup for breathing, pacing, and emotion
|
|
Prints follow-up instructions for adding reverb and ambient layering (these effects are not applied by this script)
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# edge-tts is the only third-party dependency of this script. If it cannot be
# imported, tell the user how to install it and stop with exit status 1.
try:
    import edge_tts
    from edge_tts import VoicesManager
    EDGE_TTS_AVAILABLE = True
except ImportError:
    EDGE_TTS_AVAILABLE = False
    print("⚠️ edge-tts not installed. Install with: pip install edge-tts")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module for interactive sessions and is not
    # guaranteed to be defined (e.g. `python -S`, frozen executables).
    raise SystemExit(1)
|
|
|
|
# Output directory: assets/audio/voices/narrator, located two levels above
# this file (i.e. next to the directory that contains the script).
# resolve() anchors the path to the script's real location; without it a
# relative __file__ such as "script.py" makes .parent.parent collapse to "."
# and the files end up under the current working directory instead.
VOICE_DIR = Path(__file__).resolve().parent.parent / "assets" / "audio" / "voices" / "narrator"
# Created at import time so that saving a voice file never hits a missing directory.
VOICE_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Narrator voice profile: a deep Slovenian male voice with a noir feel.

# Voice identifier handed to edge-tts.
NARRATOR_VOICE = "sl-SI-RokNeural"

# Speaking-rate offset; slowed down for dramatic effect.
NARRATOR_RATE = "-15%"

# Pitch offset; lowered for a deeper tone.
NARRATOR_PITCH = "-5Hz"
|
|
|
|
# Intro cutscene narration. <break> elements mark the pauses between phrases
# and <emphasis> marks the stressed words.
# Wording corrections to the spoken Slovenian text:
#   - "Iščem" is the standard first-person form of "iskati" (was "Iskam");
#   - "uničilo" agrees with the neuter "tisto, kar" (was "uničila").
INTRO_SCRIPT = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
<prosody rate="-20%" pitch="-5Hz" volume="loud">
Leta <break time="300ms"/> dva tisoč štiriinosemdeset...
<break time="500ms"/>

Svet, <break time="200ms"/> kot smo ga poznali, <break time="300ms"/> je prenehal obstajati.
<break time="800ms"/>

Zombie apokalipsa <break time="400ms"/> ni bila tisto, <break time="200ms"/> kar nas je skoraj uničilo.
<break time="600ms"/>

Bilo je <emphasis level="strong">nekaj drugega</emphasis>.
<break time="500ms"/>

Nekaj <break time="300ms"/> veliko hujšega.
<break time="1000ms"/>

Zdaj <break time="400ms"/> sem sam.
<break time="500ms"/>

Iščem <break time="300ms"/> svojo <emphasis level="strong">Ano</emphasis>.
<break time="800ms"/>

In odkrivam <break time="400ms"/> resnico <break time="300ms"/> o tem, <break time="200ms"/> kaj se je resnično zgodilo.
<break time="1000ms"/>
</prosody>
</speak>
"""
|
|
|
|
# Kai's memories (emotional, broken delivery). <break> elements mark the
# pauses and <emphasis> marks the stressed words.
# Wording corrections to the spoken Slovenian text:
#   - "spominjam se" takes the genitive: "tvojega smeha" (was "tvoje smeh");
#   - "ne spomnim se," replaces the non-word "nepomnim si".
KAI_MEMORY = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
<prosody rate="-25%" pitch="-3Hz" volume="medium">
Ana <break time="500ms"/> kje si?
<break time="800ms"/>

Spominjam se <break time="400ms"/> tvojega <emphasis level="moderate">smeha</emphasis>.
<break time="600ms"/>

Tvoje <break time="300ms"/> prijazne <break time="200ms"/> oči.
<break time="1000ms"/>

Ampak <break time="500ms"/> ne spomnim se, <break time="400ms"/> kako si <emphasis level="strong">izginila</emphasis>.
<break time="800ms"/>

Nekaj <break time="300ms"/> je narobe <break time="200ms"/> z mojimi spomini.
<break time="1200ms"/>
</prosody>
</speak>
"""
|
|
|
|
# Narrator line for the dark discovery in the church. <break> elements mark
# the pauses and <emphasis> marks the stressed words.
# The word "sveta" was previously written in Cyrillic letters ("света"),
# which does not belong in Slovenian (Latin-script) text handed to an sl-SI
# voice; it is now spelled with Latin letters.
DARK_DISCOVERY = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
<prosody rate="-15%" pitch="-6Hz" volume="loud">
Ko sem <break time="300ms"/> prvič <break time="200ms"/> vstopil v <emphasis level="strong">cerkev</emphasis>,
<break time="600ms"/>

sem vedel, <break time="400ms"/> da ta kraj <break time="300ms"/> skriva <emphasis level="strong">skrivnosti</emphasis>.
<break time="800ms"/>

Župnik <break time="300ms"/> je vedel več, <break time="200ms"/> kot je želel povedati.
<break time="600ms"/>

Govoril je <break time="400ms"/> o letu <emphasis level="strong">dva tisoč štiriinosemdeset</emphasis>.
<break time="500ms"/>

O <break time="300ms"/> koncu sveta.
<break time="1000ms"/>

In o tem, <break time="400ms"/> da <emphasis level="moderate">nisem sam</emphasis>.
<break time="1200ms"/>
</prosody>
</speak>
"""
|
|
|
|
|
|
def _ssml_to_plain_text(ssml_text):
    """Return the spoken text of an SSML document with all markup removed.

    Input that is not well-formed XML is assumed to be plain text already and
    is returned unchanged.
    """
    # Function-scope import: nothing else in this file needs the XML parser.
    from xml.etree import ElementTree

    try:
        root = ElementTree.fromstring(ssml_text.strip())
    except ElementTree.ParseError:
        return ssml_text
    # itertext() yields only the character data; split/join collapses the
    # newlines and gaps left where elements such as <break/> were removed.
    return " ".join("".join(root.itertext()).split())


async def generate_voice_with_ssml(ssml_text, voice, output_path, rate="-15%"):
    """Synthesize one voice line with edge-tts and save it to *output_path*.

    The Communicate API used here takes plain text rather than a custom SSML
    document (edge-tts dropped custom SSML support), so markup passed through
    as-is would be treated as text to speak. The markup is therefore stripped
    first; pacing is controlled through *rate*. Pauses and emphasis expressed
    by <break>/<emphasis> elements are not reproduced.

    Args:
        ssml_text: SSML document (or plain text) containing the line to speak.
        voice: edge-tts voice identifier.
        output_path: Destination file, as a ``Path`` or a string.
        rate: Speaking-rate offset passed to edge-tts, e.g. ``"-15%"``.

    Returns:
        True if the file was generated, False if anything went wrong. Errors
        are printed, never raised (best-effort behaviour, kept on purpose so
        one failing line does not abort a whole batch).
    """
    try:
        plain_text = _ssml_to_plain_text(ssml_text)
        communicate = edge_tts.Communicate(plain_text, voice, rate=rate)
        await communicate.save(str(output_path))
    except Exception as e:
        print(f"❌ Error: {e}")
        return False
    else:
        # Path(...) accepts both str and Path, so reporting the file name can
        # no longer turn a successful save into a reported failure.
        print(f"✅ Generated: {Path(output_path).name}")
        return True
|
|
|
|
|
|
async def generate_all_narrator_voices():
    """Generate every narrator voice line and print a summary.

    Each entry of the local ``voices`` table (file name, script, rate) is
    synthesized with NARRATOR_VOICE into VOICE_DIR. Lines are generated one
    after another. The result of every synthesis call is checked, so the
    final banner reports failures instead of always claiming success.
    Returns None.
    """
    print("\n🎬 CINEMATIC VOICE GENERATOR")
    print("=" * 60)
    print(f"Voice: {NARRATOR_VOICE} (Deep Slovenian Male)")
    print("Style: Noir, Slow-Paced, Emotional")
    print("Effects: SSML pauses, emphasis, prosody control")
    print("=" * 60)
    print()

    # (output file name, script text, speaking-rate offset)
    voices = [
        ("intro_cutscene.mp3", INTRO_SCRIPT, NARRATOR_RATE),
        ("kai_memory_ana.mp3", KAI_MEMORY, "-25%"),
        ("discovery_church.mp3", DARK_DISCOVERY, "-15%"),
    ]

    failed = []
    for filename, script, rate in voices:
        output_path = VOICE_DIR / filename
        print(f"🎙️ Generating: {filename}")
        succeeded = await generate_voice_with_ssml(script, NARRATOR_VOICE, output_path, rate)
        if not succeeded:
            failed.append(filename)
        print()

    print("=" * 60)
    if failed:
        # Do not announce success when one or more files were not produced.
        print(f"⚠️ VOICE GENERATION FINISHED WITH ERRORS ({len(failed)} of {len(voices)} failed)")
        for filename in failed:
            print(f" - {filename}")
    else:
        print("✅ VOICE GENERATION COMPLETE!")
    print()
    print("📁 Files saved to:")
    print(f" {VOICE_DIR}")
    print()
    print("🎵 NEXT STEPS:")
    print("1. Add reverb effect (use Audacity or ffmpeg)")
    print("2. Layer with wind/fire ambience")
    print("3. Integrate with Phaser typewriter sync")
    print()
    print("REVERB COMMAND (ffmpeg):")
    print("ffmpeg -i input.mp3 -af 'aecho=0.8:0.9:1000:0.3' output_reverb.mp3")
    print()
|
|
|
|
|
|
async def main():
    """Entry coroutine: generate all narrator voices when edge-tts is usable.

    If the EDGE_TTS_AVAILABLE flag is false, an error line is printed and
    nothing is generated.
    """
    if EDGE_TTS_AVAILABLE:
        await generate_all_narrator_voices()
    else:
        print("ERROR: edge-tts not installed")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
|