# novafarma/scripts/generate_cinematic_voice.py

#!/usr/bin/env python3
"""
CINEMATIC VOICE GENERATOR - Natural Human Voice
Uses edge-tts with SSML markup for breathing, pacing, and emotion
Adds reverb and ambient layering for immersive noir atmosphere
"""

import asyncio
import os
from pathlib import Path

# edge-tts is a hard requirement: nothing below can run without it, so a
# failed import prints an install hint and ends the process with status 1.
try:
    import edge_tts
    from edge_tts import VoicesManager
    EDGE_TTS_AVAILABLE = True
except ImportError:
    EDGE_TTS_AVAILABLE = False
    print("⚠️  edge-tts not installed. Install with: pip install edge-tts")
    # The bare `exit` name is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly gives the same exit status everywhere.
    raise SystemExit(1)

# Destination for generated narrator clips: assets/audio/voices/narrator under
# the directory that contains this script's folder. Created at import time
# (including missing parents) so later writes never hit a missing directory.
VOICE_DIR = Path(__file__).parent.parent.joinpath(
    "assets", "audio", "voices", "narrator"
)
VOICE_DIR.mkdir(exist_ok=True, parents=True)

# Narrator voice profile: a deep Slovenian male voice with a noir feel.
NARRATOR_VOICE = "sl-SI-RokNeural"  # Voice identifier (deep Slovenian male)
NARRATOR_RATE = "-15%"  # Speaking-rate offset; slower for dramatic effect
NARRATOR_PITCH = "-5Hz"  # Pitch offset; lower for a deeper tone

# INTRO CUTSCENE SCRIPT (with natural pauses)
INTRO_SCRIPT = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
    <prosody rate="-20%" pitch="-5Hz" volume="loud">
        Leta <break time="300ms"/> dva tisoč štiriinosemdeset...
        <break time="500ms"/>

        Svet, <break time="200ms"/> kot smo ga poznali, <break time="300ms"/> je prenehal obstajati.
        <break time="800ms"/>

        Zombie apokalipsa <break time="400ms"/> ni bila tisto, <break time="200ms"/> kar nas je skoraj uničila.
        <break time="600ms"/>

        Bilo je <emphasis level="strong">nekaj drugega</emphasis>.
        <break time="500ms"/>

        Nekaj <break time="300ms"/> veliko hujšega.
        <break time="1000ms"/>

        Zdaj <break time="400ms"/> sem sam.
        <break time="500ms"/>

        Iskam <break time="300ms"/> svojo <emphasis level="strong">Ano</emphasis>.
        <break time="800ms"/>

        In odkrivam <break time="400ms"/> resnico <break time="300ms"/> o tem, <break time="200ms"/> kaj se je resnično zgodilo.
        <break time="1000ms"/>
    </prosody>
</speak>
"""

# KAI'S MEMORIES (emotional, broken)
# SSML document for Kai's memory monologue: <break> elements mark pauses and
# <emphasis> elements mark stressed words; one <prosody> element wraps the text.
# The Slovenian wording is grammatically corrected ("spominjam se" takes the
# genitive "tvojega smeha"; "ne spomnim se" replaces the non-word "nepomnim si").
KAI_MEMORY = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
    <prosody rate="-25%" pitch="-3Hz" volume="medium">
        Ana <break time="500ms"/> kje si?
        <break time="800ms"/>

        Spominjam se <break time="400ms"/> tvojega <emphasis level="moderate">smeha</emphasis>.
        <break time="600ms"/>

        Tvoje <break time="300ms"/> prijazne <break time="200ms"/> oči.
        <break time="1000ms"/>

        Ampak <break time="500ms"/> ne spomnim se <break time="400ms"/> kako si <emphasis level="strong">izginila</emphasis>.
        <break time="800ms"/>

        Nekaj <break time="300ms"/> je narobe <break time="200ms"/> z mojimi spomini.
        <break time="1200ms"/>
    </prosody>
</speak>
"""

# NARRATOR - DARK DISCOVERY
# SSML document for the church-discovery narration: <break> elements mark
# pauses and <emphasis> elements mark stressed words; one <prosody> element
# wraps the text. All text is Latin-script Slovenian ("sveta" was previously
# spelled with Cyrillic letters, which a Slovenian voice cannot read correctly).
DARK_DISCOVERY = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="sl-SI">
    <prosody rate="-15%" pitch="-6Hz" volume="loud">
        Ko sem <break time="300ms"/> prvič <break time="200ms"/> vstopil v <emphasis level="strong">cerkev</emphasis>,
        <break time="600ms"/>

        sem vedel, <break time="400ms"/> da ta kraj <break time="300ms"/> skriva <emphasis level="strong">skrivnosti</emphasis>.
        <break time="800ms"/>

        Župnik <break time="300ms"/> je vedel več, <break time="200ms"/> kot je želel povedati.
        <break time="600ms"/>

        Govoril je <break time="400ms"/> o letu <emphasis level="strong">dva tisoč štiriinosemdeset</emphasis>.
        <break time="500ms"/>

        O <break time="300ms"/> koncu sveta.
        <break time="1000ms"/>

        In o tem, <break time="400ms"/> da <emphasis level="moderate">nisem sam</emphasis>.
        <break time="1200ms"/>
    </prosody>
</speak>
"""


def _flatten_ssml(ssml_text):
    """Reduce an SSML document to the plain text and pitch edge-tts can use.

    Returns a ``(plain_text, pitch)`` tuple. ``plain_text`` is the spoken text
    with all markup removed and whitespace collapsed. ``pitch`` is the
    ``pitch`` attribute of the first ``<prosody>`` element when it has the
    ``+NHz`` / ``-NHz`` form, otherwise ``None``. Input that is not
    well-formed XML is treated as ordinary text.
    """
    import re
    import xml.etree.ElementTree as ET

    try:
        root = ET.fromstring(ssml_text.strip())
    except ET.ParseError:
        # Plain (non-XML) input: nothing to strip, just normalize whitespace.
        return " ".join(ssml_text.split()), None

    pitch = None
    for element in root.iter():
        # Tags are namespace-qualified ("{uri}prosody"); compare the local name.
        if element.tag.rsplit("}", 1)[-1] == "prosody":
            candidate = element.get("pitch", "")
            if re.fullmatch(r"[+-]\d+Hz", candidate):
                pitch = candidate
            break

    # itertext() yields element text and tails in document order; joining with
    # "" keeps punctuation attached to emphasized words ("<emphasis>x</emphasis>.").
    plain_text = " ".join("".join(root.itertext()).split())
    return plain_text, pitch


async def generate_voice_with_ssml(ssml_text, voice, output_path, rate="-15%", pitch=None):
    """Synthesize one narration clip and save it to ``output_path``.

    edge-tts does not accept caller-supplied SSML: ``Communicate`` XML-escapes
    its text and wraps it in its own ``<speak>/<voice>/<prosody>`` envelope, so
    raw markup would be read aloud instead of interpreted. The markup is
    therefore flattened to plain text first; pacing then comes from ``rate``
    and from the punctuation already present in the script.

    Args:
        ssml_text: SSML document (or plain text) to speak.
        voice: edge-tts voice name, e.g. ``"sl-SI-RokNeural"``.
        output_path: Destination audio file (``Path`` or ``str``).
        rate: Speaking-rate offset in edge-tts format, e.g. ``"-15%"``.
        pitch: Pitch offset in edge-tts format, e.g. ``"-5Hz"``. When ``None``
            the pitch of the first ``<prosody>`` element in ``ssml_text`` is
            used if present. Forwarding a pitch requires an edge-tts release
            whose ``Communicate`` accepts the ``pitch`` keyword.

    Returns:
        ``True`` when the file was written, ``False`` on any failure (the
        error is printed rather than raised).
    """
    try:
        plain_text, markup_pitch = _flatten_ssml(ssml_text)
        if not plain_text:
            raise ValueError("no speakable text left after removing SSML markup")

        options = {"rate": rate}
        effective_pitch = markup_pitch if pitch is None else pitch
        if effective_pitch is not None:
            # Only pass pitch when one is known, so the call stays valid on
            # edge-tts versions that predate the keyword.
            options["pitch"] = effective_pitch

        communicate = edge_tts.Communicate(plain_text, voice, **options)
        await communicate.save(str(output_path))
    except Exception as e:
        # Best-effort by design: callers rely on the boolean, not an exception.
        print(f"❌ Error: {e}")
        return False

    print(f"✅ Generated: {Path(output_path).name}")
    return True


async def generate_all_narrator_voices():
    """Generate every narrator clip into ``VOICE_DIR`` and print a summary.

    Clips are synthesized one after another. Each clip's success flag is
    recorded so the summary reports failures instead of unconditionally
    announcing completion. The post-processing hints (reverb, ambience,
    typewriter sync) are printed only when at least one clip was produced.
    """

    print("\n🎬 CINEMATIC VOICE GENERATOR")
    print("=" * 60)
    print(f"Voice: {NARRATOR_VOICE} (Deep Slovenian Male)")
    print("Style: Noir, Slow-Paced, Emotional")
    print("Effects: SSML pauses, emphasis, prosody control")
    print("=" * 60)
    print()

    # (output filename, SSML script, speaking-rate offset)
    voices = [
        ("intro_cutscene.mp3", INTRO_SCRIPT, NARRATOR_RATE),
        ("kai_memory_ana.mp3", KAI_MEMORY, "-25%"),
        ("discovery_church.mp3", DARK_DISCOVERY, "-15%"),
    ]

    failed = []
    for filename, script, rate in voices:
        output_path = VOICE_DIR / filename
        print(f"🎙️  Generating: {filename}")
        succeeded = await generate_voice_with_ssml(script, NARRATOR_VOICE, output_path, rate)
        if not succeeded:
            failed.append(filename)
        print()

    print("=" * 60)
    if failed:
        print(f"⚠️  VOICE GENERATION FINISHED WITH ERRORS ({len(failed)}/{len(voices)} failed)")
        for filename in failed:
            print(f"   ❌ {filename}")
    else:
        print("✅ VOICE GENERATION COMPLETE!")
    print()

    if len(failed) == len(voices):
        # Nothing was written, so the follow-up instructions would be misleading.
        return

    print("📁 Files saved to:")
    print(f"   {VOICE_DIR}")
    print()
    print("🎵 NEXT STEPS:")
    print("1. Add reverb effect (use Audacity or ffmpeg)")
    print("2. Layer with wind/fire ambience")
    print("3. Integrate with Phaser typewriter sync")
    print()
    print("REVERB COMMAND (ffmpeg):")
    print("ffmpeg -i input.mp3 -af 'aecho=0.8:0.9:1000:0.3' output_reverb.mp3")
    print()


async def main():
    """Entry coroutine: run the full narrator batch when edge-tts is available.

    If the import guard flagged edge-tts as missing, an error is printed and
    nothing is generated.
    """
    if EDGE_TTS_AVAILABLE:
        await generate_all_narrator_voices()
    else:
        print("ERROR: edge-tts not installed")


# Run the generator only when executed as a script (not when imported);
# asyncio.run() creates the event loop and drives main() to completion.
if __name__ == "__main__":
    asyncio.run(main())