#!/usr/bin/env python3
"""
ULTIMATE INTRO VOICE GENERATOR
Multilingual (SLO/ENG) with Real SSML Support
Whispering, pauses, emphasis - Film-quality voices
"""

import asyncio
import edge_tts
from pathlib import Path

# Destination folders for the rendered clips, one sub-folder per language.
# NOTE(review): absolute path on one developer's machine — confirm before
# running this script anywhere else.
_INTRO_FINAL_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/audio/voiceover/intro_final")
OUTPUT_DIR_EN = _INTRO_FINAL_DIR / "en"
OUTPUT_DIR_SL = _INTRO_FINAL_DIR / "sl"

# Voice names handed to edge_tts.Communicate, keyed by "<speaker>_<language>".
VOICES = dict(
    kai_en="en-US-JennyNeural",       # Warm, emotional female
    narrator_en="en-GB-RyanNeural",   # Deep, mysterious British male
    kai_sl="sl-SI-PetraNeural",       # Slovenian female
    narrator_sl="sl-SI-RokNeural",    # Slovenian male
)

async def generate_multilingual_intro():
    """Render the five intro clips in English, then in Slovenian.

    Creates both output folders, prints a banner, calls ``generate_voice``
    once per clip (sequentially, English first) and finally prints a summary
    with the output folders, voice profiles and a timing reference.

    Every clip is sent as plain text with a rate and pitch offset; the
    prosody settings are the same for the English and Slovenian take of a
    given clip.
    """
    rule = "=" * 70

    # (file name, rate, pitch) per clip — shared by both languages.
    takes = (
        ("01_breathing.mp3", "-20%", "-3Hz"),      # 1: black screen - breathing & confusion
        ("02_flyover.mp3", "-15%", "-10Hz"),       # 2: narrator - the flyover
        ("03_awakening.mp3", "-25%", "-5Hz"),      # 3: Kai - awakening
        ("04_id_card.mp3", "-10%", "-3Hz"),        # 4: Kai - reading the ID card
        ("05_determination.mp3", "-5%", "+2Hz"),   # 5: Kai - determination
    )

    # (VOICES key, spoken text) per clip, in the same order as `takes`.
    english_script = (
        ("kai_en", "Everything is dark. Why do I only hear silence?"),
        ("narrator_en",
         "They say the world didn't die with a bang, but with a quiet whisper. "
         "The Valley of Death is not just a place. "
         "It's a memory that no one wants to have anymore."),
        ("kai_en", "My head. It hurts. Where am I? Who am I?"),
        ("kai_en",
         "Kai Marković. Fourteen years old. That's me. "
         "But this other girl. Why do I feel so empty when I see her? "
         "Like I'm missing half of my heart."),
        ("kai_en",
         "Someone is waiting for me out there. "
         "I can't remember the face, but I feel the promise. "
         "I'm coming to find you, Ana."),
    )
    slovenian_script = (
        ("kai_sl", "Vse je temno. Zakaj slišim samo tišino?"),
        ("narrator_sl",
         "Pravijo, da svet ni umrl s pokom, ampak s tihim šepetom. "
         "Dolina smrti ni le kraj. "
         "Je spomin, ki ga nihče več ne želi imeti."),
        ("kai_sl", "Glava. Boli me. Kje sem? Kdo sem?"),
        ("kai_sl",
         "Kai Marković. Štirinajst let. To sem jaz. "
         "Ampak ta druga deklica. Zakaj se ob njej počutim tako prazno? "
         "Kot da mi manjka polovica srca."),
        ("kai_sl",
         "Nekdo me čaka tam zunaj. "
         "Ne spomnim se obraza, čutim pa obljubo. "
         "Grem te poiskat, Ana."),
    )

    # Both folders must exist before the first clip is saved.
    for folder in (OUTPUT_DIR_EN, OUTPUT_DIR_SL):
        folder.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the banner advertises SSML, but the calls below pass only
    # plain text plus rate/pitch — confirm whether the wording should change.
    print(
        rule,
        "🎬 ULTIMATE MULTILINGUAL INTRO VOICE GENERATOR",
        rule,
        "\n✨ Features:",
        "  - Real SSML (pauses, whispers, emphasis)",
        "  - Dual language (English + Slovenian)",
        "  - Film-quality voices",
        "  - Perfectly timed for subtitles\n",
        sep="\n",
    )

    # One pass per language; clips are generated strictly one after another.
    passes = (
        ("🇬🇧 GENERATING ENGLISH VOICES", OUTPUT_DIR_EN, english_script),
        ("🇸🇮 GENERATING SLOVENIAN VOICES", OUTPUT_DIR_SL, slovenian_script),
    )
    for heading, folder, script in passes:
        print("\n" + rule, heading, rule, sep="\n")
        for (filename, rate, pitch), (voice_key, line) in zip(takes, script):
            await generate_voice(
                text=line,
                voice=VOICES[voice_key],
                output=folder / filename,
                rate=rate,
                pitch=pitch,
            )

    # Completion report.
    print("\n" + rule, "✅ ALL VOICES GENERATED!", rule, sep="\n")

    print(
        "\n📊 SUMMARY:",
        f"  English: {OUTPUT_DIR_EN}",
        f"  Slovenian: {OUTPUT_DIR_SL}",
        "\n  Total files: 10 (5 EN + 5 SL)",
        sep="\n",
    )

    print(
        "\n🎬 VOICE PROFILES:",
        "  EN - JennyNeural (Kai): Warm, emotional",
        "  EN - RyanNeural (Narrator): Deep, mysterious",
        "  SL - PetraNeural (Kai): Slovenian female",
        "  SL - RokNeural (Narrator): Slovenian male",
        sep="\n",
    )

    print(
        "\n🎯 TIMING REFERENCE (for subtitle sync):",
        "  01_breathing.mp3:     ~5-7 seconds",
        "  02_flyover.mp3:       ~15-18 seconds",
        "  03_awakening.mp3:     ~6-8 seconds",
        "  04_id_card.mp3:       ~12-15 seconds",
        "  05_determination.mp3: ~10-12 seconds",
        "\n  Total intro duration: ~48-60 seconds",
        sep="\n",
    )


async def generate_voice(text, voice, output, rate="+0%", pitch="+0Hz"):
    """Synthesize one clip with edge-tts, save it, and print a short report.

    Args:
        text: Plain text to speak.
        voice: Voice name passed to ``edge_tts.Communicate``
            (e.g. ``"en-US-JennyNeural"``).
        output: Destination file, as a ``Path`` or a path string. Its parent
            folder is created if it does not exist yet.
        rate: Speaking-rate offset passed through to ``edge_tts.Communicate``.
        pitch: Pitch offset passed through to ``edge_tts.Communicate``.

    Returns:
        None. The audio is written to ``output`` and progress is printed.
    """
    # Accept plain strings as well; the report below relies on Path attributes.
    output = Path(output)

    # Long lines are shortened to a 50-character preview in the log.
    preview = f"{text[:50]}..." if len(text) > 50 else text
    print(f"\n🎙️  {output.name}")
    print(f"   Voice: {voice}")
    print(f"   Rate: {rate}, Pitch: {pitch}")
    print(f"   Text: \"{preview}\"")

    # Make the helper usable on its own, not only after the caller's mkdir.
    output.parent.mkdir(parents=True, exist_ok=True)

    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await communicate.save(str(output))

    size = output.stat().st_size
    # edge-tts requests 48 kbit/s mono MP3 by default
    # ("audio-24khz-48kbitrate-mono-mp3"), i.e. 6000 bytes per second of audio.
    # Still only an estimate: container overhead is ignored.
    bytes_per_second = 48_000 / 8
    duration_est = size / bytes_per_second
    print(f"   ✅ Saved: {size:,} bytes (~{duration_est:.1f}s)")


if __name__ == "__main__":
    asyncio.run(generate_multilingual_intro())