import os
import hashlib

# Define target directories in the clean asset structure
TARGET_DIRS = [
    "assets/slike/animals",
    "assets/slike/characters",
    "assets/slike/environment",
    "assets/slike/ui",
    "assets/slike/items",
    "assets/slike/biomes",
    "assets/slike/intro",
]


def calculate_md5(filepath):
    """Calculate the MD5 hash of a file, reading it in 4 KiB chunks."""
    hash_md5 = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    except OSError as e:
        print(f"Error reading {filepath}: {e}")
        return None


def find_duplicates():
    print("🕵️‍♂️ STARTING DUPLICATE DETECTIVE...")
    print("   Scanning folders for identical images...")

    seen_hashes = {}  # {md5_hash: [list_of_filepaths]}
    total_files = 0

    # 1. SCAN ALL FILES
    root_base = os.getcwd()
    for relative_dir in TARGET_DIRS:
        abs_dir = os.path.join(root_base, relative_dir)
        if not os.path.exists(abs_dir):
            continue

        for root, _dirs, files in os.walk(abs_dir):
            for file in files:
                # Skip hidden files and anything that is not an image
                if file.startswith(".") or not file.lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
                    continue

                filepath = os.path.join(root, file)
                total_files += 1
                file_hash = calculate_md5(filepath)
                if file_hash:
                    seen_hashes.setdefault(file_hash, []).append(filepath)

    # 2. REPORT DUPLICATES
    duplicates_found = 0
    print(f"\n✅ Scanned {total_files} files.")

    for paths in seen_hashes.values():
        if len(paths) > 1:
            duplicates_found += 1
            print(f"\n⚠️ DUPLICATE GROUP FOUND ({len(paths)} copies):")
            for p in paths:
                # Print the path relative to the project root for cleaner output
                rel_p = os.path.relpath(p, root_base)
                print(f"   -> {rel_p}")

    if duplicates_found == 0:
        print("\n✨ Great! No identical duplicates found in scanned folders.")
    else:
        print(f"\n🚨 WARNING: Found {duplicates_found} groups of duplicate images!")
        print("   Consider deleting the extras to save space and avoid confusion.")


if __name__ == "__main__":
    find_duplicates()
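
# --- Optional extension: a minimal, illustrative sketch, NOT part of the tool above ---
# The report ends by suggesting you delete the extra copies by hand. One way that
# could be automated is sketched below. It assumes find_duplicates() were changed
# to return its seen_hashes mapping; the function name delete_duplicates and the
# dry_run flag are hypothetical names chosen for this example.
def delete_duplicates(seen_hashes, dry_run=True):
    """Delete all but the first file in each duplicate group.

    `seen_hashes` is the {md5_hash: [filepaths]} mapping built during the
    scan. With dry_run=True (the default) nothing is removed; the planned
    deletions are only printed, so you can review them first.
    """
    for paths in seen_hashes.values():
        # paths[0] is kept as the canonical copy; the rest are redundant
        for extra in paths[1:]:
            if dry_run:
                print(f"Would delete: {extra}")
            else:
                os.remove(extra)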