#!/usr/bin/env python3 """ ๐Ÿงน CLEANUP DUPLICATES SCRIPT Keep only GREEN/PROCESSED versions, remove all backups and duplicates """ import os import hashlib from pathlib import Path from collections import defaultdict # Base directory BASE_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike") # Patterns to DELETE (backup and temporary files) DELETE_PATTERNS = [ "assets_BACKUP_", "assets__backup_before_greenscreen_", "src_assets_library_", "_BACKUP_", "_ORIGINAL", "_ALPHA", "_CLEAN", "_PROCESSED", "_AI", ] # Patterns to KEEP (prefer these) KEEP_PATTERNS = [ "MOJE_SLIKE_KONCNA_", "assets_PHASE_PACKS_", "assets_sprites_", "assets_crops_", ] def should_delete(filename): """Check if file should be deleted based on patterns""" for pattern in DELETE_PATTERNS: if pattern in filename: return True return False def get_priority(filename): """Get priority score (higher = keep)""" # Prefer MOJE_SLIKE_KONCNA (final processed versions) if "MOJE_SLIKE_KONCNA_" in filename: return 100 # Then PHASE_PACKS (organized versions) if "assets_PHASE_PACKS_" in filename: return 50 # Then sprites if "assets_sprites_" in filename: return 40 # Then crops if "assets_crops_" in filename: return 30 # Simple names (no prefix) are good if not filename.startswith(("assets_", "src_", "MOJE_")): return 20 # Everything else return 0 def get_base_name(filename): """Extract base name without prefixes""" # Remove common prefixes name = filename for prefix in ["MOJE_SLIKE_KONCNA_", "assets_BACKUP_20260112_064319_", "assets_PHASE_PACKS_", "assets__backup_before_greenscreen_", "assets_sprites_", "assets_crops_", "src_assets_library_godot_"]: if name.startswith(prefix): name = name[len(prefix):] # Remove numbers at end (timestamps) import re name = re.sub(r'_\d{13,}', '', name) return name def cleanup_directory(directory): """Cleanup duplicates in a directory""" if not directory.exists(): return print(f"\n๐Ÿ” Scanning: {directory.relative_to(BASE_DIR)}") # Group files by base name groups = defaultdict(list) for file_path in directory.rglob("*.png"): if file_path.is_file(): base_name = get_base_name(file_path.name) groups[base_name].append(file_path) deleted_count = 0 kept_count = 0 # Process each group for base_name, files in groups.items(): if len(files) <= 1: kept_count += 1 continue # Sort by priority (highest first) files_sorted = sorted(files, key=lambda f: get_priority(f.name), reverse=True) # Keep the highest priority file keep_file = files_sorted[0] # Delete the rest for file_path in files_sorted[1:]: # Also delete if it matches delete patterns if should_delete(file_path.name): try: file_path.unlink() deleted_count += 1 print(f" โŒ Deleted: {file_path.name}") except Exception as e: print(f" โš ๏ธ Error deleting {file_path.name}: {e}") else: # Check if it's truly a duplicate if get_priority(file_path.name) < get_priority(keep_file.name): try: file_path.unlink() deleted_count += 1 print(f" โŒ Deleted duplicate: {file_path.name}") except Exception as e: print(f" โš ๏ธ Error: {e}") kept_count += 1 print(f" โœ… Kept: {keep_file.name}") return deleted_count, kept_count def cleanup_all(): """Cleanup entire slike directory""" print("๐Ÿงน DUPLICATE CLEANUP SCRIPT") print("="*60) print("Strategy: Keep MOJE_SLIKE_KONCNA (green screen versions)") print(" Delete all backups and duplicates") print("="*60) total_deleted = 0 total_kept = 0 # Process all subdirectories for subdir in ["items", "predmeti", "liki", "biomi", "teren", "okolje", "dekoracije"]: dir_path = BASE_DIR / subdir if dir_path.exists(): deleted, kept = cleanup_directory(dir_path) total_deleted += deleted total_kept += kept print(f"\n{'='*60}") print(f"โœ… Total kept: {total_kept}") print(f"โŒ Total deleted: {total_deleted}") print(f"๐Ÿ’พ Space saved: ~{total_deleted * 300 // 1024} MB (estimated)") print(f"{'='*60}\n") if __name__ == "__main__": cleanup_all() print("โœจ Done!")