- Kept only green screen processed versions (MOJE_SLIKE_KONCNA) - Deleted all BACKUP, src_assets_library, and duplicate versions - Saved ~65 MB of space - Added cleanup_duplicates.py script for future use
168 lines
4.8 KiB
Python
168 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
🧹 CLEANUP DUPLICATES SCRIPT
|
|
Keep only GREEN/PROCESSED versions, remove all backups and duplicates
|
|
"""
|
|
|
|
import os
|
|
import hashlib
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
# Root folder that holds the image sub-folders processed by cleanup_all().
BASE_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike")

# Substrings that mark a file name as a backup/temporary variant.
# should_delete() matches these anywhere in the name (plain substring test),
# so short markers such as "_AI" also hit longer words containing them.
# NOTE(review): the module docstring says PROCESSED versions are kept, yet
# "_PROCESSED" is listed as a delete marker here — confirm the intent.
DELETE_PATTERNS = [
    "assets_BACKUP_",
    "assets__backup_before_greenscreen_",
    "src_assets_library_",
    "_BACKUP_",
    "_ORIGINAL",
    "_ALPHA",
    "_CLEAN",
    "_PROCESSED",
    "_AI",
]

# Name markers of the versions that are preferred when duplicates exist.
# NOTE(review): no function in the visible part of this file reads this list;
# get_priority() hard-codes the same markers with their scores.
KEEP_PATTERNS = [
    "MOJE_SLIKE_KONCNA_",
    "assets_PHASE_PACKS_",
    "assets_sprites_",
    "assets_crops_",
]
|
|
|
|
def should_delete(filename):
    """Return True when *filename* contains any DELETE_PATTERNS marker.

    The match is a plain substring test, so a marker counts wherever it
    appears in the name, not only as a prefix or suffix.
    """
    return any(marker in filename for marker in DELETE_PATTERNS)
|
|
|
|
def get_priority(filename):
    """Score *filename* for duplicate resolution; the highest score is kept.

    Markers are looked up as substrings in descending score order, so the
    first (highest-ranked) marker found wins. A name without any marker
    scores 20 unless it starts with one of the known "assets_", "src_" or
    "MOJE_" prefixes, in which case it scores 0.
    """
    ranked_markers = (
        ("MOJE_SLIKE_KONCNA_", 100),   # final processed versions
        ("assets_PHASE_PACKS_", 50),   # organized versions
        ("assets_sprites_", 40),
        ("assets_crops_", 30),
    )
    for marker, score in ranked_markers:
        if marker in filename:
            return score

    # Plain names (no known prefix) rank above unrecognised prefixed ones.
    has_known_prefix = filename.startswith(("assets_", "src_", "MOJE_"))
    return 0 if has_known_prefix else 20
|
|
|
|
def get_base_name(filename):
    """Reduce *filename* to the name used for grouping duplicates.

    Known prefixes are stripped in one pass, in the fixed order listed
    below: each prefix is tested once against the name as it stands at that
    point, so a prefix exposed by a later strip is not revisited. Afterwards
    every "_" followed by 13 or more digits (timestamp-like runs) is removed
    wherever it occurs in the name. The file extension is left in place.
    """
    import re

    known_prefixes = (
        "MOJE_SLIKE_KONCNA_",
        "assets_BACKUP_20260112_064319_",
        "assets_PHASE_PACKS_",
        "assets__backup_before_greenscreen_",
        "assets_sprites_",
        "assets_crops_",
        "src_assets_library_godot_",
    )

    stripped = filename
    for lead in known_prefixes:
        if not stripped.startswith(lead):
            continue
        stripped = stripped[len(lead):]

    return re.sub(r'_\d{13,}', '', stripped)
|
|
|
|
def _delete_file(file_path, label):
    """Unlink *file_path*, print the outcome, and return True on success."""
    try:
        file_path.unlink()
    except OSError as e:
        # Best effort: report the failure (with the file name) and carry on.
        print(f" ⚠️ Error deleting {file_path.name}: {e}")
        return False
    print(f" ❌ {label}: {file_path.name}")
    return True


def cleanup_directory(directory):
    """Delete duplicate PNG files found anywhere under *directory*.

    Files are collected recursively and grouped by get_base_name() of their
    file name. In every group with more than one file, the file with the
    highest get_priority() is kept. Each remaining file is deleted when its
    name matches should_delete() or when its priority is strictly lower than
    the kept file's; files that tie with the kept file and carry no delete
    marker are left on disk and counted as kept.

    Args:
        directory: pathlib.Path of the folder to scan.

    Returns:
        A ``(deleted_count, kept_count)`` tuple. A missing directory yields
        ``(0, 0)`` so callers can always unpack the result.
    """
    if not directory.exists():
        # Previously a bare return (None), which broke tuple-unpacking callers.
        return 0, 0

    try:
        shown = directory.relative_to(BASE_DIR)
    except ValueError:
        # Directory lies outside BASE_DIR; show it as given instead of failing.
        shown = directory
    print(f"\n🔍 Scanning: {shown}")

    # Group files by base name
    groups = defaultdict(list)
    for file_path in directory.rglob("*.png"):
        if file_path.is_file():
            groups[get_base_name(file_path.name)].append(file_path)

    deleted_count = 0
    kept_count = 0

    for files in groups.values():
        if len(files) <= 1:
            # Nothing to compare against: a lone file is always kept.
            kept_count += 1
            continue

        # Highest priority first; the sort is stable, so ties keep scan order.
        files_sorted = sorted(files, key=lambda f: get_priority(f.name), reverse=True)
        keep_file = files_sorted[0]
        keep_priority = get_priority(keep_file.name)

        for file_path in files_sorted[1:]:
            if should_delete(file_path.name):
                label = "Deleted"
            elif get_priority(file_path.name) < keep_priority:
                label = "Deleted duplicate"
            else:
                # Same priority and no delete marker: the file stays on disk,
                # so it belongs in the kept total.
                kept_count += 1
                continue

            if _delete_file(file_path, label):
                deleted_count += 1

        kept_count += 1
        print(f" ✅ Kept: {keep_file.name}")

    return deleted_count, kept_count
|
|
|
|
def cleanup_all():
    """Run cleanup_directory() on each known sub-folder and print a summary.

    Sub-folders of BASE_DIR that do not exist are skipped. The summary
    reports the accumulated kept/deleted counts and a rough space estimate.
    """
    print("🧹 DUPLICATE CLEANUP SCRIPT")
    print("="*60)
    print("Strategy: Keep MOJE_SLIKE_KONCNA (green screen versions)")
    print(" Delete all backups and duplicates")
    print("="*60)

    subfolders = ("items", "predmeti", "liki", "biomi", "teren", "okolje", "dekoracije")

    total_deleted = 0
    total_kept = 0
    for subdir in subfolders:
        dir_path = BASE_DIR / subdir
        if not dir_path.exists():
            continue
        deleted, kept = cleanup_directory(dir_path)
        total_deleted += deleted
        total_kept += kept

    print(f"\n{'='*60}")
    print(f"✅ Total kept: {total_kept}")
    print(f"❌ Total deleted: {total_deleted}")
    # Rough figure only: a flat 300 per deleted file, divided by 1024 for MB
    # (presumably ~300 KB per image — not measured from the actual files).
    print(f"💾 Space saved: ~{total_deleted * 300 // 1024} MB (estimated)")
    print(f"{'='*60}\n")
|
|
|
|
if __name__ == "__main__":
    # Run the cleanup only when executed as a script: it deletes files, so
    # merely importing this module must not trigger it.
    cleanup_all()
    print("✨ Done!")
|