Files
novafarma/scripts/cleanup_duplicates.py
David Kotnik e7759433a2 🧹 Cleanup Duplicates - Removed 224 backup/duplicate files
- Kept only green screen processed versions (MOJE_SLIKE_KONCNA)
- Deleted all BACKUP, src_assets_library, and duplicate versions
- Saved ~65 MB of space
- Added cleanup_duplicates.py script for future use
2026-01-20 01:36:58 +01:00

168 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
🧹 CLEANUP DUPLICATES SCRIPT
Keep only GREEN/PROCESSED versions, remove all backups and duplicates
"""
import os
import hashlib
from pathlib import Path
from collections import defaultdict
# Root folder whose subdirectories are scanned by cleanup_all().
# NOTE(review): absolute, machine-specific path - the script only does work
# on a machine where this folder exists.
BASE_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike")
# Substrings that mark a file name as a backup/temporary copy.
# should_delete() reports a match when any of them occurs anywhere in the
# file name (plain, case-sensitive substring test).
DELETE_PATTERNS = [
    "assets_BACKUP_",
    "assets__backup_before_greenscreen_",
    "src_assets_library_",
    "_BACKUP_",
    "_ORIGINAL",
    "_ALPHA",
    "_CLEAN",
    "_PROCESSED",
    "_AI",
]
# Name markers of the versions that should win when duplicates exist.
# NOTE(review): this list is not read by any function visible in this file;
# get_priority() hard-codes the same strings - keep the two in sync.
KEEP_PATTERNS = [
    "MOJE_SLIKE_KONCNA_",
    "assets_PHASE_PACKS_",
    "assets_sprites_",
    "assets_crops_",
]
def should_delete(filename):
    """Return True when ``filename`` contains any DELETE_PATTERNS substring."""
    return any(marker in filename for marker in DELETE_PATTERNS)
def get_priority(filename):
    """Score a file name for retention; the higher score wins a duplicate group.

    Markers are tested in order as plain substrings and the first hit decides
    the score.  Names without any marker score 20 unless they start with one
    of the generic ``assets_`` / ``src_`` / ``MOJE_`` prefixes, which score 0.
    """
    # Ordered from most to least preferred; order matters when a name
    # contains more than one marker.
    scored_markers = (
        ("MOJE_SLIKE_KONCNA_", 100),   # final processed versions
        ("assets_PHASE_PACKS_", 50),   # organized versions
        ("assets_sprites_", 40),
        ("assets_crops_", 30),
    )
    for marker, score in scored_markers:
        if marker in filename:
            return score

    # Plain names (no known prefix) rank above unrecognized prefixed ones.
    carries_known_prefix = filename.startswith(("assets_", "src_", "MOJE_"))
    return 0 if carries_known_prefix else 20
def get_base_name(filename):
    """Reduce ``filename`` to the key used for grouping duplicates.

    Known leading prefixes are stripped one after another in a fixed order
    (each prefix is tested once against the current remainder), then every
    ``_`` followed by 13 or more digits is removed from the result.
    """
    import re

    known_prefixes = (
        "MOJE_SLIKE_KONCNA_",
        "assets_BACKUP_20260112_064319_",
        "assets_PHASE_PACKS_",
        "assets__backup_before_greenscreen_",
        "assets_sprites_",
        "assets_crops_",
        "src_assets_library_godot_",
    )

    remainder = filename
    for lead in known_prefixes:
        if remainder.startswith(lead):
            remainder = remainder[len(lead):]

    # Drop long digit runs (timestamps) wherever they occur in the name.
    return re.sub(r'_\d{13,}', '', remainder)
def cleanup_directory(directory):
    """Delete lower-priority duplicate PNG files beneath ``directory``.

    PNG files are collected recursively and grouped by ``get_base_name``.
    In every group with two or more members the file with the highest
    ``get_priority`` score is kept.  Each remaining member is deleted when
    its name matches ``should_delete`` or when its priority is strictly
    lower than the kept file's; members that tie with the kept file and
    match no delete pattern are left on disk.  Deletion failures are
    reported on stdout and do not stop the scan.

    Args:
        directory: ``pathlib.Path`` of the folder to scan.

    Returns:
        A ``(deleted_count, kept_count)`` tuple.  ``kept_count`` is
        incremented once per group.  ``(0, 0)`` is returned when
        ``directory`` does not exist, so callers can always unpack the
        result.
    """
    if not directory.exists():
        # A bare ``return`` here would hand ``None`` to callers that unpack
        # two values; report an empty tally instead.
        return 0, 0

    # relative_to() raises ValueError for paths outside BASE_DIR; fall back
    # to the path as given so the function works for any directory.
    try:
        shown = directory.relative_to(BASE_DIR)
    except ValueError:
        shown = directory
    print(f"\n🔍 Scanning: {shown}")

    # Group files by base name
    groups = defaultdict(list)
    for file_path in directory.rglob("*.png"):
        if file_path.is_file():
            groups[get_base_name(file_path.name)].append(file_path)

    deleted_count = 0
    kept_count = 0

    # Process each group
    for files in groups.values():
        if len(files) <= 1:
            kept_count += 1
            continue

        # Highest priority first; sorted() is stable, so ties keep scan order.
        files_sorted = sorted(files, key=lambda f: get_priority(f.name), reverse=True)
        keep_file = files_sorted[0]
        keep_priority = get_priority(keep_file.name)

        for file_path in files_sorted[1:]:
            if should_delete(file_path.name):
                # Backup/temporary copies go regardless of their priority.
                try:
                    file_path.unlink()
                except OSError as e:
                    print(f" ⚠️ Error deleting {file_path.name}: {e}")
                else:
                    deleted_count += 1
                    print(f" ❌ Deleted: {file_path.name}")
            elif get_priority(file_path.name) < keep_priority:
                # Only strictly lower-ranked files are removed; equal-ranked
                # files are not considered duplicates of the kept one.
                try:
                    file_path.unlink()
                except OSError as e:
                    print(f" ⚠️ Error: {e}")
                else:
                    deleted_count += 1
                    print(f" ❌ Deleted duplicate: {file_path.name}")

        kept_count += 1
        print(f" ✅ Kept: {keep_file.name}")

    return deleted_count, kept_count
def cleanup_all():
    """Clean every known image subfolder of BASE_DIR and print a summary.

    Subfolders that do not exist are skipped.  The reported space saving is
    a rough estimate derived from the number of deleted files.
    """
    for banner_line in (
        "🧹 DUPLICATE CLEANUP SCRIPT",
        "="*60,
        "Strategy: Keep MOJE_SLIKE_KONCNA (green screen versions)",
        " Delete all backups and duplicates",
        "="*60,
    ):
        print(banner_line)

    folder_names = ("items", "predmeti", "liki", "biomi", "teren", "okolje", "dekoracije")

    total_deleted = 0
    total_kept = 0
    for folder_name in folder_names:
        target = BASE_DIR / folder_name
        if not target.exists():
            continue
        removed, retained = cleanup_directory(target)
        total_deleted += removed
        total_kept += retained

    print(f"\n{'='*60}")
    print(f"✅ Total kept: {total_kept}")
    print(f"❌ Total deleted: {total_deleted}")
    print(f"💾 Space saved: ~{total_deleted * 300 // 1024} MB (estimated)")
    print(f"{'='*60}\n")
# Run the cleanup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    cleanup_all()
    print("✨ Done!")