🧹 Cleanup Duplicates - Removed 224 backup/duplicate files
- Kept only green screen processed versions (MOJE_SLIKE_KONCNA)
- Deleted all BACKUP, src_assets_library, and duplicate versions
- Saved ~65 MB of space
- Added cleanup_duplicates.py script for future use
This commit is contained in:
167
scripts/cleanup_duplicates.py
Normal file
167
scripts/cleanup_duplicates.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
🧹 CLEANUP DUPLICATES SCRIPT
|
||||
Keep only GREEN/PROCESSED versions, remove all backups and duplicates
|
||||
"""
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# Root directory of the image assets to clean.
# The default is the original author's checkout; set the
# CLEANUP_DUPLICATES_BASE_DIR environment variable to point the script at a
# different checkout without editing this file.
BASE_DIR = Path(
    os.environ.get(
        "CLEANUP_DUPLICATES_BASE_DIR",
        "/Users/davidkotnik/repos/novafarma/assets/slike",
    )
)

# Patterns to DELETE (backup and temporary files).
# Each entry is matched as a case-sensitive substring anywhere in the file
# name (see should_delete), so short entries such as "_AI" also match longer
# names that merely contain them.
DELETE_PATTERNS = [
    "assets_BACKUP_",
    "assets__backup_before_greenscreen_",
    "src_assets_library_",
    "_BACKUP_",
    "_ORIGINAL",
    "_ALPHA",
    "_CLEAN",
    "_PROCESSED",
    "_AI",
]

# Patterns to KEEP (prefer these).
# NOTE(review): this list is not referenced by the functions in this part of
# the file; get_priority hard-codes the same markers with their scores.
KEEP_PATTERNS = [
    "MOJE_SLIKE_KONCNA_",
    "assets_PHASE_PACKS_",
    "assets_sprites_",
    "assets_crops_",
]
|
||||
|
||||
def should_delete(filename):
    """Return True when *filename* contains any DELETE_PATTERNS entry.

    Matching is a plain, case-sensitive substring test; the position of the
    marker inside the name does not matter.
    """
    return any(marker in filename for marker in DELETE_PATTERNS)
|
||||
|
||||
def get_priority(filename):
    """Return the keep-priority score for *filename* (higher = keep).

    Markers are checked in order as substrings and the first hit wins:
    final processed versions (100), phase packs (50), sprites (40),
    crops (30). A name with none of these markers scores 20 when it does
    not start with "assets_", "src_" or "MOJE_", and 0 otherwise.
    """
    ranked_markers = (
        ("MOJE_SLIKE_KONCNA_", 100),   # final processed versions
        ("assets_PHASE_PACKS_", 50),   # organized versions
        ("assets_sprites_", 40),
        ("assets_crops_", 30),
    )
    for marker, score in ranked_markers:
        if marker in filename:
            return score

    # Plain names (no known prefix) outrank leftover prefixed files.
    has_known_prefix = filename.startswith(("assets_", "src_", "MOJE_"))
    return 0 if has_known_prefix else 20
|
||||
|
||||
def get_base_name(filename):
    """Return *filename* reduced to the name used for duplicate grouping.

    Known location/backup prefixes are stripped from the front of the name,
    one after another in the order listed below, and every
    ``_<13 or more digits>`` run (millisecond-style timestamps) is removed
    wherever it occurs in the name.

    The backup prefix accepts any ``assets_BACKUP_<YYYYMMDD>_<HHMMSS>_``
    stamp rather than one specific backup run, so files from every backup
    are grouped with their originals.

    Args:
        filename: Bare file name (no directory part).

    Returns:
        The normalized name; the file extension is left in place.
    """
    # Imported locally because the module does not import ``re`` at file level.
    import re

    # Order matters: each pattern is tried once against the *current* start
    # of the name, so a later prefix can be stripped after an earlier one.
    prefix_patterns = (
        r"MOJE_SLIKE_KONCNA_",
        r"assets_BACKUP_\d{8}_\d{6}_",
        r"assets_PHASE_PACKS_",
        r"assets__backup_before_greenscreen_",
        r"assets_sprites_",
        r"assets_crops_",
        r"src_assets_library_godot_",
    )

    name = filename
    for pattern in prefix_patterns:
        found = re.match(pattern, name)
        if found:
            name = name[found.end():]

    # Drop timestamp suffixes such as "_1700000000000"; not anchored, because
    # the file extension normally follows the timestamp.
    return re.sub(r'_\d{13,}', '', name)
|
||||
|
||||
def cleanup_directory(directory):
    """Delete lower-priority duplicates of PNG files under *directory*.

    All ``*.png`` files below *directory* (recursively) are grouped by
    ``get_base_name``. In every group with more than one file, the file with
    the highest ``get_priority`` is kept. Each remaining file is deleted when
    its name matches ``should_delete`` or when its priority is strictly lower
    than the kept file's; equal-priority files that match no delete pattern
    are left untouched.

    Files are compared by name only; their contents are never inspected.

    Args:
        directory: ``pathlib.Path`` of the directory to scan.

    Returns:
        ``(deleted_count, kept_count)``. ``kept_count`` counts one entry per
        base-name group. ``(0, 0)`` is returned when *directory* does not
        exist, so callers can always unpack the result.
    """
    if not directory.exists():
        return 0, 0

    # Show the path relative to BASE_DIR when possible; fall back to the
    # full path for directories outside BASE_DIR instead of raising.
    try:
        shown_path = directory.relative_to(BASE_DIR)
    except ValueError:
        shown_path = directory
    print(f"\n🔍 Scanning: {shown_path}")

    # Group files by base name
    groups = defaultdict(list)
    for file_path in directory.rglob("*.png"):
        if file_path.is_file():
            groups[get_base_name(file_path.name)].append(file_path)

    deleted_count = 0
    kept_count = 0

    for files in groups.values():
        if len(files) <= 1:
            kept_count += 1
            continue

        # Highest priority first; sorted() is stable, so ties keep scan order.
        files_sorted = sorted(files, key=lambda f: get_priority(f.name), reverse=True)
        keep_file = files_sorted[0]
        keep_priority = get_priority(keep_file.name)

        for file_path in files_sorted[1:]:
            matches_pattern = should_delete(file_path.name)

            # Not a backup/temporary name and not outranked: leave it alone.
            if not matches_pattern and get_priority(file_path.name) >= keep_priority:
                continue

            try:
                file_path.unlink()
            except OSError as e:
                # Best effort: report the failure and carry on with the rest.
                if matches_pattern:
                    print(f" ⚠️ Error deleting {file_path.name}: {e}")
                else:
                    print(f" ⚠️ Error: {e}")
            else:
                deleted_count += 1
                if matches_pattern:
                    print(f" ❌ Deleted: {file_path.name}")
                else:
                    print(f" ❌ Deleted duplicate: {file_path.name}")

        kept_count += 1
        print(f" ✅ Kept: {keep_file.name}")

    return deleted_count, kept_count
|
||||
|
||||
def cleanup_all():
    """Run cleanup_directory over each known subdirectory of BASE_DIR.

    Prints a banner, processes every listed subdirectory that exists, then
    prints the total kept/deleted counts and a rough space estimate based on
    300 KB per deleted file.
    """
    rule = "="*60

    print("🧹 DUPLICATE CLEANUP SCRIPT")
    print(rule)
    print("Strategy: Keep MOJE_SLIKE_KONCNA (green screen versions)")
    print(" Delete all backups and duplicates")
    print(rule)

    total_deleted = 0
    total_kept = 0

    # Subdirectories that do not exist are skipped silently.
    asset_subdirs = ("items", "predmeti", "liki", "biomi", "teren", "okolje", "dekoracije")
    for subdir in asset_subdirs:
        dir_path = BASE_DIR / subdir
        if not dir_path.exists():
            continue
        deleted, kept = cleanup_directory(dir_path)
        total_deleted += deleted
        total_kept += kept

    print(f"\n{rule}")
    print(f"✅ Total kept: {total_kept}")
    print(f"❌ Total deleted: {total_deleted}")
    print(f"💾 Space saved: ~{total_deleted * 300 // 1024} MB (estimated)")
    print(f"{rule}\n")
|
||||
|
||||
# Script entry point: run the full cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    cleanup_all()
    print("✨ Done!")
|
||||
Reference in New Issue
Block a user