ok
This commit is contained in:
121
scripts/dedupe_keep_green.py
Normal file
121
scripts/dedupe_keep_green.py
Normal file
@@ -0,0 +1,121 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
from PIL import Image
|
||||
|
||||
# Directory whose images are deduplicated. The path is relative, so it is
# resolved against the current working directory at run time.
TARGET = "assets/slike/glavna_referenca"
|
||||
|
||||
# Strip known prefixes/suffixes from a filename to find its "Core Name".
# e.g. "assets_BACKUP_2026_kai_walk_01_1.png" -> "kai_walk_01"
def get_core_name(filename):
    """Return the normalized "core name" used to group duplicate images.

    The name is lower-cased, its extension is dropped, known backup/library
    prefixes are removed, one trailing numeric counter (``_1``) and one
    trailing version tag (``_v2``) are stripped, every ``_copy`` /
    ``_resized`` marker is deleted, and leading/trailing underscores are
    trimmed.

    Args:
        filename: File name (not a path) including its extension.

    Returns:
        The normalized core name; may be an empty string if nothing is left.
    """
    name = os.path.splitext(filename.lower())[0]

    # Order matters: each pattern is applied to the result of the previous one.
    prefix_patterns = (
        # One or two numeric stamps after "assets_backup_". The second stamp
        # is optional so that "assets_backup_2026_<name>" is stripped too.
        r"assets_backup_\d+_(?:\d+_)?",
        r"moje_slike_koncna_",
        r"src_assets_library_",
        # NOTE(review): "[a-z0-9_]+_" is greedy and consumes everything up to
        # the LAST underscore (e.g. "assets__backup_x_kai_walk_01" -> "01").
        # Left unchanged because the intended backup-id shape is not visible
        # here -- confirm before relying on this pattern.
        r"assets__backup_[a-z0-9_]+_",
    )
    for pattern in prefix_patterns:
        name = re.sub(pattern, "", name)

    # Remove trailing counters like _1, _2, then a trailing _v2.
    # NOTE(review): a plain "kai_walk_01" also loses its "_01" here, so
    # numbered frames of one animation share a core name -- confirm intended.
    name = re.sub(r"_\d+$", "", name)
    name = re.sub(r"_v\d+$", "", name)

    # Remove "copy" / "resized" markers wherever they occur in the name.
    name = name.replace("_copy", "").replace("_resized", "")

    # Clean up leading/trailing underscores left behind by the removals.
    return name.strip("_")
|
||||
|
||||
def is_green_screen(filepath):
    """Return True if the image at *filepath* looks like a green-screen shot.

    Only the top-left and top-right pixels are sampled (after converting the
    image to RGB). A pixel counts as chroma green when green is above 150 and
    both red and blue are below 50 -- a tolerant match around (0, 255, 0).

    Args:
        filepath: Path of the image file to inspect.

    Returns:
        True if either sampled corner is green; False otherwise, including
        when the file cannot be opened or decoded (best-effort check).
    """
    try:
        with Image.open(filepath) as img:
            rgb = img.convert("RGB")
            # Top-left first, then top-right just in case.
            for corner in ((0, 0), (img.width - 1, 0)):
                r, g, b = rgb.getpixel(corner)
                if g > 150 and r < 50 and b < 50:
                    return True
            return False
    except Exception:
        # Unreadable/corrupt files are treated as "not green". Unlike a bare
        # `except:`, this lets KeyboardInterrupt and SystemExit propagate.
        return False
|
||||
|
||||
def dedupe():
    """Delete duplicate images in TARGET, preferring green-screen versions.

    Regular files in TARGET ending in ``.png`` / ``.jpg`` are grouped by
    ``get_core_name``. In every group with two or more files exactly one file
    is kept and the rest are deleted from disk:

    1. files for which ``is_green_screen`` is true win over the others;
    2. among equals, the shortest file name wins;
    3. remaining ties are broken alphabetically, so the outcome does not
       depend on the order in which ``os.listdir`` returns entries.

    Progress and a summary are printed to stdout. Deletion errors are
    reported and skipped. Returns None.
    """
    print(f"🚀 DEDUPING {TARGET} (Keep Green Preference)...")

    # isdir (not exists): a plain file at TARGET would make os.listdir raise.
    if not os.path.isdir(TARGET):
        print("❌ Target not found.")
        return

    # Only regular files: a directory named "x.png" must never be picked as
    # the survivor of a group while the real images are deleted.
    all_files = [
        f
        for f in os.listdir(TARGET)
        if f.lower().endswith(('.png', '.jpg'))
        and os.path.isfile(os.path.join(TARGET, f))
    ]

    print(f"Scanning {len(all_files)} files...")

    # Group files by Core Name
    groups = {}
    for f in all_files:
        groups.setdefault(get_core_name(f), []).append(f)

    def preference(candidate):
        # Lower sorts first: green before non-green, then shorter name,
        # then alphabetical as a deterministic tie-break.
        return (not candidate["green"], len(candidate["name"]), candidate["name"])

    deleted_count = 0

    for files in groups.values():
        if len(files) < 2:
            continue

        # Analyze the group
        candidates = []
        for f in files:
            full_path = os.path.join(TARGET, f)
            candidates.append(
                {"name": f, "green": is_green_screen(full_path), "path": full_path}
            )

        # The best-ranked candidate is kept; everything after it is removed.
        ranked = sorted(candidates, key=preference)

        for item in ranked[1:]:
            try:
                os.remove(item["path"])
                deleted_count += 1
            except Exception as e:
                print(f"Error removing {item['name']}: {e}")

    print("="*40)
    print("✅ DEDUPE COMPLETE.")
    print(f"Deleted {deleted_count} duplicates.")
    print(f"Remaining: {len(all_files) - deleted_count}")
    print("="*40)
|
||||
|
||||
# Run the dedupe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    dedupe()
|
||||
Reference in New Issue
Block a user