"""Remove duplicate images from TARGET, preferring green-screen versions.

Files are grouped by a normalized "core name" (see ``get_core_name``). Within
each group exactly one file is kept and the rest are deleted from disk.
"""

import os
import re

from PIL import Image

TARGET = "assets/slike/glavna_referenca"

# Prefixes stripped to find the "core name", applied in this order.
# The patterns are not anchored, so a match anywhere in the name is removed.
# e.g. "assets_BACKUP_2026_01_kai_walk_01_1.png" -> "kai_walk_01"
_PREFIX_PATTERNS = (
    re.compile(r"assets_backup_\d+_\d+_"),
    re.compile(r"moje_slike_koncna_"),
    re.compile(r"src_assets_library_"),
    re.compile(r"assets__backup_[a-z0-9_]+_"),
)

# Trailing counters like _1, _2, _v2. Each pattern is applied once, in order.
_TRAILING_COUNTER_PATTERNS = (
    re.compile(r"_\d+$"),
    re.compile(r"_v\d+$"),
)


def get_core_name(filename):
    """Return the normalized "core name" used to group duplicate files.

    The name is lower-cased, its extension is dropped, known prefixes and one
    trailing ``_<digits>`` / ``_v<digits>`` counter are removed, the markers
    ``_copy`` and ``_resized`` are deleted wherever they occur, and leading or
    trailing underscores are stripped.

    NOTE(review): only the last numeric suffix is stripped, and it is stripped
    unconditionally, so "kai_walk_01.png" and "kai_walk_02.png" both map to
    "kai_walk" and land in the same group. Confirm this is intended before
    running on numbered animation frames.

    Args:
        filename: File name (not a full path) including its extension.

    Returns:
        The normalized core name as a string.
    """
    name = os.path.splitext(filename.lower())[0]

    for pattern in _PREFIX_PATTERNS:
        name = pattern.sub("", name)

    for pattern in _TRAILING_COUNTER_PATTERNS:
        name = pattern.sub("", name)

    # Remove "copy" / "resized" markers.
    name = name.replace("_copy", "").replace("_resized", "")

    # Clean up leading/trailing underscores left behind by the removals.
    return name.strip("_")


def _is_chroma_green(pixel):
    """Return True if an (r, g, b) pixel is green-dominant.

    Strict chroma green is (0, 255, 0); the thresholds are deliberately
    tolerant: green above 150 with red and blue both below 50.
    """
    r, g, b = pixel
    return g > 150 and r < 50 and b < 50


def is_green_screen(filepath):
    """Return True if the image's top-left or top-right pixel is chroma green.

    Any failure to open or read the image is treated as "not green screen",
    so unreadable files are handled like ordinary non-green images.

    Args:
        filepath: Path to the image file.

    Returns:
        True if either top corner pixel is green-dominant, otherwise False.
    """
    try:
        with Image.open(filepath) as img:
            rgb = img.convert("RGB")
            # Top-left first, then top-right just in case.
            corners = ((0, 0), (img.width - 1, 0))
            return any(_is_chroma_green(rgb.getpixel(xy)) for xy in corners)
    except Exception:
        # Best-effort check: stay silent on bad images, but unlike a bare
        # `except:` this lets KeyboardInterrupt / SystemExit propagate.
        return False


def _shortest_name_first(candidate):
    """Sort key: shortest file name first, ties broken alphabetically.

    The alphabetical tie-break makes the kept file deterministic instead of
    depending on the order in which os.listdir() returned the names.
    """
    return (len(candidate["name"]), candidate["name"])


def dedupe():
    """Delete duplicate images in TARGET, keeping one file per core name.

    Only files ending in .png or .jpg (case-insensitive) are considered.
    For every group with at least two files:

    * if any file is detected as green screen, the green-screen file with the
      shortest name is kept and every other file in the group is deleted,
      including all non-green ones;
    * otherwise the file with the shortest name is kept.

    Deletion errors are printed and skipped. A summary is printed at the end.
    This function permanently removes files from disk.
    """
    print(f"🚀 DEDUPING {TARGET} (Keep Green Preference)...")

    # isdir (not exists) so a regular file at TARGET is rejected here rather
    # than crashing in os.listdir below.
    if not os.path.isdir(TARGET):
        print("❌ Target not found.")
        return

    all_files = [
        f for f in os.listdir(TARGET) if f.lower().endswith(('.png', '.jpg'))
    ]
    print(f"Scanning {len(all_files)} files...")

    # Group files by core name.
    groups = {}
    for f in all_files:
        groups.setdefault(get_core_name(f), []).append(f)

    deleted_count = 0

    for files in groups.values():
        if len(files) < 2:
            continue

        # Analyze the whole group before deleting anything from it.
        candidates = []
        for f in files:
            full_path = os.path.join(TARGET, f)
            candidates.append(
                {"name": f, "green": is_green_screen(full_path), "path": full_path}
            )

        greens = sorted(
            (c for c in candidates if c["green"]), key=_shortest_name_first
        )
        others = sorted(
            (c for c in candidates if not c["green"]), key=_shortest_name_first
        )

        if greens:
            # Keep the best green one (shortest name); remove the other
            # greens and ALL non-greens.
            keep = greens[0]
            remove = greens[1:] + others
        else:
            # No green ones: keep the shortest name of the others.
            keep = others[0]
            remove = others[1:]

        for item in remove:
            # Uncomment to log each deletion:
            # print(f"🗑️ Deleting duplicate: {item['name']} (Kept: {keep['name']})")
            try:
                os.remove(item["path"])
                deleted_count += 1
            except Exception as e:
                print(f"Error removing {item['name']}: {e}")

    print("=" * 40)
    print("✅ DEDUPE COMPLETE.")
    print(f"Deleted {deleted_count} duplicates.")
    print(f"Remaining: {len(all_files) - deleted_count}")
    print("=" * 40)


if __name__ == "__main__":
    dedupe()