"""Delete duplicate images under a directory tree, preferring green-screen variants.

Files are considered duplicates when their names reduce to the same "core
name" (see ``get_core_name``). Within each group one file is kept and the
rest are removed from disk.
"""
import os
import re
from collections import defaultdict

TARGET_ROOT = "assets/slike"

# Only files whose lower-cased name ends with one of these are considered.
IMAGE_EXTENSIONS = (".png", ".jpg")

# Patterns removed from the lower-cased file stem to find the "core name".
# They are applied one after another in exactly this order: known prefixes
# first, then a trailing "_<digits>" counter, then a trailing "_v<digits>".
_STRIP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"assets_backup_\d+_\d+_",
        r"moje_slike_koncna_",
        r"src_assets_library_",
        r"assets__backup_[a-z0-9_]+_",
        r"_\d+$",
        r"_v\d+$",
    )
)


def get_core_name(filename):
    """Return the normalised "core name" used to group duplicate files.

    The name is lower-cased, its extension is dropped, every pattern in
    ``_STRIP_PATTERNS`` is removed in order, the substrings ``_copy`` and
    ``_resized`` are removed, and leading/trailing underscores are stripped.

    Args:
        filename: A file name (not a full path).

    Returns:
        The reduced name; may be an empty string if everything was stripped.
    """
    name = os.path.splitext(filename.lower())[0]
    for pattern in _STRIP_PATTERNS:
        name = pattern.sub("", name)
    name = name.replace("_copy", "").replace("_resized", "")
    return name.strip("_")


def _is_greenish(pixel):
    """Return True if an RGB pixel passes the (tolerant) green threshold."""
    red, green, blue = pixel[:3]
    return green > 150 and red < 80 and blue < 80


def is_green_screen(filepath):
    """Return True if the image at *filepath* looks like it has a green background.

    Two pixels on the left edge are probed: the top-left corner and the pixel
    at half the image height. The image counts as green-screen if either one
    passes the threshold in ``_is_greenish``.

    Args:
        filepath: Path of the image file to inspect.

    Returns:
        True when a probed pixel is green-ish; False otherwise, including
        when the file cannot be opened or decoded.

    Raises:
        ImportError: If Pillow is not installed.
    """
    # Imported here, and outside the ``try`` below, so that the module stays
    # importable without Pillow while a missing Pillow still fails loudly
    # instead of silently classifying every file as "not green".
    from PIL import Image

    try:
        with Image.open(filepath) as img:
            rgb = img.convert("RGB")
            probes = ((0, 0), (0, img.height // 2))
            return any(_is_greenish(rgb.getpixel(xy)) for xy in probes)
    except Exception:
        # Unreadable or corrupt image: treat as "not green" (best effort).
        # ``Exception`` rather than a bare ``except`` so Ctrl-C still works.
        return False


def _rank(path):
    """Sort key: shortest file name first, then name and path to break ties."""
    name = os.path.basename(path)
    return (len(name), name, path)


def _split_keep_remove(paths):
    """Pick the file to keep from a duplicate group.

    Green-screen files are preferred; within the preferred set the file with
    the shortest name wins.

    Returns:
        A ``(keep, remove)`` tuple: the surviving path and the list of paths
        to delete.
    """
    # Classify each file exactly once; opening images is the expensive part.
    flagged = [(path, is_green_screen(path)) for path in paths]
    greens = sorted((p for p, green in flagged if green), key=_rank)
    others = sorted((p for p, green in flagged if not green), key=_rank)
    if greens:
        return greens[0], greens[1:] + others
    return others[0], others[1:]


def dedupe_recursive(root=None, dry_run=False):
    """Delete duplicate images found anywhere under *root*.

    Every file matching ``IMAGE_EXTENSIONS`` is grouped by ``get_core_name``
    of its base name (groups span sub-directories). For each group with more
    than one file, one file is kept (see ``_split_keep_remove``) and the
    others are deleted. Progress and a summary are printed to stdout.

    Args:
        root: Directory to scan; defaults to ``TARGET_ROOT``.
        dry_run: When true, only report what would be deleted.

    Returns:
        The number of files deleted (or that would be deleted in a dry run).
    """
    root = TARGET_ROOT if root is None else root
    print(f"🚀 GLOBAL DEDUPE in {root} (Recursive)...")

    # Gather all candidate files, grouped by core name.
    groups = defaultdict(list)
    total = 0
    for dirpath, _dirnames, filenames in os.walk(root):
        for fname in filenames:
            if fname.lower().endswith(IMAGE_EXTENSIONS):
                groups[get_core_name(fname)].append(os.path.join(dirpath, fname))
                total += 1

    print(f"Scanning {total} files...")

    deleted = 0
    for paths in groups.values():
        if len(paths) < 2:
            continue

        _keep, remove = _split_keep_remove(paths)
        for path in remove:
            if dry_run:
                print(f"Would delete {path}")
                deleted += 1
                continue
            try:
                os.remove(path)
            except OSError as exc:
                # Stay best-effort, but do not hide the failure.
                print(f"⚠️ Could not delete {path}: {exc}")
            else:
                deleted += 1

    print("=" * 40)
    print("✅ GLOBAL DEDUPE COMPLETE.")
    verb = "Would delete" if dry_run else "Deleted"
    print(f"{verb} {deleted} duplicates.")
    print("=" * 40)
    return deleted


if __name__ == "__main__":
    dedupe_recursive()