ok
This commit is contained in:
100
scripts/dedupe_global.py
Normal file
100
scripts/dedupe_global.py
Normal file
@@ -0,0 +1,100 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
from PIL import Image
|
||||
|
||||
# Root directory that dedupe_recursive() walks with os.walk(). It is a relative
# path, so it is resolved against the current working directory at run time.
TARGET_ROOT = "assets/slike"
|
||||
|
||||
# Strip known backup/library prefixes and counter/version/copy suffixes to find a file's "core name".
|
||||
def get_core_name(filename):
    """Reduce an image filename to the "core name" used to group duplicates.

    The name is lower-cased and its extension dropped. Known backup/library
    prefixes, a trailing ``_<digits>`` counter, a trailing ``_v<digits>``
    version tag, and every ``_copy`` / ``_resized`` marker are then removed,
    in that order. Leading and trailing underscores are trimmed last.

    Args:
        filename: Base name of the file (no directory part expected).

    Returns:
        The normalised core name; may be an empty string when nothing
        remains after stripping.
    """
    stem, _extension = os.path.splitext(filename.lower())

    # Applied strictly in sequence. The numeric counter is stripped before the
    # version tag, so "x_v2_1" collapses to "x" while "x_1_v2" only loses "_v2".
    strip_patterns = (
        r"assets_backup_\d+_\d+_",
        r"moje_slike_koncna_",
        r"src_assets_library_",
        r"assets__backup_[a-z0-9_]+_",
        r"_\d+$",
        r"_v\d+$",
    )
    for pattern in strip_patterns:
        stem = re.sub(pattern, "", stem)

    for marker in ("_copy", "_resized"):
        stem = stem.replace(marker, "")

    return stem.strip("_")
|
||||
|
||||
def is_green_screen(filepath):
    """Return True when the image at *filepath* has a green left edge.

    Two pixels in the first column are sampled: the top-left corner and the
    pixel at half the image height. The image counts as green as soon as
    either sample has G > 150 with R < 80 and B < 80, which tolerates
    imperfect green.

    Args:
        filepath: Path of the image file to inspect.

    Returns:
        True if either sampled pixel passes the green test; False otherwise,
        including when the image cannot be opened or read.
    """

    def _is_greenish(pixel):
        # Pixels come from an RGB-converted image, so they are (R, G, B).
        red, green, blue = pixel[:3]
        return green > 150 and red < 80 and blue < 80

    try:
        with Image.open(filepath) as img:
            rgb = img.convert("RGB")
            sample_points = ((0, 0), (0, img.height // 2))
            # any() short-circuits, so the second pixel is only read when the
            # first one is not green.
            return any(_is_greenish(rgb.getpixel(xy)) for xy in sample_points)
    except Exception:
        # Deliberately broad best-effort: any unreadable or corrupt file is
        # treated as "not green". Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate so the run can be aborted.
        return False
|
||||
|
||||
def _pick_survivor(paths):
    """Choose which file of one core-name group to keep.

    Green-screen files (per is_green_screen) are preferred; among the
    preferred set, the file with the shortest base name wins. The sort is
    stable, so ties keep their original order.

    Args:
        paths: Non-empty list of file paths sharing a core name.

    Returns:
        A ``(keep, remove)`` tuple: the path to keep and the list of paths
        to delete.
    """
    greens = []
    others = []
    for path in paths:
        (greens if is_green_screen(path) else others).append(path)

    def name_length(path):
        return len(os.path.basename(path))

    if greens:
        greens.sort(key=name_length)
        return greens[0], greens[1:] + others

    others.sort(key=name_length)
    return others[0], others[1:]


def dedupe_recursive():
    """Delete duplicate images under TARGET_ROOT, grouped by core name.

    Every ``.png`` / ``.jpg`` file below TARGET_ROOT is collected and grouped
    by get_core_name() of its base name, regardless of which directory it
    lives in. In each group with two or more files, one file is kept (see
    _pick_survivor) and the rest are removed from disk.

    Progress and a final summary are printed to stdout. Files that cannot be
    deleted are reported and counted rather than silently ignored.
    """
    print(f"🚀 GLOBAL DEDUPE in {TARGET_ROOT} (Recursive)...")

    # Gather all candidate image files.
    image_paths = []
    for root, _dirs, files in os.walk(TARGET_ROOT):
        for fname in files:
            if fname.lower().endswith(('.png', '.jpg')):
                image_paths.append(os.path.join(root, fname))

    print(f"Scanning {len(image_paths)} files...")

    groups = {}
    for path in image_paths:
        core = get_core_name(os.path.basename(path))
        if not core:
            # Names such as "_1.png" reduce to an empty core. Nothing
            # identifies them as copies of each other, so never group
            # (and delete) them.
            continue
        groups.setdefault(core, []).append(path)

    deleted = 0
    failed = 0

    for paths in groups.values():
        if len(paths) < 2:
            continue

        _keep, doomed = _pick_survivor(paths)

        for path in doomed:
            try:
                os.remove(path)
            except OSError as exc:
                # Keep going, but make the failure visible.
                failed += 1
                print(f"Could not delete {path}: {exc}")
            else:
                deleted += 1

    print("=" * 40)
    print("✅ GLOBAL DEDUPE COMPLETE.")
    print(f"Deleted {deleted} duplicates.")
    if failed:
        print(f"Failed to delete {failed} files.")
    print("=" * 40)
|
||||
|
||||
# Script entry point: run the dedupe only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    dedupe_recursive()
|
||||
Reference in New Issue
Block a user