74 lines
2.7 KiB
Python
74 lines
2.7 KiB
Python
import os
|
|
import difflib
|
|
|
|
# Target directory whose entries are scanned for near-duplicate file names.
# The relative path is resolved against the current working directory at
# import time, so NPC_DIR is always absolute.
_NPC_RELATIVE_PATH = "assets/slike/glavna_referenca/_FOLDER_NPC"
NPC_DIR = os.path.abspath(_NPC_RELATIVE_PATH)
|
|
|
|
# Extensions treated as interchangeable when deciding whether two files can be
# copies of the same picture (compared case-insensitively).
_IMAGE_EXTENSIONS = frozenset(
    {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tif", ".tiff"}
)


def _extensions_compatible(ext1, ext2):
    """Return True when both extensions are equal or both are image types."""
    ext1, ext2 = ext1.lower(), ext2.lower()
    if ext1 == ext2:
        return True
    return ext1 in _IMAGE_EXTENSIONS and ext2 in _IMAGE_EXTENSIONS


def _stems_similar(short_stem, long_stem):
    """Return True when *short_stem* occurs in *long_stem* closely enough to be a copy."""
    if not short_stem or short_stem not in long_stem:
        return False
    # A shared prefix ("gronk" / "gronk_01") is accepted outright.
    if long_stem.startswith(short_stem):
        return True
    # Otherwise require high overall similarity, which rejects containment
    # coincidences such as "man" inside "woman" (ratio 0.75).
    return difflib.SequenceMatcher(None, short_stem, long_stem).ratio() > 0.8


def find_fuzzy_duplicates(directory=None, *, dry_run=False):
    """Delete files whose names look like copies of another file in a directory.

    Two regular files are treated as duplicates when the shorter stem is
    contained in the longer one (and is either its prefix or more than 80%
    similar to it) and their extensions are equal or are both image
    extensions. Of each such pair the larger file is kept; on equal sizes the
    file with the shorter stem is kept.

    Hidden entries (names starting with ".") and anything that is not a
    regular file, such as subdirectories, are ignored.

    Args:
        directory: Directory to scan. Defaults to the module-level NPC_DIR.
        dry_run: When True, only report what would be deleted and leave the
            directory untouched.

    Returns:
        Sorted list of the paths selected for removal. A path stays in the
        list even if deleting it failed; the failure is printed.

    Raises:
        OSError: If the directory cannot be listed (for example, it does not
            exist).
    """
    if directory is None:
        directory = NPC_DIR
    # Normalise so basename() below is meaningful even with a trailing slash.
    directory = os.path.abspath(directory)

    print(f"🕵️♂️ Analyzing similar files in {os.path.basename(directory)}...")

    # filename -> (filepath, size). Sizes are read once up front instead of on
    # every pair comparison.
    candidates = {}
    for name in os.listdir(directory):
        if name.startswith("."):
            continue
        path = os.path.join(directory, name)
        # Directories must never take part: their "size" is meaningless and
        # could cause a real file to be deleted in their favour.
        if not os.path.isfile(path):
            continue
        try:
            size = os.path.getsize(path)
        except OSError as e:
            # The file vanished or is unreadable; leave it out of the comparison.
            print(f"Error reading {path}: {e}")
            continue
        candidates[name] = (path, size)

    # Shorter stems first so each name is checked against longer ones; the
    # name itself breaks ties so results do not depend on listdir() order.
    ordered = sorted(
        candidates, key=lambda n: (len(os.path.splitext(n)[0]), n)
    )

    action = "Would delete" if dry_run else "Deleting"
    files_to_remove = set()

    for i, name1 in enumerate(ordered):
        path1, size1 = candidates[name1]
        if path1 in files_to_remove:
            continue
        stem1, ext1 = os.path.splitext(name1)

        for name2 in ordered[i + 1:]:
            path2, size2 = candidates[name2]
            if path2 in files_to_remove:
                continue
            stem2, ext2 = os.path.splitext(name2)

            if not _extensions_compatible(ext1, ext2):
                continue
            if not _stems_similar(stem1, stem2):
                continue

            print(f" ⚠️ Potential dupe: '{name1}' ({size1}b) vs '{name2}' ({size2}b)")

            # Keep the larger file (usually the better-quality one).
            if size1 >= size2:
                print(f" 🗑️ {action} smaller/same: {name2}")
                files_to_remove.add(path2)
            else:
                print(f" 🗑️ {action} smaller: {name1}")
                files_to_remove.add(path1)
                break  # name1 is gone, stop checking it

    selected = sorted(files_to_remove)

    if dry_run:
        print(f"\n🔎 Dry run: {len(selected)} files would be deleted.")
        return selected

    print(f"\n🗑️ Deleting {len(selected)} files...")
    for p in selected:
        try:
            os.remove(p)
        except OSError as e:
            print(f"Error removing {p}: {e}")

    print("✨ Fuzzy cleanup done.")
    return selected
|
|
|
|
# Entry point: perform the cleanup only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    find_fuzzy_duplicates()
|