🧹 Massive Asset Reorganization: Cleaned filenames, removed duplicates, and sorted into UI/Items/Env/Chars/Animals folders.
This commit is contained in:
73
scripts/fuzzy_dedupe_npc.py
Normal file
73
scripts/fuzzy_dedupe_npc.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import os
|
||||
import difflib
|
||||
|
||||
# Folder holding the NPC reference images that this script scans.
# The relative path is turned into an absolute one when the module is loaded.
_NPC_RELATIVE_PATH = "assets/slike/glavna_referenca/_FOLDER_NPC"
NPC_DIR = os.path.abspath(_NPC_RELATIVE_PATH)
|
||||
|
||||
# Extensions treated as interchangeable image formats when pairing file names.
_IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"})


def _extensions_compatible(ext1, ext2):
    """Return True if both extensions are equal or both are known image types."""
    ext1 = ext1.lower()
    ext2 = ext2.lower()
    if ext1 == ext2:
        return True
    return ext1 in _IMAGE_EXTENSIONS and ext2 in _IMAGE_EXTENSIONS


def _stems_look_duplicated(short_stem, long_stem):
    """Return True if *long_stem* looks like a renamed copy of *short_stem*."""
    if short_stem not in long_stem:
        return False
    # A plain prefix match ("gronk" -> "gronk_01") always counts as a variant.
    if long_stem.startswith(short_stem):
        return True
    # Otherwise require high overall similarity, so that e.g. "man" inside
    # "woman" (ratio 0.75) is not treated as a duplicate.
    return difflib.SequenceMatcher(None, short_stem, long_stem).ratio() > 0.8


def find_fuzzy_duplicates(directory=None, *, dry_run=False):
    """Delete files whose names look like variants of another file's name.

    Two files are considered duplicates when the shorter stem is contained in
    the longer one, the names are similar enough (prefix match or a
    ``difflib`` ratio above 0.8), and their extensions are either equal or
    both image formats. Of each pair, the larger file is kept; on equal size
    the file with the shorter stem is kept.

    Only regular, non-hidden files directly inside the directory are
    considered; sub-directories are ignored.

    Args:
        directory: Folder to clean. Defaults to the module-level ``NPC_DIR``.
        dry_run: When true, report what would be deleted but remove nothing.

    Raises:
        OSError: If the directory cannot be listed or a file size cannot be
            read. Failures while deleting individual files are printed and
            skipped instead of being raised.
    """
    if directory is None:
        directory = NPC_DIR

    print(f"🕵️♂️ Analyzing similar files in {os.path.basename(directory)}...")

    # filename -> filepath, restricted to visible regular files.
    files = {}
    for entry in os.listdir(directory):
        if entry.startswith("."):
            continue
        path = os.path.join(directory, entry)
        if os.path.isfile(path):
            files[entry] = path

    # Order by stem length (the containment test is done on stems) and break
    # ties by name so the outcome does not depend on os.listdir() ordering.
    sorted_names = sorted(
        files,
        key=lambda name: (len(os.path.splitext(name)[0]), name),
    )

    files_to_remove = set()

    for i, name1 in enumerate(sorted_names):
        path1 = files[name1]
        if path1 in files_to_remove:
            continue
        stem1, ext1 = os.path.splitext(name1)

        for name2 in sorted_names[i + 1:]:
            path2 = files[name2]
            if path2 in files_to_remove:
                continue
            stem2, ext2 = os.path.splitext(name2)

            if not _extensions_compatible(ext1, ext2):
                continue
            if not _stems_look_duplicated(stem1, stem2):
                continue

            size1 = os.path.getsize(path1)
            size2 = os.path.getsize(path2)
            print(f" ⚠️ Potential dupe: '{name1}' ({size1}b) vs '{name2}' ({size2}b)")

            # Keep the larger file (usually the better-quality image).
            if size1 >= size2:
                print(f" 🗑️ Deleting smaller/same: {name2}")
                files_to_remove.add(path2)
            else:
                print(f" 🗑️ Deleting smaller: {name1}")
                files_to_remove.add(path1)
                break  # name1 is gone, stop checking it

    if dry_run:
        print(f"\n🔎 Dry run: {len(files_to_remove)} files would be deleted.")
        return

    print(f"\n🗑️ Deleting {len(files_to_remove)} files...")
    for doomed in sorted(files_to_remove):
        try:
            os.remove(doomed)
        except OSError as e:
            print(f"Error removing {doomed}: {e}")

    print("✨ Fuzzy cleanup done.")
|
||||
|
||||
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    find_fuzzy_duplicates()
|
||||
Reference in New Issue
Block a user