79 lines
2.5 KiB
Python
79 lines
2.5 KiB
Python
import os
|
|
import hashlib
|
|
import sys
|
|
|
|
# Directories (relative to the project root) that the duplicate scan covers.
# All of them live under the same clean-structure image root, so the list is
# derived from the shared prefix plus each category name.
TARGET_DIRS = [
    "assets/slike/" + category
    for category in (
        "animals",
        "characters",
        "environment",
        "ui",
        "items",
        "biomes",
        "intro",
    )
]
|
|
|
|
def calculate_md5(filepath):
    """Return the hex MD5 digest of the file at *filepath*.

    Reads the file in 4 KiB chunks so arbitrarily large images are hashed
    with constant memory.

    Args:
        filepath: Path to the file to hash.

    Returns:
        The hexadecimal MD5 digest string, or ``None`` if the file could
        not be read (an error message is printed to stderr).
    """
    hash_md5 = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            # iter() with a sentinel keeps yielding chunks until EOF (b"").
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    except OSError as e:
        # Narrowed from a blanket `except Exception`: only I/O failures are
        # expected here, and anything else should surface as a real error.
        # The diagnostic goes to stderr so it doesn't pollute the scan report.
        print(f"Error reading {filepath}: {e}", file=sys.stderr)
        return None
|
|
|
|
def find_duplicates():
    """Scan TARGET_DIRS for byte-identical image files and report them.

    Every image under the configured directories is hashed with MD5; paths
    sharing a digest are printed as one duplicate group. Results go to
    stdout only — nothing is deleted or modified.
    """
    print("🕵️♂️ STARTING DUPLICATE DETECTIVE...")
    print(" Scanning folders for identical images...")

    hash_to_paths = {}  # md5 digest -> every file path with that content
    scanned = 0
    base = os.getcwd()
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp')

    # Phase 1: walk each configured directory that actually exists and
    # bucket every image path by its content hash.
    for rel_dir in TARGET_DIRS:
        top = os.path.join(base, rel_dir)
        if not os.path.exists(top):
            continue
        for dirpath, _subdirs, filenames in os.walk(top):
            for name in filenames:
                # Skip hidden files and anything that isn't an image.
                if name.startswith(".") or not name.lower().endswith(image_suffixes):
                    continue
                path = os.path.join(dirpath, name)
                scanned += 1
                digest = calculate_md5(path)
                if digest:
                    hash_to_paths.setdefault(digest, []).append(path)

    # Phase 2: any digest with more than one path is a duplicate group.
    group_count = 0
    print(f"\n✅ Scanned {scanned} files.")

    for paths in hash_to_paths.values():
        if len(paths) < 2:
            continue
        group_count += 1
        print(f"\n⚠️ DUPLICATE GROUP FOUND ({len(paths)} copies):")
        for duplicate in paths:
            # Relative paths keep the report readable.
            rel_p = os.path.relpath(duplicate, base)
            print(f" sc -> {rel_p}")

    if group_count == 0:
        print("\n✨ Great! No identical duplicates found in scanned folders.")
    else:
        print(f"\n🚨 WARNING: Found {group_count} groups of duplicate images!")
        print(" Consider deleting the extras to save space and avoid confusion.")
|
# Entry point: run the duplicate scan only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    find_duplicates()
|