import os
import hashlib

# Define target directories in the clean asset structure
TARGET_DIRS = [
    "assets/slike/animals",
    "assets/slike/characters",
    "assets/slike/environment",
    "assets/slike/ui",
    "assets/slike/items",
    "assets/slike/biomes",
    "assets/slike/intro",
]


def calculate_md5(filepath):
    """Calculate the MD5 hash of a file, reading it in 4 KiB chunks."""
    hash_md5 = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    except OSError as e:
        print(f"Error reading {filepath}: {e}")
        return None


def find_duplicates():
    print("🕵️‍♂️ STARTING DUPLICATE DETECTIVE...")
    print("   Scanning folders for identical images...")

    seen_hashes = {}  # {md5_hash: [list_of_filepaths]}
    total_files = 0

    # 1. SCAN ALL FILES
    root_base = os.getcwd()
    for relative_dir in TARGET_DIRS:
        abs_dir = os.path.join(root_base, relative_dir)
        if not os.path.exists(abs_dir):
            continue

        for root, _dirs, files in os.walk(abs_dir):
            for file in files:
                # Skip hidden files and anything that is not an image
                if file.startswith(".") or not file.lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
                    continue

                filepath = os.path.join(root, file)
                total_files += 1
                file_hash = calculate_md5(filepath)
                if file_hash:
                    seen_hashes.setdefault(file_hash, []).append(filepath)

    # 2. REPORT DUPLICATES
    duplicates_found = 0
    print(f"\n✅ Scanned {total_files} files.")

    for paths in seen_hashes.values():
        if len(paths) > 1:
            duplicates_found += 1
            print(f"\n⚠️ DUPLICATE GROUP FOUND ({len(paths)} copies):")
            for p in paths:
                # Print the path relative to the project root for cleaner output
                rel_p = os.path.relpath(p, root_base)
                print(f"   -> {rel_p}")

    if duplicates_found == 0:
        print("\n✨ Great! No identical duplicates found in scanned folders.")
    else:
        print(f"\n🚨 WARNING: Found {duplicates_found} groups of duplicate images!")
        print("   Consider deleting the extras to save space and avoid confusion.")


if __name__ == "__main__":
    find_duplicates()
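
# --- Optional extension: a minimal, illustrative sketch, NOT part of the tool above ---
# The report ends by suggesting you delete the extra copies by hand. One way that
# could be automated is sketched below. It assumes find_duplicates() were changed
# to return its seen_hashes mapping; the function name delete_duplicates and the
# dry_run flag are hypothetical names chosen for this example.
def delete_duplicates(seen_hashes, dry_run=True):
    """Delete all but the first file in each duplicate group.

    `seen_hashes` is the {md5_hash: [filepaths]} mapping built during the
    scan. With dry_run=True (the default) nothing is removed; the planned
    deletions are only printed, so you can review them first.
    """
    for paths in seen_hashes.values():
        # paths[0] is kept as the canonical copy; the rest are redundant
        for extra in paths[1:]:
            if dry_run:
                print(f"Would delete: {extra}")
            else:
                os.remove(extra)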