import os
import hashlib
import shutil

TARGET_DIR = os.path.abspath("assets/slike/glavna_referenca")
TRASH_DIR = os.path.abspath("_TRASH_BIN/duplicates_referenca")


def get_file_hash(filepath):
    """Return the MD5 hex digest of a file's content, or None if unreadable.

    MD5 is sufficient here: it is used only to detect identical content,
    not for any security purpose.
    """
    hasher = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            # Read in 64 KiB chunks so large files are never loaded
            # into memory all at once.
            buf = f.read(65536)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(65536)
        return hasher.hexdigest()
    except OSError:
        # Unreadable file (permissions, vanished mid-scan, etc.) -- skip it.
        return None


def remove_duplicates():
    if not os.path.exists(TARGET_DIR):
        print("❌ Target directory not found.")
        return
    os.makedirs(TRASH_DIR, exist_ok=True)

    print(f"🔍 Scanning for exact duplicates in {TARGET_DIR}...")
    unique_hashes = {}  # hash -> filepath of the first file seen with that content
    duplicates = 0
    scanned = 0
    files = [
        f for f in os.listdir(TARGET_DIR)
        if os.path.isfile(os.path.join(TARGET_DIR, f)) and not f.startswith(".")
    ]
    total_files = len(files)

    for filename in files:
        filepath = os.path.join(TARGET_DIR, filename)
        file_hash = get_file_hash(filepath)
        if file_hash:
            if file_hash in unique_hashes:
                # Duplicate found: move it to the trash bin instead of
                # deleting, so the operation is reversible.
                # Note: if TRASH_DIR already holds a file with this name
                # (e.g. from a previous run), shutil.move may overwrite it.
                original = unique_hashes[file_hash]
                shutil.move(filepath, os.path.join(TRASH_DIR, filename))
                duplicates += 1
                # Optional: print which original this file duplicated.
                # print(f"Duplicate: {filename} == {os.path.basename(original)}")
            else:
                # First file seen with this content.
                unique_hashes[file_hash] = filepath
        scanned += 1
        if scanned % 500 == 0:
            print(f"  Scanned {scanned}/{total_files} files...")

    print(f"✨ DONE! Found and moved {duplicates} duplicates to {TRASH_DIR}")
    print(f"✅ Unique files remaining: {len(unique_hashes)}")


if __name__ == "__main__":
    remove_duplicates()
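

# ----------------------------------------------------------------------
# Optional post-run sanity check -- a minimal sketch, not part of the
# original script. It re-hashes TARGET_DIR and reports any content hash
# that still appears more than once, which should never happen after a
# successful run of remove_duplicates(). The function name
# `verify_no_duplicates` is illustrative, not an established API.

def verify_no_duplicates():
    """Re-scan TARGET_DIR and report any content hashes seen twice."""
    seen = {}  # hash -> filepath of the first file seen with that content
    for name in os.listdir(TARGET_DIR):
        path = os.path.join(TARGET_DIR, name)
        if not os.path.isfile(path) or name.startswith("."):
            continue
        h = get_file_hash(path)
        if h is None:
            continue
        if h in seen:
            print(f"⚠️ Still duplicated: {name} == {os.path.basename(seen[h])}")
        else:
            seen[h] = path

# Example usage: uncomment to run the check after deduplication.
# if __name__ == "__main__":
#     verify_no_duplicates()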