2026-01-20 17:22:42 +01:00
parent cce5ed9791
commit ae1cf2e9bf
4777 changed files with 280 additions and 0 deletions

@@ -0,0 +1,65 @@
import os
import hashlib
import shutil

TARGET_DIR = os.path.abspath("assets/slike/glavna_referenca")
TRASH_DIR = os.path.abspath("_TRASH_BIN/duplicates_referenca")


def get_file_hash(filepath):
    """Calculate the MD5 hash of a file's content, reading in 64 KiB chunks."""
    hasher = hashlib.md5()
    try:
        with open(filepath, 'rb') as f:
            buf = f.read(65536)
            while buf:
                hasher.update(buf)
                buf = f.read(65536)
        return hasher.hexdigest()
    except OSError:
        # Unreadable file (permissions, broken symlink, ...): skip it.
        return None


def remove_duplicates():
    if not os.path.exists(TARGET_DIR):
        print("❌ Target directory not found.")
        return
    if not os.path.exists(TRASH_DIR):
        os.makedirs(TRASH_DIR)

    print(f"🔍 Scanning for exact duplicates in {TARGET_DIR}...")
    unique_hashes = {}  # hash -> filepath of the first file seen with that content
    duplicates = 0
    scanned = 0

    files = [f for f in os.listdir(TARGET_DIR)
             if os.path.isfile(os.path.join(TARGET_DIR, f)) and not f.startswith(".")]
    total_files = len(files)

    for filename in files:
        filepath = os.path.join(TARGET_DIR, filename)
        file_hash = get_file_hash(filepath)
        if file_hash:
            if file_hash in unique_hashes:
                # Duplicate found: move it to the trash directory.
                original = unique_hashes[file_hash]
                shutil.move(filepath, os.path.join(TRASH_DIR, filename))
                duplicates += 1
                # Optional: Print info
                # print(f"Duplicate: {filename} == {os.path.basename(original)}")
            else:
                # New unique file
                unique_hashes[file_hash] = filepath
        scanned += 1
        if scanned % 500 == 0:
            print(f"  Scanned {scanned}/{total_files} files...")

    print(f"✨ DONE! Found and moved {duplicates} duplicates to {TRASH_DIR}")
    print(f"✅ Unique files remaining: {len(unique_hashes)}")


if __name__ == "__main__":
    remove_duplicates()
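
On Python 3.11 and newer, the chunked-read loop in get_file_hash can be delegated to hashlib.file_digest, which streams the file in blocks internally. A minimal sketch, assuming Python 3.11+ is available; the function name get_file_hash_311 is illustrative, not part of the script:

    import hashlib

    def get_file_hash_311(filepath):
        """Return the MD5 hex digest of a file, or None if it cannot be read."""
        try:
            with open(filepath, 'rb') as f:
                # hashlib.file_digest (Python 3.11+) reads the file in chunks for us.
                return hashlib.file_digest(f, "md5").hexdigest()
        except OSError:
            return None

The behavior is the same as the hand-rolled loop; the standard library just handles the buffering.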
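
One caveat with the shutil.move call above: if TRASH_DIR already holds a file with the same name (for example, left over from an earlier run), the existing file may be silently overwritten, depending on the platform's rename semantics. A minimal sketch of a collision-safe variant; the helper name safe_move is hypothetical, not part of the script:

    import os
    import shutil

    def safe_move(src, dst_dir):
        """Move src into dst_dir, appending a numeric suffix if the name is taken."""
        base = os.path.basename(src)
        name, ext = os.path.splitext(base)
        dst = os.path.join(dst_dir, base)
        counter = 1
        while os.path.exists(dst):
            # e.g. photo.jpg -> photo_1.jpg, photo_2.jpg, ...
            dst = os.path.join(dst_dir, f"{name}_{counter}{ext}")
            counter += 1
        shutil.move(src, dst)
        return dst

With this helper, the call inside remove_duplicates would become safe_move(filepath, TRASH_DIR), and nothing already sitting in the trash directory could be clobbered.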