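"""Find and quarantine exact duplicate files in the main reference image folder.

Each file in TARGET_DIR is hashed by content (MD5); any file whose hash matches
a previously seen file is moved into TRASH_DIR rather than deleted.
"""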
import hashlib
import os
import shutil

# Directory to deduplicate, and a holding area for anything moved out of it.
TARGET_DIR = os.path.abspath("assets/slike/glavna_referenca")
TRASH_DIR = os.path.abspath("_TRASH_BIN/duplicates_referenca")


def get_file_hash(filepath):
    """Calculate the MD5 hash of a file's content, reading in 64 KiB chunks."""
    hasher = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            # Stream the file in chunks so large files are never fully in memory.
            while chunk := f.read(65536):
                hasher.update(chunk)
        return hasher.hexdigest()
    except OSError:
        # Unreadable file (permissions, broken symlink, ...) -- skip it.
        return None


def remove_duplicates():
    if not os.path.exists(TARGET_DIR):
        print("❌ Target directory not found.")
        return

    os.makedirs(TRASH_DIR, exist_ok=True)

    print(f"🔍 Scanning for exact duplicates in {TARGET_DIR}...")

    unique_hashes = {}  # hash -> filepath of the first file seen with that hash
    duplicates = 0
    scanned = 0

    # Consider only regular, non-hidden files directly inside TARGET_DIR.
    files = [
        f
        for f in os.listdir(TARGET_DIR)
        if os.path.isfile(os.path.join(TARGET_DIR, f)) and not f.startswith(".")
    ]
    total_files = len(files)

    for filename in files:
        filepath = os.path.join(TARGET_DIR, filename)
        file_hash = get_file_hash(filepath)

        if file_hash:
            if file_hash in unique_hashes:
                # Duplicate found: move it aside rather than deleting. (Assumes
                # duplicate file names don't collide inside TRASH_DIR.)
                original = unique_hashes[file_hash]
                shutil.move(filepath, os.path.join(TRASH_DIR, filename))
                duplicates += 1

                # Optional: print which original each duplicate matched.
                # print(f"Duplicate: {filename} == {os.path.basename(original)}")
            else:
                # First file seen with this hash -- record it as the original.
                unique_hashes[file_hash] = filepath

        scanned += 1
        if scanned % 500 == 0:
            print(f"   Scanned {scanned}/{total_files} files...")

    print(f"✨ DONE! Found and moved {duplicates} duplicates to {TRASH_DIR}")
    print(f"✅ Unique files remaining: {len(unique_hashes)}")


if __name__ == "__main__":
    remove_duplicates()
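# Note: TARGET_DIR and TRASH_DIR are resolved relative to the current working
# directory, so run the script from the project root. Nothing is deleted
# outright; a mistaken run can be undone by moving files back out of TRASH_DIR.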