scripts/remove_exact_duplicates.py (new file, 65 lines)
@@ -0,0 +1,65 @@
import os
import hashlib
import shutil

TARGET_DIR = os.path.abspath("assets/slike/glavna_referenca")
TRASH_DIR = os.path.abspath("_TRASH_BIN/duplicates_referenca")


def get_file_hash(filepath):
    """Calculates the MD5 hash of a file's content, or returns None if the file cannot be read."""
    hasher = hashlib.md5()
    try:
        with open(filepath, 'rb') as f:
            # Read in 64 KiB chunks so large files never need to fit in memory at once.
            buf = f.read(65536)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(65536)
        return hasher.hexdigest()
    except OSError:
        return None


def remove_duplicates():
    if not os.path.exists(TARGET_DIR):
        print("❌ Target directory not found.")
        return

    if not os.path.exists(TRASH_DIR):
        os.makedirs(TRASH_DIR)

    print(f"🔍 Scanning for exact duplicates in {TARGET_DIR}...")

    unique_hashes = {}  # hash -> filepath of the first file seen with that content
    duplicates = 0
    scanned = 0

    files = [f for f in os.listdir(TARGET_DIR)
             if os.path.isfile(os.path.join(TARGET_DIR, f)) and not f.startswith(".")]
    total_files = len(files)

    for filename in files:
        filepath = os.path.join(TARGET_DIR, filename)
        file_hash = get_file_hash(filepath)

        if file_hash:
            if file_hash in unique_hashes:
                # Duplicate found: move it to the trash directory instead of deleting it outright.
                original = unique_hashes[file_hash]
                shutil.move(filepath, os.path.join(TRASH_DIR, filename))
                duplicates += 1

                # Optional: print which original this file duplicates.
                # print(f"Duplicate: {filename} == {os.path.basename(original)}")
            else:
                # New unique file: remember it as the original for this hash.
                unique_hashes[file_hash] = filepath

        scanned += 1
        if scanned % 500 == 0:
            print(f"  Scanned {scanned}/{total_files} files...")

    print(f"✨ DONE! Found and moved {duplicates} duplicates to {TRASH_DIR}")
    print(f"✅ Unique files remaining: {len(unique_hashes)}")


if __name__ == "__main__":
    remove_duplicates()
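Minimal usage sketch, assuming the script is invoked from the repository root so that the relative assets/slike/glavna_referenca and _TRASH_BIN paths resolve:

    python scripts/remove_exact_duplicates.py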