EXTENDED SESSION (03:00 - 03:45 CET):

1. ANIMAL GENERATION (assets/slike/animals/generated_steampunk/):
   ✅ 10 unique assets created:
   - Farm: Cow, Pig, Chicken, Duck, Goat, Horse, Rabbit, Donkey, Llama
   - Forest: Fox, Bear, Wolf
   - Style: Dark Noir Steampunk Chibi

2. REFERENCE ORGANIZATION (assets/slike/glavna_referenca/):
   ✅ Organized 2,626 files into subfolders
   ✅ Created comprehensive biome structure (200 folders)
   ✅ Moved docs to docs/art_guidelines/

SESSION UPDATE:
- Total Time: 3h 03min
- Files Processed: 5,788+
- Status: SESSION COMPLETE! 🚀
176 lines
6.2 KiB
Python
176 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
SMART VISUAL CLEANUP

Detects duplicates based on VISUAL CONTENT (pixels), not filenames.
Keeps Green Screen versions and moves the normal version to a review folder
when its subject matches.
"""
|
|
|
|
import os
|
|
import shutil
|
|
import numpy as np
|
|
from pathlib import Path
|
|
from PIL import Image
|
|
|
|
# --- Configuration ---
# Root folder that is searched recursively for images.
SOURCE_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike/glavna_referenca")
# Folder inside SOURCE_DIR that receives the files flagged as duplicates.
TRASH_DIR = SOURCE_DIR.joinpath("_ZA_BRISANJE_DUPLIKATI")

# --- Green screen thresholds (RGB) ---
# Standard chroma green is approximately (0, 177, 64) or (0, 255, 0); these two
# bounds describe a per-channel range of "green".
# NOTE(review): no function visible in this file reads GREEN_MIN / GREEN_MAX --
# confirm whether another module uses them before removing.
GREEN_MIN = np.array((0, 80, 0))
GREEN_MAX = np.array((120, 255, 120))
|
|
|
|
def is_green_pixel(arr):
    """Return a boolean mask marking the green-dominant pixels of ``arr``.

    A pixel counts as green when its G value is strictly greater than both its
    R and B values and also strictly greater than 100.

    Args:
        arr: Array indexed as ``arr[row, col, channel]`` with R, G, B stored at
            channel indices 0, 1 and 2.

    Returns:
        Element-wise boolean result with one entry per ``(row, col)`` position.
    """
    red = arr[:, :, 0]
    green = arr[:, :, 1]
    blue = arr[:, :, 2]

    dominates_red = green > red
    dominates_blue = green > blue
    bright_enough = green > 100
    return dominates_red & dominates_blue & bright_enough
|
|
|
|
def is_green_screen_image(img_arr):
    """Decide whether an image array looks like a green-screen shot.

    Only the four 5x5 corner patches are inspected. A corner votes "green"
    when more than 80% of its pixels pass ``is_green_pixel``; the image is
    classified as green screen when at least three corners vote green.

    Args:
        img_arr: Three-dimensional array indexed as ``[row, col, channel]``.

    Returns:
        ``True`` for a likely green-screen image, ``False`` otherwise. Images
        with fewer than 10 rows or 10 columns are always ``False``.
    """
    height, width, _channels = img_arr.shape
    if min(height, width) < 10:
        return False

    patch = 5
    row_spans = (slice(0, patch), slice(height - patch, height))  # top, bottom
    col_spans = (slice(0, patch), slice(width - patch, width))    # left, right

    # Visits top-left, top-right, bottom-left, bottom-right in that order.
    green_corners = sum(
        1
        for rows in row_spans
        for cols in col_spans
        if np.mean(is_green_pixel(img_arr[rows, cols])) > 0.8
    )
    return green_corners >= 3
|
|
|
|
def are_visually_identical_subject(normal_path, green_path, tolerance=15.0):
    """Report whether a normal image shows the same subject as a green image.

    Both files are decoded to RGB and compared pixel by pixel, but only at the
    positions where the green image is NOT green according to
    ``is_green_pixel`` (i.e. where the subject is expected to be). The
    background of the normal image is therefore ignored.

    Args:
        normal_path: Path of the image with a regular background.
        green_path: Path of the green-screen image.
        tolerance: Upper bound (exclusive) for the mean difference over the
            subject pixels. The difference of one pixel is the SUM of the
            absolute R, G and B differences, so it ranges from 0 to 765.
            The default of 15.0 leaves room for JPG compression artifacts.

    Returns:
        ``True`` when the images have identical dimensions and the mean subject
        difference is below ``tolerance``. ``False`` when the dimensions
        differ, when the green image has no non-green pixel at all, or when
        either file cannot be read or compared.
    """
    try:
        # Context managers close both files deterministically instead of
        # leaving that to the garbage collector.
        with Image.open(normal_path) as raw_n, Image.open(green_path) as raw_g:
            # Compare dimensions before decoding any pixel data: a mismatch
            # means the two files cannot be the same generation.
            if raw_n.size != raw_g.size:
                return False

            arr_n = np.array(raw_n.convert('RGB'))
            arr_g = np.array(raw_g.convert('RGB'))

        # True where the green image shows background, False on the subject.
        green_mask = is_green_pixel(arr_g)

        # Widen to int first: uint8 subtraction would wrap around.
        diff = np.abs(arr_n.astype(int) - arr_g.astype(int))
        diff_sum = np.sum(diff, axis=2)  # per-pixel sum of the RGB differences

        subject_diff = diff_sum[~green_mask]
        if subject_diff.size == 0:
            # The green image is 100% green, so there is no subject to compare.
            return False

        return bool(np.mean(subject_diff) < tolerance)

    except Exception:
        # Deliberate best effort: an unreadable or otherwise incomparable pair
        # is reported as "no match" so that the file is never moved.
        return False
|
|
|
|
def smart_cleanup():
    """Move normal images that duplicate a green-screen image into TRASH_DIR.

    Steps:
        1. Collect every ``*.png`` and ``*.jpg`` below ``SOURCE_DIR``
           (recursively), skipping paths that contain ``_ZA_BRISANJE``.
        2. Classify each image as green screen or normal with
           ``is_green_screen_image``. Files that cannot be opened are reported
           and skipped.
        3. For each normal image, compare it with the green-screen images of
           identical dimensions using ``are_visually_identical_subject``.
        4. On the first match, move the normal image into ``TRASH_DIR``.
           Nothing is deleted; the folder is meant to be reviewed by hand.

    Returns:
        None. Progress and the final count are printed to stdout.
    """
    print("🧠 STARTING VISUAL ANALYSIS (PIXEL MATCHING)...")
    print(f"📂 Searching in: {SOURCE_DIR}")

    TRASH_DIR.mkdir(exist_ok=True)

    all_files = list(SOURCE_DIR.rglob("*.png")) + list(SOURCE_DIR.rglob("*.jpg"))
    print(f"🔍 Analyzing {len(all_files)} images...")

    # 1. Classify images. Green images are grouped by dimensions right away so
    #    that a normal image is only compared with same-sized candidates
    #    instead of with every green image.
    green_by_size = {}
    green_count = 0
    normal_images = []

    print("🎨 Classifying Green Screen vs Normal...")
    for f in all_files:
        # TRASH_DIR lives inside SOURCE_DIR, so rglob also returns its content.
        if "_ZA_BRISANJE" in str(f):
            continue

        try:
            with Image.open(f) as raw:
                size = raw.size
                arr = np.array(raw.convert('RGB'))
        except Exception as e:
            # Report instead of silently dropping the file; a bare ``except``
            # here would also swallow KeyboardInterrupt.
            print(f"⚠️ Skipping unreadable image {f}: {e}")
            continue

        if is_green_screen_image(arr):
            green_by_size.setdefault(size, []).append(f)
            green_count += 1
        else:
            normal_images.append((f, size))

    print(f"✅ Found {green_count} GREEN SCREEN images")
    print(f"✅ Found {len(normal_images)} NORMAL images")
    print("🔄 Comparing subjects to find duplicates...")

    duplicates_found = 0

    # 2. Compare
    for i, (norm_path, size) in enumerate(normal_images):
        if i % 100 == 0:
            print(f" Processed {i}/{len(normal_images)} normal images...")

        candidates = green_by_size.get(size, ())
        # any() stops at the first green image whose subject matches.
        if not any(
            are_visually_identical_subject(norm_path, green_path)
            for green_path in candidates
        ):
            continue

        # The counter prefix restarts at 0 on every run, so a name may already
        # be taken by a file quarantined earlier. Pick a free name rather than
        # overwriting a file the user has not reviewed yet.
        dest = TRASH_DIR / f"{duplicates_found}_{norm_path.name}"
        bump = 0
        while dest.exists():
            bump += 1
            dest = TRASH_DIR / f"{duplicates_found}_{bump}_{norm_path.name}"

        try:
            shutil.move(str(norm_path), str(dest))
        except OSError as e:
            # Best effort: report the failure and go on with the next image
            # instead of retrying the same move for every other candidate.
            print(f"Error moving: {e}")
        else:
            duplicates_found += 1

    print("-" * 50)
    print(f"🎉 DONE! Found and moved {duplicates_found} duplicates.")
    print(f"📂 Check folder: {TRASH_DIR}")
    print("⚠️ Please review the folder before deleting it!")
|
|
|
|
# Script entry point: run the cleanup only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    smart_cleanup()
|