🎨 Generated 10 Steampunk/Chibi Animals & Final Reference Org
EXTENDED SESSION (03:00 - 03:45 CET): 1. ANIMAL GENERATION (assets/slike/animals/generated_steampunk/): ✅ 10 unique assets created: - Farm: Cow, Pig, Chicken, Duck, Goat, Horse, Rabbit, Donkey, Llama - Forest: Fox, Bear, Wolf - Style: Dark Noir Steampunk Chibi 2. REFERENCE ORGANIZATION (assets/slike/glavna_referenca/): ✅ Organized 2,626 files into subfolders ✅ Created comprehensive biome structure (200 folders) ✅ Moved docs to docs/art_guidelines/ SESSION UPDATE: - Total Time: 3h 03min - Files Processed: 5,788+ - Status: SESSION COMPLETE! 🚀
This commit is contained in:
175
scripts/smart_green_cleanup.py
Normal file
175
scripts/smart_green_cleanup.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SMART VISUAL CLEANUP
|
||||
Detects duplicates based on VISUAL CONTENT (pixels), not filenames.
|
||||
Keeps Green Screen versions, removes normal versions if subject matches.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
|
||||
# --- Paths ---------------------------------------------------------------
# Root folder that is scanned (recursively) for reference images.
SOURCE_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike/glavna_referenca")
# Quarantine folder inside SOURCE_DIR that receives the detected duplicates.
TRASH_DIR = SOURCE_DIR.joinpath("_ZA_BRISANJE_DUPLIKATI")

# --- Chroma-key colour bounds (R, G, B) ----------------------------------
# Typical chroma green is roughly (0, 177, 64) or pure (0, 255, 0).
# These two arrays are the lower and upper corners of a loose "green" box
# in RGB space.
GREEN_MIN = np.array((0, 80, 0))
GREEN_MAX = np.array((120, 255, 120))
|
||||
|
||||
def is_green_pixel(arr):
    """Return a boolean mask marking the green-dominant pixels of an image.

    A pixel counts as green when its G channel is strictly greater than both
    its R channel and its B channel, and is also greater than 100.

    Args:
        arr: Array indexed as ``[row, column, channel]`` with R, G and B in
            channels 0, 1 and 2.

    Returns:
        Boolean array of shape ``(rows, columns)``; ``True`` marks green.
    """
    red = arr[:, :, 0]
    green = arr[:, :, 1]
    blue = arr[:, :, 2]

    beats_red = green > red
    beats_blue = green > blue
    bright_enough = green > 100
    return beats_red & beats_blue & bright_enough
|
||||
|
||||
def is_green_screen_image(img_arr):
    """Guess whether an image was rendered on a green-screen background.

    Inspects a 5x5 pixel patch in each of the four corners. A corner votes
    "green" when more than 80% of its pixels pass ``is_green_pixel``; the
    image is classified as green screen when at least three corners vote so.

    Args:
        img_arr: Image array of shape ``(height, width, channels)``.

    Returns:
        ``True`` if at least three corners are green. Images smaller than
        10 pixels in either dimension are always ``False``.
    """
    height, width, _ = img_arr.shape
    if min(height, width) < 10:
        return False

    patch = 5
    top, bottom = slice(0, patch), slice(height - patch, height)
    left, right = slice(0, patch), slice(width - patch, width)
    corner_regions = (
        (top, left),
        (top, right),
        (bottom, left),
        (bottom, right),
    )

    # One vote per corner whose patch is more than 80% green.
    green_corners = sum(
        1
        for rows, cols in corner_regions
        if np.mean(is_green_pixel(img_arr[rows, cols])) > 0.8
    )
    return green_corners >= 3
|
||||
|
||||
def are_visually_identical_subject(normal_path, green_path):
    """Check whether two images show the same subject, ignoring background.

    Pixels that ``is_green_pixel`` marks as green in the green-screen image
    are treated as background and skipped; the remaining ("subject") pixels
    are compared between the two images at full resolution.

    Args:
        normal_path: Path of the image with a regular background.
        green_path: Path of the green-screen image.

    Returns:
        ``True`` when both images have identical dimensions and the mean
        difference over the subject pixels is below 15, where each pixel's
        difference is the sum of the absolute R, G and B differences
        (range 0..765). ``False`` when the dimensions differ, when the green
        image contains no subject pixels, or when either file cannot be
        read or compared (a warning is printed in that case).
    """
    try:
        # Context managers release both file handles deterministically.
        with Image.open(normal_path) as raw_n, Image.open(green_path) as raw_g:
            # Size comes from the file header, so mismatching pairs are
            # rejected before any pixel data is decoded.
            if raw_n.size != raw_g.size:
                return False

            arr_n = np.array(raw_n.convert('RGB'))
            arr_g = np.array(raw_g.convert('RGB'))

        # Background = green pixels of the green-screen image.
        green_mask = is_green_pixel(arr_g)

        # Per-pixel difference: |dR| + |dG| + |dB|. Cast to a signed type
        # first so the uint8 subtraction cannot wrap around.
        diff = np.abs(arr_n.astype(int) - arr_g.astype(int))
        diff_sum = np.sum(diff, axis=2)

        # Only the subject (non-green) area of the green image matters.
        subject_diff = diff_sum[~green_mask]
        if subject_diff.size == 0:
            return False  # Green image is entirely background.

        # Small tolerance to absorb compression noise between the versions.
        return bool(np.mean(subject_diff) < 15.0)

    except Exception as exc:
        # Best effort: an unreadable or odd file simply counts as "no match",
        # but the problem is reported instead of being silently dropped.
        print(f"⚠️ Could not compare {normal_path} with {green_path}: {exc}")
        return False
|
||||
|
||||
def smart_cleanup():
    """Move normal images that duplicate a green-screen image into TRASH_DIR.

    Steps:
      1. Recursively collect every ``*.png`` and ``*.jpg`` below SOURCE_DIR,
         skipping any path that contains ``_ZA_BRISANJE``.
      2. Classify each image with ``is_green_screen_image``.
      3. For every normal image, look for a green-screen image with the same
         pixel dimensions for which ``are_visually_identical_subject`` holds.
      4. Move each matched normal image into TRASH_DIR under a
         counter-prefixed name. Nothing is deleted and existing files in
         TRASH_DIR are never overwritten.

    Progress and a summary are printed to stdout; nothing is returned.
    """
    print("🧠 STARTING VISUAL ANALYSIS (PIXEL MATCHING)...")
    print(f"📂 Searching in: {SOURCE_DIR}")

    TRASH_DIR.mkdir(exist_ok=True)

    all_files = list(SOURCE_DIR.rglob("*.png")) + list(SOURCE_DIR.rglob("*.jpg"))
    print(f"🔍 Analyzing {len(all_files)} images...")

    # 1. Classify images. Green images are grouped by (width, height) right
    #    away so step 2 only compares images of identical dimensions.
    print("🎨 Classifying Green Screen vs Normal...")
    green_by_size = {}
    normal_images = []
    green_count = 0
    for f in all_files:
        if "_ZA_BRISANJE" in str(f):
            continue  # Already quarantined by an earlier run.

        try:
            with Image.open(f) as img:
                size = img.size
                is_green = is_green_screen_image(np.array(img.convert('RGB')))
        except Exception as exc:
            # Unreadable/corrupt files are skipped, but reported. Unlike a
            # bare ``except`` this lets Ctrl-C interrupt the scan.
            print(f"⚠️ Skipping unreadable image {f}: {exc}")
            continue

        if is_green:
            green_by_size.setdefault(size, []).append(f)
            green_count += 1
        else:
            normal_images.append((f, size))

    print(f"✅ Found {green_count} GREEN SCREEN images")
    print(f"✅ Found {len(normal_images)} NORMAL images")
    print("🔄 Comparing subjects to find duplicates...")

    duplicates_found = 0

    # 2. Compare each normal image against same-sized green images.
    for i, (norm_path, size) in enumerate(normal_images):
        if i % 100 == 0:
            print(f" Processed {i}/{len(normal_images)} normal images...")

        candidates = green_by_size.get(size, ())
        # any() stops at the first matching green image.
        if not any(
            are_visually_identical_subject(norm_path, green_path)
            for green_path in candidates
        ):
            continue

        # Pick a destination that does not exist yet, so files quarantined
        # by a previous run are never overwritten.
        dest = TRASH_DIR / f"{duplicates_found}_{norm_path.name}"
        bump = 0
        while dest.exists():
            bump += 1
            dest = TRASH_DIR / f"{duplicates_found}_{bump}_{norm_path.name}"

        try:
            shutil.move(str(norm_path), str(dest))
        except OSError as exc:
            # One attempt per file; a failed move is not retried for the
            # remaining candidates.
            print(f"Error moving: {exc}")
            continue

        duplicates_found += 1

    print("-" * 50)
    print(f"🎉 DONE! Found and moved {duplicates_found} duplicates.")
    print(f"📂 Check folder: {TRASH_DIR}")
    print("⚠️ Please review the folder before deleting it!")
|
||||
|
||||
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    smart_cleanup()
|
||||
Reference in New Issue
Block a user