MAJOR CLEANUP: Glavna referenčna mapa organizirana in preimenovana STATISTIKA: - Seansa 1: 1,732 slik (root mapa) - Seansa 2: 487 slik (podmape) - Skupaj: ~2,219 preimenovanj SPREMEMBE: - Angleška preprosta imena (brez podčrtajev) - Odstranjena dolga generirana imena - Format: category1.png, category2.png - Brez timestamps, brez prefixov PRIMER: - PREJ: src_assets_library_godot_references_references_enemies_zombies_variants_strong_animations_napad_zombi_strong_attack1.png - POTEM: zombie1.png - PREJ: libraryphases102.png - POTEM: library102.png NOVO DODANE SKRIPTE: - complete_mass_rename.py (kompletni rename vključno s podmapami) - simple_rename.py (inteligentni rename sistem) - glavna_referenca_cleanup.py (analiza duplikatov) - DUPLICATE_REPORT.json (4 duplikati najdeni) ANALIZA: - Duplikati: 4 skupine (3.5 MB prostora) - Dolžina imen: iz 53.3 → ~10 znakov - Organizacija: Po kategorijah VSE SLIKE SEDAJ IMAJO KRATKA PREPROSTA IMENA! Trajanje: ~30 min Status: SUCCESS
204 lines
6.7 KiB
Python
204 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
GLAVNA REFERENCA CLEANUP
|
|
1. Organizacija v pod-mape
|
|
2. Preimenovanje v kratka imena
|
|
3. Odstranjevanje duplikatov
|
|
4. Vizualna analiza kvalitete
|
|
"""
|
|
|
|
import os
|
|
import hashlib
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
import json
|
|
|
|
# Folder that the analysis functions in this file scan for images and where
# the duplicate report is written.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another checkout or machine.
REFERENCA_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike/glavna_referenca")
|
|
|
|
def get_file_hash(filepath):
    """Return the MD5 hex digest of a file's contents.

    The file is read in fixed-size chunks so large images are never loaded
    into memory in one piece.

    Args:
        filepath: Path (``str`` or path-like) of the file to hash.

    Returns:
        The 32-character hexadecimal MD5 digest, or ``None`` when the file
        cannot be opened or read.
    """
    digest = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)
    except (OSError, ValueError):
        # Best-effort: unreadable/missing files (OSError) and malformed paths
        # (ValueError, e.g. embedded NUL) are reported as "no hash". Anything
        # else - including KeyboardInterrupt - must propagate, which a bare
        # ``except:`` would have swallowed.
        return None
    return digest.hexdigest()
|
|
|
|
def find_duplicates(directory=None):
    """Find byte-identical images below a folder and write a JSON report.

    Every ``.png``/``.jpg``/``.jpeg`` file under the folder (searched
    recursively, extension compared case-insensitively) is hashed with
    ``get_file_hash``. Files that share a hash form one duplicate group;
    files for which no hash is returned are left out. A summary is written
    to ``DUPLICATE_REPORT.json`` inside the scanned folder and progress is
    printed to stdout.

    Args:
        directory: Folder to scan. Defaults to ``REFERENCA_DIR``.

    Returns:
        A list with one dict per duplicate group containing ``hash``,
        ``count`` (files in the group), ``size`` (bytes, taken from the
        first file of the group) and ``files`` (paths relative to the
        scanned folder).

    Raises:
        OSError: If the report file cannot be written or a grouped file can
            no longer be stat'ed.
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("🔍 IŠČEM DUPLIKATE...\n")

    # Collect all PNG/JPG images in one tree walk. Comparing the lower-cased
    # suffix also picks up "*.PNG"/"*.JPG", and sorting makes the report
    # independent of filesystem traversal order.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    all_files = sorted(
        path for path in root.rglob('*')
        if path.suffix.lower() in image_suffixes and path.is_file()
    )

    print(f"Najdenih {len(all_files)} slik...")

    # Group files by content hash.
    hash_map = defaultdict(list)
    for i, filepath in enumerate(all_files):
        if i % 100 == 0:
            print(f" Obdelanih: {i}/{len(all_files)}")

        file_hash = get_file_hash(filepath)
        if file_hash:
            hash_map[file_hash].append(filepath)

    # Only hashes shared by more than one file are duplicates.
    duplicates = {hash_val: files for hash_val, files in hash_map.items() if len(files) > 1}

    print("\n✅ Analiza končana!")
    print(f"📊 DUPLIKATI: {len(duplicates)} skupin")

    # Build the report entries.
    duplicate_report = [
        {
            'hash': hash_val,
            'count': len(files),
            'size': files[0].stat().st_size,
            'files': [str(f.relative_to(root)) for f in files],
        }
        for hash_val, files in duplicates.items()
    ]

    # One file per group is kept, so only ``count - 1`` copies are redundant.
    total_duplicates = sum(g['count'] - 1 for g in duplicate_report)
    space_wasted_mb = sum(g['size'] * (g['count'] - 1) for g in duplicate_report) / 1024 / 1024

    # Save the report next to the scanned images.
    report_file = root / "DUPLICATE_REPORT.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total_groups': len(duplicates),
            'total_duplicates': total_duplicates,
            'space_wasted_mb': space_wasted_mb,
            'groups': duplicate_report
        }, f, indent=2, ensure_ascii=False)

    print("\n📄 Poročilo shranjeno: DUPLICATE_REPORT.json")
    print(f"🗑️ Lahko zbrišeš: {total_duplicates} duplikatov")
    print(f"💾 Prihranila bi: {space_wasted_mb:.1f} MB")

    return duplicate_report
|
|
|
|
def categorize_files(directory=None):
    """Group the images directly inside a folder by keywords in their names.

    Only the folder itself is inspected (no recursion). A file is an image
    when its lower-cased extension is ``.png``, ``.jpg`` or ``.jpeg``. Each
    lower-cased file name (extension included) is tested against the keyword
    lists with a plain substring check; the first category, in declaration
    order, with a matching keyword wins. Files matching nothing go to
    ``'other'``. A per-category count is printed to stdout.

    NOTE(review): because matching is by substring, short keywords such as
    ``'ana'`` or ``'ui'`` also match inside longer words (e.g. "banana").

    Args:
        directory: Folder to inspect. Defaults to ``REFERENCA_DIR``.

    Returns:
        A dict mapping every category name (plus ``'other'``) to the list of
        ``Path`` objects assigned to it; categories without files map to an
        empty list.
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("\n📂 KATEGORIZACIJA...\n")

    # Keywords per category; dict order is the matching priority.
    keywords = {
        'characters': ['kai', 'ana', 'gronk', 'main_character'],
        'npcs': ['npc', 'priest', 'merchant', 'farmer', 'guard', 'elder', 'innkeeper',
                 'blacksmith', 'mayor', 'teacher', 'herbalist', 'hunter'],
        'crops': ['crop', 'wheat', 'corn', 'potato', 'tomato', 'carrot', 'cannabis',
                  'korenje', 'krompir', 'koruza', 'paradiznik', 'konoplja', 'stage', 'growth'],
        'trees': ['tree', 'drevo', 'oak', 'pine', 'jablana', 'visnja', 'hruška'],
        'buildings': ['building', 'house', 'barn', 'church', 'school', 'hospital',
                      'cerkev', 'sola', 'hisa', 'hlev', 'zgradbe'],
        'items': ['item', 'predmet', 'blueprint', 'resource', 'seme', 'seed'],
        'tools': ['tool', 'orodje', 'axe', 'pickaxe', 'hoe', 'sekira', 'kramp', 'motika'],
        'ui': ['ui', 'button', 'gumb', 'icon', 'ikona', 'panel', 'okvir'],
        'enemies': ['enemy', 'zombie', 'zombi', 'boss', 'monster', 'sovraznik'],
        'animals': ['animal', 'cow', 'pig', 'sheep', 'chicken', 'krava', 'prasic'],
        'biomes': ['biome', 'biom', 'grassland', 'forest', 'desert', 'snow'],
        'effects': ['vfx', 'effect', 'sparkle', 'glow', 'particle'],
        'interior': ['interior', 'notranjost', 'furniture', 'pohištvo'],
    }

    # One bucket per keyword category, with the catch-all bucket last.
    categories = {name: [] for name in keywords}
    categories['other'] = []

    # Collect the images; ".jpeg" and upper-case extensions are included so
    # the set of files agrees with what the duplicate scan treats as images.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    all_files = sorted(
        path for path in root.glob('*')
        if path.suffix.lower() in image_suffixes and path.is_file()
    )

    print(f"Kategoriziram {len(all_files)} slik...")

    for filepath in all_files:
        filename_lower = filepath.name.lower()
        # First category whose keyword occurs in the name, else 'other'.
        category = next(
            (name for name, words in keywords.items()
             if any(word in filename_lower for word in words)),
            'other',
        )
        categories[category].append(filepath)

    # Report, largest category first; empty categories are not printed.
    print("\n📊 KATEGORIZACIJA:")
    for category, files in sorted(categories.items(), key=lambda item: len(item[1]), reverse=True):
        if files:
            print(f" {category}: {len(files)} slik")

    return categories
|
|
|
|
def analyze_filenames(directory=None):
    """Report statistics about image file-name lengths in a folder.

    Only files directly inside the folder are considered (no recursion); a
    file is an image when its lower-cased extension is ``.png``, ``.jpg`` or
    ``.jpeg``. Lengths are measured on the name without its extension. The
    statistics, and up to ten of the longest names over 50 characters, are
    printed to stdout.

    Args:
        directory: Folder to inspect. Defaults to ``REFERENCA_DIR``.

    Returns:
        A dict with ``avg_length`` (mean stem length, ``0`` when there are
        no images), ``max_length`` and ``long_names_count`` (stems longer
        than 50 characters).
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("\n📏 ANALIZA DOLŽIN IMEN...\n")

    image_suffixes = {'.png', '.jpg', '.jpeg'}
    all_files = sorted(
        path for path in root.glob('*')
        if path.suffix.lower() in image_suffixes and path.is_file()
    )

    lengths = [len(f.stem) for f in all_files]
    avg_length = sum(lengths) / len(lengths) if lengths else 0
    max_length = max(lengths, default=0)

    long_names = [f for f in all_files if len(f.stem) > 50]

    print(f"Povprečna dolžina imena: {avg_length:.1f} znakov")
    print(f"Najdaljše ime: {max_length} znakov")
    print(f"Imen daljših od 50 znakov: {len(long_names)}")

    if long_names:
        print("\n🔴 TOP 10 NAJDALJŠIH IMEN:")
        for f in sorted(long_names, key=lambda x: len(x.stem), reverse=True)[:10]:
            # Add the ellipsis only when the name really was cut at 80 chars.
            shown = f.name if len(f.name) <= 80 else f.name[:80] + "..."
            print(f" {len(f.stem):3d} {shown}")

    return {
        'avg_length': avg_length,
        'max_length': max_length,
        'long_names_count': len(long_names)
    }
|
|
|
|
if __name__ == "__main__":
    # Horizontal rule shared by the opening and closing banners.
    separator = "=" * 80

    print(separator)
    print(" GLAVNA REFERENCA - CLEANUP ANALIZA")
    print(separator)

    # Run the three analyses in order: duplicates, categories, name lengths.
    duplicates = find_duplicates()
    categories = categorize_files()
    name_stats = analyze_filenames()

    print("\n" + separator)
    print(" ANALIZA KONČANA!")
    print(separator)

    # Manual follow-up steps; this script itself only reports.
    print("\n📋 NASLEDNJI KORAKI:")
    next_steps = (
        " 1. Preglej DUPLICATE_REPORT.json",
        " 2. Izbriši duplikate",
        " 3. Organiziraj v pod-mape",
        " 4. Preimenuj v kratka imena",
    )
    for step in next_steps:
        print(step)
|