Files
novafarma/scripts/utils/glavna_referenca_cleanup.py
2026-01-25 12:20:50 +01:00

204 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
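GLAVNA REFERENCA CLEANUP

Analysis pass for cleaning up the "glavna_referenca" image folder.
The planned cleanup consists of:
1. Organizing the images into sub-folders
2. Renaming them to short names
3. Removing duplicates
4. Visual quality analysis

This script only reports (duplicates, name-based categories, filename
lengths); it does not move, rename or delete any image.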
"""
import os
import hashlib
from pathlib import Path
from collections import defaultdict
import json
# Root folder of the reference images. The analysis functions in this file
# scan it, and find_duplicates() writes DUPLICATE_REPORT.json into it.
# NOTE(review): this is an absolute path inside one user's home directory, so
# the script only works unchanged on that machine.
REFERENCA_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike/glavna_referenca")
def get_file_hash(filepath):
    """Return the hex MD5 digest of a file's contents, or None if unreadable.

    The digest serves as a content fingerprint (two files with the same
    digest are treated as identical); it is not meant for security purposes.

    Args:
        filepath: Path (str or path-like) of the file to hash.

    Returns:
        The 32-character hexadecimal MD5 digest, or None when the file cannot
        be opened or read (missing file, directory, permission error, ...).
    """
    hash_md5 = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            # Read in fixed-size chunks so a large image is never loaded into
            # memory in one piece.
            for chunk in iter(lambda: f.read(65536), b""):
                hash_md5.update(chunk)
    except OSError:
        # Only I/O failures mean "unreadable". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide programming errors.
        return None
    return hash_md5.hexdigest()
def find_duplicates(directory=None):
    """Find byte-identical images under a directory tree and write a report.

    Every ``*.png``, ``*.jpg`` and ``*.jpeg`` file below the directory
    (searched recursively) is hashed with ``get_file_hash``; files sharing a
    hash form one duplicate group. Files that cannot be hashed are skipped.

    Side effects: prints progress and a summary to stdout and writes
    ``DUPLICATE_REPORT.json`` into the scanned directory.

    Args:
        directory: Folder to scan. Defaults to ``REFERENCA_DIR``.

    Returns:
        A list with one dict per duplicate group, each holding ``'hash'``,
        ``'count'`` (number of files in the group), ``'size'`` (bytes of the
        first file in the group) and ``'files'`` (paths relative to the
        scanned directory).
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("🔍 IŠČEM DUPLIKATE...\n")

    # Collect all PNG/JPG images.
    all_files = []
    for ext in ['*.png', '*.jpg', '*.jpeg']:
        all_files.extend(root.rglob(ext))
    # Sort so that group order and the order of files inside each group do
    # not depend on the filesystem's enumeration order.
    all_files.sort()
    print(f"Najdenih {len(all_files)} slik...")

    # Group files by content hash.
    hash_map = defaultdict(list)
    for i, filepath in enumerate(all_files):
        if i % 100 == 0:
            print(f" Obdelanih: {i}/{len(all_files)}")
        file_hash = get_file_hash(filepath)
        if file_hash:
            hash_map[file_hash].append(filepath)

    # Only hashes shared by more than one file are duplicates.
    duplicates = {hash_val: files for hash_val, files in hash_map.items() if len(files) > 1}
    print("\n✅ Analiza končana!")
    print(f"📊 DUPLIKATI: {len(duplicates)} skupin")

    # Build the report.
    duplicate_report = [
        {
            'hash': hash_val,
            'count': len(files),
            'size': files[0].stat().st_size,
            'files': [str(f.relative_to(root)) for f in files],
        }
        for hash_val, files in duplicates.items()
    ]
    # One file per group is kept, so only count - 1 copies are redundant.
    total_duplicates = sum(g['count'] - 1 for g in duplicate_report)
    space_wasted_mb = sum(g['size'] * (g['count'] - 1) for g in duplicate_report) / 1024 / 1024

    # Save the report.
    report_file = root / "DUPLICATE_REPORT.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total_groups': len(duplicates),
            'total_duplicates': total_duplicates,
            'space_wasted_mb': space_wasted_mb,
            'groups': duplicate_report
        }, f, indent=2, ensure_ascii=False)

    print(f"\n📄 Poročilo shranjeno: DUPLICATE_REPORT.json")
    print(f"🗑️ Lahko zbrišeš: {total_duplicates} duplikatov")
    print(f"💾 Prihranila bi: {space_wasted_mb:.1f} MB")
    return duplicate_report
def categorize_files(directory=None):
    """Sort the images directly inside a directory into name-based categories.

    Each ``*.png``, ``*.jpg`` and ``*.jpeg`` file at the top level of the
    directory (not recursive) is assigned to the first category whose keyword
    list contains a substring of the lower-cased file name. Files matching no
    keyword go to ``'other'``. A per-category count is printed to stdout.

    Args:
        directory: Folder to scan. Defaults to ``REFERENCA_DIR``.

    Returns:
        A dict mapping every category name (including ``'other'``) to the
        list of ``Path`` objects assigned to it; empty categories map to
        an empty list.
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("\n📂 KATEGORIZACIJA...\n")

    # Keywords per category. Order matters: the first matching category wins.
    # NOTE(review): matching is plain substring containment, so short keywords
    # shadow longer ones in later categories -- e.g. any name containing
    # 'jablana' also contains 'ana' and is filed under 'characters'.
    keywords = {
        'characters': ['kai', 'ana', 'gronk', 'main_character'],
        'npcs': ['npc', 'priest', 'merchant', 'farmer', 'guard', 'elder', 'innkeeper',
                 'blacksmith', 'mayor', 'teacher', 'herbalist', 'hunter'],
        'crops': ['crop', 'wheat', 'corn', 'potato', 'tomato', 'carrot', 'cannabis',
                  'korenje', 'krompir', 'koruza', 'paradiznik', 'konoplja', 'stage', 'growth'],
        'trees': ['tree', 'drevo', 'oak', 'pine', 'jablana', 'visnja', 'hruška'],
        'buildings': ['building', 'house', 'barn', 'church', 'school', 'hospital',
                      'cerkev', 'sola', 'hisa', 'hlev', 'zgradbe'],
        'items': ['item', 'predmet', 'blueprint', 'resource', 'seme', 'seed'],
        'tools': ['tool', 'orodje', 'axe', 'pickaxe', 'hoe', 'sekira', 'kramp', 'motika'],
        'ui': ['ui', 'button', 'gumb', 'icon', 'ikona', 'panel', 'okvir'],
        'enemies': ['enemy', 'zombie', 'zombi', 'boss', 'monster', 'sovraznik'],
        'animals': ['animal', 'cow', 'pig', 'sheep', 'chicken', 'krava', 'prasic'],
        'biomes': ['biome', 'biom', 'grassland', 'forest', 'desert', 'snow'],
        'effects': ['vfx', 'effect', 'sparkle', 'glow', 'particle'],
        'interior': ['interior', 'notranjost', 'furniture', 'pohištvo']
    }

    # One bucket per keyword category, plus the catch-all 'other' last.
    categories = {category: [] for category in keywords}
    categories['other'] = []

    # Collect all images; '*.jpeg' is included so this agrees with the
    # extensions scanned by the duplicate search.
    all_files = []
    for pattern in ('*.png', '*.jpg', '*.jpeg'):
        all_files.extend(root.glob(pattern))
    print(f"Kategoriziram {len(all_files)} slik...")

    for filepath in all_files:
        filename_lower = filepath.name.lower()
        for category, words in keywords.items():
            if any(word in filename_lower for word in words):
                categories[category].append(filepath)
                break
        else:
            # No keyword matched in any category.
            categories['other'].append(filepath)

    # Summary, largest category first; empty categories are not printed.
    print("\n📊 KATEGORIZACIJA:")
    for category, files in sorted(categories.items(), key=lambda x: len(x[1]), reverse=True):
        if files:
            print(f" {category}: {len(files)} slik")
    return categories
def analyze_filenames(directory=None):
    """Report how long the image file names directly inside a directory are.

    Looks at every ``*.png``, ``*.jpg`` and ``*.jpeg`` file at the top level
    of the directory (not recursive) and measures the length of its stem
    (the name without the extension). Prints the average and maximum length,
    the number of stems longer than 50 characters, and the ten longest of
    those.

    Args:
        directory: Folder to scan. Defaults to ``REFERENCA_DIR``.

    Returns:
        A dict with ``'avg_length'``, ``'max_length'`` and
        ``'long_names_count'``; all three are 0 when no image is found.
    """
    root = REFERENCA_DIR if directory is None else Path(directory)

    print("\n📏 ANALIZA DOLŽIN IMEN...\n")

    # '*.jpeg' is included so this agrees with the extensions scanned by the
    # duplicate search.
    all_files = []
    for pattern in ('*.png', '*.jpg', '*.jpeg'):
        all_files.extend(root.glob(pattern))

    lengths = [len(f.stem) for f in all_files]
    # Guard the empty case: avoids ZeroDivisionError and max() on an empty list.
    avg_length = sum(lengths) / len(lengths) if lengths else 0
    max_length = max(lengths) if lengths else 0
    long_names = [f for f in all_files if len(f.stem) > 50]

    print(f"Povprečna dolžina imena: {avg_length:.1f} znakov")
    print(f"Najdaljše ime: {max_length} znakov")
    print(f"Imen daljših od 50 znakov: {len(long_names)}")

    if long_names:
        print("\n🔴 TOP 10 NAJDALJŠIH IMEN:")
        for f in sorted(long_names, key=lambda x: len(x.stem), reverse=True)[:10]:
            # Add the ellipsis only when the name was actually cut off.
            shown = f.name if len(f.name) <= 80 else f.name[:80] + "..."
            print(f" {len(f.stem):3d} {shown}")

    return {
        'avg_length': avg_length,
        'max_length': max_length,
        'long_names_count': len(long_names)
    }
if __name__ == "__main__":
    # Script entry point: print a banner, run the three analyses in order,
    # then list the manual follow-up steps.
    rule = "=" * 80

    print(rule)
    print(" GLAVNA REFERENCA - CLEANUP ANALIZA")
    print(rule)

    duplicates = find_duplicates()      # 1. duplicates
    categories = categorize_files()     # 2. categories
    name_stats = analyze_filenames()    # 3. file name lengths

    print("\n" + rule)
    print(" ANALIZA KONČANA!")
    print(rule)

    print("\n📋 NASLEDNJI KORAKI:")
    next_steps = (
        " 1. Preglej DUPLICATE_REPORT.json",
        " 2. Izbriši duplikate",
        " 3. Organiziraj v pod-mape",
        " 4. Preimenuj v kratka imena",
    )
    for step in next_steps:
        print(step)