#!/usr/bin/env python3
"""
GLAVNA REFERENCA CLEANUP (cleanup of the main reference image folder).

Planned cleanup steps:
1. Organization into sub-folders
2. Renaming to short names
3. Removal of duplicates
4. Visual quality analysis

This script only performs the analysis part: it reports duplicate images,
groups images into categories by file name, and prints file-name length
statistics. It does not move, rename, or delete any image.
"""

import os
import hashlib
import json
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Union

REFERENCA_DIR = Path("/Users/davidkotnik/repos/novafarma/assets/slike/glavna_referenca")

# File suffixes (compared lower-cased) that are treated as images everywhere
# in this script, so that all analyses look at the same set of files.
_IMAGE_SUFFIXES = frozenset({".png", ".jpg", ".jpeg"})

# Category -> keywords searched for as substrings of the lower-cased file name.
# Order matters: the first category with a matching keyword wins.
# NOTE: matching is by plain substring, so short keywords such as 'ui', 'ana'
# or 'hoe' also match inside longer words (e.g. 'fruit', 'banana', 'shoe').
_CATEGORY_KEYWORDS = {
    'characters': ['kai', 'ana', 'gronk', 'main_character'],
    'npcs': ['npc', 'priest', 'merchant', 'farmer', 'guard', 'elder',
             'innkeeper', 'blacksmith', 'mayor', 'teacher', 'herbalist',
             'hunter'],
    'crops': ['crop', 'wheat', 'corn', 'potato', 'tomato', 'carrot',
              'cannabis', 'korenje', 'krompir', 'koruza', 'paradiznik',
              'konoplja', 'stage', 'growth'],
    'trees': ['tree', 'drevo', 'oak', 'pine', 'jablana', 'visnja', 'hruška'],
    'buildings': ['building', 'house', 'barn', 'church', 'school', 'hospital',
                  'cerkev', 'sola', 'hisa', 'hlev', 'zgradbe'],
    'items': ['item', 'predmet', 'blueprint', 'resource', 'seme', 'seed'],
    'tools': ['tool', 'orodje', 'axe', 'pickaxe', 'hoe', 'sekira', 'kramp',
              'motika'],
    'ui': ['ui', 'button', 'gumb', 'icon', 'ikona', 'panel', 'okvir'],
    'enemies': ['enemy', 'zombie', 'zombi', 'boss', 'monster', 'sovraznik'],
    'animals': ['animal', 'cow', 'pig', 'sheep', 'chicken', 'krava', 'prasic'],
    'biomes': ['biome', 'biom', 'grassland', 'forest', 'desert', 'snow'],
    'effects': ['vfx', 'effect', 'sparkle', 'glow', 'particle'],
    'interior': ['interior', 'notranjost', 'furniture', 'pohištvo'],
}


def _resolve_root(root: Optional[Union[str, Path]]) -> Path:
    """Return *root* as a Path, falling back to REFERENCA_DIR when it is None."""
    return REFERENCA_DIR if root is None else Path(root)


def _iter_images(root: Path, recursive: bool) -> List[Path]:
    """Return the image files under *root*, sorted by path.

    The suffix comparison is case-insensitive, so ``.PNG`` is found as well
    as ``.png``. With ``recursive=False`` only direct children of *root* are
    considered. Sorting makes the output order independent of the order in
    which the file system lists entries.
    """
    candidates = root.rglob("*") if recursive else root.glob("*")
    return sorted(
        path for path in candidates
        if path.suffix.lower() in _IMAGE_SUFFIXES and path.is_file()
    )


def get_file_hash(filepath: Union[str, Path]) -> Optional[str]:
    """Return the MD5 hex digest of the file's contents.

    MD5 is used here only to detect identical content, not for security.

    Args:
        filepath: Path of the file to hash.

    Returns:
        The hex digest, or None if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    try:
        with open(filepath, "rb") as f:
            # Read in chunks so large images are never loaded whole.
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)
    except OSError:
        # Unreadable files are skipped by callers; anything that is not an
        # I/O problem (e.g. Ctrl+C) is deliberately left to propagate.
        return None
    return digest.hexdigest()


def find_duplicates(root: Optional[Union[str, Path]] = None) -> List[Dict]:
    """Find groups of byte-identical images and write a JSON report.

    Images are searched recursively under *root*. The report is written to
    ``DUPLICATE_REPORT.json`` inside *root*.

    Args:
        root: Directory to scan; defaults to REFERENCA_DIR.

    Returns:
        One dict per duplicate group with the keys ``hash``, ``count``,
        ``size`` (bytes of one copy) and ``files`` (paths relative to *root*).

    Raises:
        OSError: If the report file cannot be written.
    """
    root = _resolve_root(root)
    print("🔍 IŠČEM DUPLIKATE...\n")

    # Collect every image in a single directory walk.
    all_files = _iter_images(root, recursive=True)
    print(f"Najdenih {len(all_files)} slik...")

    # Group files by content hash; files that cannot be read are skipped.
    hash_map = defaultdict(list)
    for i, filepath in enumerate(all_files):
        if i % 100 == 0:
            print(f" Obdelanih: {i}/{len(all_files)}")
        file_hash = get_file_hash(filepath)
        if file_hash:
            hash_map[file_hash].append(filepath)

    # Only hashes shared by more than one file are duplicates.
    duplicates = {
        hash_val: files
        for hash_val, files in hash_map.items()
        if len(files) > 1
    }

    print("\n✅ Analiza končana!")
    print(f"📊 DUPLIKATI: {len(duplicates)} skupin")

    duplicate_report = []
    total_duplicates = 0
    for hash_val, files in duplicates.items():
        total_duplicates += len(files) - 1  # one copy of each group is kept
        duplicate_report.append({
            'hash': hash_val,
            'count': len(files),
            'size': files[0].stat().st_size,
            'files': [str(f.relative_to(root)) for f in files],
        })

    # Space that deleting all but one copy of every group would free.
    space_wasted_mb = sum(
        g['size'] * (g['count'] - 1) for g in duplicate_report
    ) / 1024 / 1024

    report_file = root / "DUPLICATE_REPORT.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total_groups': len(duplicates),
            'total_duplicates': total_duplicates,
            'space_wasted_mb': space_wasted_mb,
            'groups': duplicate_report,
        }, f, indent=2, ensure_ascii=False)

    print(f"\n📄 Poročilo shranjeno: DUPLICATE_REPORT.json")
    print(f"🗑️ Lahko zbrišeš: {total_duplicates} duplikatov")
    print(f"💾 Prihranila bi: {space_wasted_mb:.1f} MB")

    return duplicate_report


def categorize_files(root: Optional[Union[str, Path]] = None) -> Dict[str, List[Path]]:
    """Group the images directly inside *root* by keywords in their names.

    Each file goes into the first category (in the order of
    ``_CATEGORY_KEYWORDS``) that has a keyword contained in the lower-cased
    file name, or into ``'other'`` when no keyword matches. Sub-directories
    are not scanned.

    Args:
        root: Directory to scan; defaults to REFERENCA_DIR.

    Returns:
        A dict mapping every category name (including ``'other'``) to the
        list of matching paths; categories without files map to ``[]``.
    """
    root = _resolve_root(root)
    print("\n📂 KATEGORIZACIJA...\n")

    categories = {name: [] for name in _CATEGORY_KEYWORDS}
    categories['other'] = []

    all_files = _iter_images(root, recursive=False)
    print(f"Kategoriziram {len(all_files)} slik...")

    for filepath in all_files:
        filename_lower = filepath.name.lower()
        matched = next(
            (
                category
                for category, words in _CATEGORY_KEYWORDS.items()
                if any(word in filename_lower for word in words)
            ),
            'other',
        )
        categories[matched].append(filepath)

    # Print non-empty categories, largest first.
    print("\n📊 KATEGORIZACIJA:")
    for category, files in sorted(
        categories.items(), key=lambda item: len(item[1]), reverse=True
    ):
        if files:
            print(f" {category}: {len(files)} slik")

    return categories


def analyze_filenames(root: Optional[Union[str, Path]] = None) -> Dict[str, Union[int, float]]:
    """Print and return file-name length statistics for images in *root*.

    Lengths are measured on the file stem (name without suffix). Only files
    directly inside *root* are considered.

    Args:
        root: Directory to scan; defaults to REFERENCA_DIR.

    Returns:
        A dict with ``avg_length``, ``max_length`` and ``long_names_count``
        (number of stems longer than 50 characters). All values are 0 when
        no images are found.
    """
    root = _resolve_root(root)
    print("\n📏 ANALIZA DOLŽIN IMEN...\n")

    all_files = _iter_images(root, recursive=False)
    lengths = [len(f.stem) for f in all_files]

    avg_length = sum(lengths) / len(lengths) if lengths else 0
    max_length = max(lengths) if lengths else 0
    long_names = [f for f in all_files if len(f.stem) > 50]

    print(f"Povprečna dolžina imena: {avg_length:.1f} znakov")
    print(f"Najdaljše ime: {max_length} znakov")
    print(f"Imen daljših od 50 znakov: {len(long_names)}")

    if long_names:
        print("\n🔴 TOP 10 NAJDALJŠIH IMEN:")
        for f in sorted(long_names, key=lambda x: len(x.stem), reverse=True)[:10]:
            # Add the ellipsis only when the name really was cut off.
            shown = f.name if len(f.name) <= 80 else f"{f.name[:80]}..."
            print(f" {len(f.stem):3d} {shown}")

    return {
        'avg_length': avg_length,
        'max_length': max_length,
        'long_names_count': len(long_names)
    }


def main() -> None:
    """Run all three analyses on REFERENCA_DIR and print the next steps."""
    print("=" * 80)
    print(" GLAVNA REFERENCA - CLEANUP ANALIZA")
    print("=" * 80)

    # Fail early with a readable message instead of a traceback when the
    # duplicate report cannot be written into a missing directory.
    if not REFERENCA_DIR.is_dir():
        raise SystemExit(f"❌ Mapa ne obstaja: {REFERENCA_DIR}")

    # 1. Duplicates
    find_duplicates()

    # 2. Categories
    categorize_files()

    # 3. File-name lengths
    analyze_filenames()

    print("\n" + "=" * 80)
    print(" ANALIZA KONČANA!")
    print("=" * 80)
    print("\n📋 NASLEDNJI KORAKI:")
    print(" 1. Preglej DUPLICATE_REPORT.json")
    print(" 2. Izbriši duplikate")
    print(" 3. Organiziraj v pod-mape")
    print(" 4. Preimenuj v kratka imena")


if __name__ == "__main__":
    main()