OCR dengan ZONA

2025-12-28 01:20:37 +08:00
commit 4fe381b3f0
12 changed files with 2356 additions and 0 deletions
--- a/pycache/kk_extractor.cpython-313.pyc
+++ b/pycache/kk_extractor.cpython-313.pyc
--- a/pycache/ktp_extractor.cpython-313.pyc
+++ b/pycache/ktp_extractor.cpython-313.pyc
--- a/pycache/ocr_engine.cpython-313.pyc
+++ b/pycache/ocr_engine.cpython-313.pyc
--- a/app.py
+++ b/app.py
@@ -0,0 +1,253 @@
+"""
+Flask Web Server untuk OCR KTP/KK
+"""
+
+import os
+from flask import Flask, render_template, request, jsonify
+from werkzeug.utils import secure_filename
+
+from ocr_engine import get_ocr_engine
+from ktp_extractor import KTPExtractor
+from kk_extractor import KKExtractor
+
+app = Flask(__name__)
+
+# Configuration
+UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
+ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
+MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max
+
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
+
+# Create the upload folder if it does not exist yet
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+# Initialize the extractors once at import time; the request handlers reuse them
+ktp_extractor = KTPExtractor()
+kk_extractor = KKExtractor()
+
+
+def allowed_file(filename):
+    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
+    if '.' not in filename:
+        return False
+    extension = filename.rsplit('.', 1)[1]
+    return extension.lower() in ALLOWED_EXTENSIONS
+
+
+@app.route('/')
+def index():
+    """Serve the main page."""
+    page = render_template('index.html')
+    return page
+
+
+@app.route('/upload', methods=['POST'])
+def upload_file():
+    """Handle an image upload and run OCR field extraction on it.
+
+    Expects a multipart form with a ``file`` part and an optional
+    ``doc_type`` field (defaults to ``'ktp'``; any other value is processed
+    with the KK extractor).
+
+    Returns:
+        A JSON response. On success it carries ``success``, ``doc_type``,
+        ``data`` (the extracted fields), ``raw_text`` and ``ocr_count``.
+        On failure it carries ``success: False`` and ``error`` with status
+        400 (bad upload / no text found) or 500 (unexpected error).
+
+    Raises:
+        werkzeug.exceptions.HTTPException: HTTP errors raised while reading
+            the request (for example 413 when the body exceeds
+            MAX_CONTENT_LENGTH) are re-raised so they keep their own status
+            code instead of being reported as 500.
+    """
+    # Local imports keep this handler self-contained with respect to the
+    # module-level import block.
+    import uuid
+    from werkzeug.exceptions import HTTPException
+
+    try:
+        # Validate the upload
+        if 'file' not in request.files:
+            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400
+
+        file = request.files['file']
+        doc_type = request.form.get('doc_type', 'ktp')
+
+        if file.filename == '':
+            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400
+
+        if not allowed_file(file.filename):
+            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400
+
+        # Store the file under a unique server-generated name. Using the
+        # client-supplied name would let two concurrent uploads with the same
+        # name overwrite each other, and the first request's cleanup would
+        # delete the file the second one is still processing.
+        extension = file.filename.rsplit('.', 1)[1].lower()
+        filename = secure_filename(f"{uuid.uuid4().hex}.{extension}")
+        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        file.save(filepath)
+
+        try:
+            # Run OCR
+            ocr_engine = get_ocr_engine()
+            ocr_results = ocr_engine.extract_text(filepath)
+
+            if not ocr_results:
+                return jsonify({
+                    'success': False,
+                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
+                }), 400
+
+            # Extract fields according to the document type
+            if doc_type == 'ktp':
+                extracted = ktp_extractor.extract(ocr_results)
+            else:
+                extracted = kk_extractor.extract(ocr_results)
+
+            # Raw text, returned to the client for debugging
+            raw_text = '\n'.join([r['text'] for r in ocr_results])
+
+            # DEBUG: print raw OCR results.
+            # NOTE(review): this writes personal data from the document to
+            # stdout; consider removing it or gating it behind a debug flag.
+            print("\n" + "="*50)
+            print("DEBUG: Raw OCR Results")
+            print("="*50)
+            for i, r in enumerate(ocr_results):
+                print(f"[{i}] {r['text']}")
+            print("="*50 + "\n")
+
+            return jsonify({
+                'success': True,
+                'doc_type': doc_type,
+                'data': extracted,
+                'raw_text': raw_text,
+                'ocr_count': len(ocr_results)
+            })
+
+        finally:
+            # Delete the file after processing (it contains personal data)
+            if os.path.exists(filepath):
+                os.remove(filepath)
+
+    except HTTPException:
+        # Let Flask render HTTP errors (413, 400, ...) with their real status.
+        raise
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+# ============================================
+# Region Data API (using wilayah.id)
+# ============================================
+import requests
+from functools import lru_cache
+
+WILAYAH_API_BASE = "https://wilayah.id/api"
+
+
+@lru_cache(maxsize=100)
+def _fetch_region_data_cached(endpoint):
+    """Fetch and decode one endpoint, raising on any failure.
+
+    lru_cache stores return values only, so raising here (instead of
+    returning None) keeps failed lookups out of the cache.
+    """
+    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
+    if response.status_code != 200:
+        raise RuntimeError(f"unexpected HTTP status {response.status_code}")
+    return response.json()
+
+
+def fetch_region_data(endpoint):
+    """Fetch region data with caching.
+
+    Args:
+        endpoint: Path below WILAYAH_API_BASE, e.g. ``"provinces.json"``.
+
+    Returns:
+        The decoded JSON payload, or None when the request fails, times out,
+        returns a non-200 status, or the body is not valid JSON. Only
+        successful responses are cached; a failed fetch is retried on the
+        next call instead of being remembered as None.
+    """
+    try:
+        return _fetch_region_data_cached(endpoint)
+    except Exception as e:
+        print(f"Error fetching region data: {e}")
+        return None
+
+
+# Keep the cache-management helpers that lru_cache used to expose on this name.
+fetch_region_data.cache_clear = _fetch_region_data_cached.cache_clear
+fetch_region_data.cache_info = _fetch_region_data_cached.cache_info
+
+
+def normalize_name(name):
+    """Normalize a region name for comparison.
+
+    Upper-cases the name and removes surrounding whitespace, dots and spaces.
+    Falsy input (None, empty string) yields an empty string; non-string
+    values are converted with str() first.
+    """
+    if not name:
+        return ""
+    return str(name).upper().strip().replace(".", "").replace(" ", "")
+
+
+def find_best_match(search_name, items, key='name'):
+    """Find the item whose name best matches *search_name*.
+
+    Args:
+        search_name: Name to look for (typically OCR output).
+        items: Iterable of dicts to search.
+        key: Dict key holding each item's name.
+
+    Returns:
+        The first item whose normalized name equals the normalized search
+        name; otherwise the first item where one normalized name contains the
+        other; otherwise None. Items with an empty or missing name never
+        match, and a search name that normalizes to an empty string matches
+        nothing.
+    """
+    if not search_name or not items:
+        return None
+
+    search_norm = normalize_name(search_name)
+    if not search_norm:
+        # e.g. "." or " " -- an empty needle is "contained" in every string
+        return None
+
+    # Normalize each candidate once; both passes below reuse the result.
+    candidates = [(normalize_name(item.get(key, '')), item) for item in items]
+
+    # Try exact match first
+    for item_norm, item in candidates:
+        if item_norm == search_norm:
+            return item
+
+    # Then a containment match in either direction. Empty names are skipped
+    # because '' is a substring of everything and would match any search.
+    for item_norm, item in candidates:
+        if item_norm and (search_norm in item_norm or item_norm in search_norm):
+            return item
+
+    return None
+
+
+@app.route('/api/provinces')
+def get_provinces():
+    """Return all provinces as JSON, or an empty list with status 500 when none could be fetched."""
+    payload = fetch_region_data("provinces.json")
+    if not payload:
+        return jsonify({'data': []}), 500
+    return jsonify(payload)
+
+
+@app.route('/api/regencies/<province_code>')
+def get_regencies(province_code):
+    """Return the cities/regencies of a province as JSON, or an empty list with status 500 on failure."""
+    payload = fetch_region_data(f"regencies/{province_code}.json")
+    if not payload:
+        return jsonify({'data': []}), 500
+    return jsonify(payload)
+
+
+@app.route('/api/districts/<regency_code>')
+def get_districts(regency_code):
+    """Return the districts of a regency as JSON, or an empty list with status 500 on failure."""
+    payload = fetch_region_data(f"districts/{regency_code}.json")
+    if not payload:
+        return jsonify({'data': []}), 500
+    return jsonify(payload)
+
+
+@app.route('/api/villages/<district_code>')
+def get_villages(district_code):
+    """Return the villages of a district as JSON, or an empty list with status 500 on failure."""
+    payload = fetch_region_data(f"villages/{district_code}.json")
+    if not payload:
+        return jsonify({'data': []}), 500
+    return jsonify(payload)
+
+
+@app.route('/api/validate-region', methods=['POST'])
+def validate_region():
+    """Validate OCR region names against the region database.
+
+    Expects a JSON object with the optional keys ``provinsi``,
+    ``kabupaten_kota``, ``kecamatan`` and ``kel_desa``. The levels are
+    checked from province down to village; each level is looked up among the
+    children of the match found one level above, so validation stops at the
+    first level that cannot be fetched or matched.
+
+    Returns:
+        JSON ``{'success': True, 'validation': {...}}`` where every level
+        maps to ``{'valid', 'code', 'suggestion'}``; status 400 when the body
+        is not a JSON object; status 500 on unexpected errors.
+    """
+    try:
+        # silent=True returns None for a missing or malformed JSON body rather
+        # than raising an HTTP error that the handler below would report as 500.
+        ocr_data = request.get_json(silent=True)
+        if not isinstance(ocr_data, dict):
+            return jsonify({'success': False, 'error': 'Data JSON tidak valid'}), 400
+
+        # (result field, endpoint template) from the top level downwards; the
+        # template is filled with the code matched one level above.
+        levels = [
+            ('provinsi', 'provinces.json'),
+            ('kabupaten_kota', 'regencies/{}.json'),
+            ('kecamatan', 'districts/{}.json'),
+            ('kel_desa', 'villages/{}.json'),
+        ]
+        result = {
+            field: {'valid': False, 'code': None, 'suggestion': None}
+            for field, _ in levels
+        }
+
+        parent_code = None
+        for field, endpoint_template in levels:
+            region_data = fetch_region_data(endpoint_template.format(parent_code))
+            if not region_data or 'data' not in region_data:
+                break
+
+            match = find_best_match(ocr_data.get(field), region_data['data'])
+            if not match:
+                # Lower levels cannot be validated without a parent code.
+                break
+
+            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}
+            parent_code = match['code']
+
+        return jsonify({'success': True, 'validation': result})
+
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@app.route('/health')
+def health():
+    """Liveness probe: always answers with a fixed OK payload."""
+    status_payload = {'status': 'ok'}
+    return jsonify(status_payload)
+
+
+if __name__ == '__main__':
+    print("="*50)
+    print("OCR KTP/KK Application")
+    print("="*50)
+    print("Membuka: http://localhost:5000")
+    print("Tekan Ctrl+C untuk berhenti")
+    print("="*50)
+
+    # The Werkzeug debugger lets anyone who can reach the port execute Python
+    # code, so debug mode is opt-in (FLASK_DEBUG=1) and, when enabled, the
+    # server only listens on localhost instead of every interface.
+    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
+    bind_host = '127.0.0.1' if debug_enabled else '0.0.0.0'
+    app.run(host=bind_host, port=5000, debug=debug_enabled)
--- a/kk.png
+++ b/kk.png
--- a/kk_extractor.py
+++ b/kk_extractor.py
@@ -0,0 +1,235 @@
+"""
+KK (Kartu Keluarga) Field Extractor
+Ekstraksi data terstruktur dari hasil OCR KK Indonesia
+"""
+
+import re
+from typing import Dict, Optional, List
+
+
+class KKExtractor:
+    """Extract structured fields from the OCR output of a Kartu Keluarga (KK)."""
+
+    def __init__(self):
+        pass
+
+    def extract(self, ocr_results: List[Dict]) -> Dict:
+        """
+        Extract KK fields from OCR results.
+
+        Args:
+            ocr_results: List of OCR entries in reading order (results of
+                OCREngine.extract_text()). Each entry must provide a 'text' key.
+
+        Returns:
+            Dict with the KK fields. Scalar fields that were not found are
+            None; 'anggota_keluarga' is a (possibly empty) list of member
+            dicts as built by _extract_member().
+        """
+        all_text = '\n'.join([r['text'] for r in ocr_results])
+
+        result = {
+            'no_kk': None,
+            'nama_kepala_keluarga': None,
+            'alamat': None,
+            'rt_rw': None,
+            'kel_desa': None,
+            'kecamatan': None,
+            'kabupaten_kota': None,
+            'provinsi': None,
+            'kode_pos': None,
+            'anggota_keluarga': [],
+        }
+
+        # Default No. KK: the first 16-digit number anywhere in the text
+        kk_match = re.search(r'\b(\d{16})\b', all_text)
+        if kk_match:
+            result['no_kk'] = kk_match.group(1)
+
+        # Becomes True once the member-table header has been seen
+        in_table = False
+
+        for i, ocr in enumerate(ocr_results):
+            text = ocr['text'].strip()
+            text_lower = text.lower()
+
+            # Province
+            if 'provinsi' in text_lower and result['provinsi'] is None:
+                result['provinsi'] = self._extract_value(text, 'provinsi')
+
+            # Regency/City
+            if ('kabupaten' in text_lower or 'kota' in text_lower) and result['kabupaten_kota'] is None:
+                val = self._extract_value(text, 'kabupaten') or self._extract_value(text, 'kota')
+                if val:
+                    result['kabupaten_kota'] = val
+                else:
+                    # No separable value: keep the whole line
+                    result['kabupaten_kota'] = text
+
+            # District
+            if 'kecamatan' in text_lower and result['kecamatan'] is None:
+                result['kecamatan'] = self._extract_value(text, 'kecamatan')
+
+            # Village
+            if ('kelurahan' in text_lower or 'desa' in text_lower) and result['kel_desa'] is None:
+                result['kel_desa'] = self._extract_value(text, 'kelurahan') or self._extract_value(text, 'desa')
+
+            # Labelled No. KK
+            if 'no' in text_lower and ('kk' in text_lower or 'kartu' in text_lower):
+                # Look for 16 digits in this text or in the next one
+                match = re.search(r'(\d{16})', text)
+                if match:
+                    result['no_kk'] = match.group(1)
+                elif i + 1 < len(ocr_results):
+                    next_text = ocr_results[i + 1]['text']
+                    match = re.search(r'(\d{16})', next_text)
+                    if match:
+                        result['no_kk'] = match.group(1)
+
+            # Head of family name. Only the first hit is used: member-table
+            # rows also contain "KEPALA KELUARGA" (relationship column) and
+            # must not overwrite the name read from the document header.
+            if ('kepala' in text_lower and 'keluarga' in text_lower
+                    and result['nama_kepala_keluarga'] is None):
+                result['nama_kepala_keluarga'] = self._extract_value(text, 'keluarga')
+                if not result['nama_kepala_keluarga'] and i + 1 < len(ocr_results):
+                    # The name may be on the next line
+                    next_text = ocr_results[i + 1]['text'].strip()
+                    if not any(kw in next_text.lower() for kw in ['alamat', 'rt', 'rw', 'provinsi']):
+                        result['nama_kepala_keluarga'] = next_text
+
+            # Address
+            if 'alamat' in text_lower and result['alamat'] is None:
+                result['alamat'] = self._extract_value(text, 'alamat')
+
+            # RT/RW
+            rt_rw_match = re.search(r'rt\s*/?\s*rw\s*[:\s]*(\d+)\s*/\s*(\d+)', text_lower)
+            if rt_rw_match:
+                result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
+
+            # Postal code
+            if 'kode' in text_lower and 'pos' in text_lower:
+                match = re.search(r'(\d{5})', text)
+                if match:
+                    result['kode_pos'] = match.group(1)
+
+            # Member-table header: rows after it are parsed as members
+            if self._is_table_header(text_lower):
+                in_table = True
+                continue
+
+            # Extract family members from table rows. The header's y position
+            # is deliberately not consulted: a header at y == 0 (or without a
+            # y_center) is still a valid table start.
+            if in_table:
+                member = self._extract_member(text, ocr_results, i)
+                if member:
+                    result['anggota_keluarga'].append(member)
+
+        # Post-processing
+        result = self._post_process(result)
+
+        return result
+
+    def _extract_value(self, text: str, field: str) -> Optional[str]:
+        """Return the text following the *field* label, or None.
+
+        The label may carry extra letters (``field[a-z]*``) and is matched
+        case-insensitively. A ``label : value`` form is tried first, then
+        ``label value``. *field* is inserted into the regex as-is.
+        """
+        patterns = [
+            rf'{field}[a-z]*\s*:\s*(.+)',
+            rf'{field}[a-z]*\s+(.+)',
+        ]
+
+        for pattern in patterns:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                value = match.group(1).strip()
+                # Drop a leftover leading colon/space (second pattern)
+                value = re.sub(r'^[:\s]+', '', value)
+                if value:
+                    return value
+        return None
+
+    def _is_table_header(self, text: str) -> bool:
+        """Return True when *text* (lower-cased) contains at least two member-table header keywords."""
+        header_keywords = ['no', 'nama lengkap', 'nik', 'jenis kelamin', 'hubungan']
+        count = sum(1 for kw in header_keywords if kw in text)
+        return count >= 2
+
+    def _extract_member(self, text: str, all_results: List[Dict], current_idx: int) -> Optional[Dict]:
+        """Build a family-member dict from one table row.
+
+        Args:
+            text: Text of the row.
+            all_results: All OCR entries (currently unused).
+            current_idx: Index of the row in *all_results* (currently unused).
+
+        Returns:
+            A dict with 'nik', 'nama', 'jenis_kelamin', 'tempat_lahir',
+            'tanggal_lahir' and 'hubungan', or None when the row has no
+            16-digit NIK. Only 'nik', 'jenis_kelamin' and 'hubungan' are
+            filled in; the other keys stay None.
+        """
+        # A row without a NIK is not a member row
+        nik_match = re.search(r'\b(\d{16})\b', text)
+        if not nik_match:
+            return None
+
+        member = {
+            'nik': nik_match.group(1),
+            'nama': None,
+            'jenis_kelamin': None,
+            'tempat_lahir': None,
+            'tanggal_lahir': None,
+            'hubungan': None,
+        }
+
+        lowered = text.lower()
+        # Padding lets a lone "l" / "p" at either end of the row match too
+        padded = f' {lowered} '
+
+        # Gender
+        if 'laki' in lowered or ' l ' in padded:
+            member['jenis_kelamin'] = 'LAKI-LAKI'
+        elif 'perempuan' in lowered or ' p ' in padded:
+            member['jenis_kelamin'] = 'PEREMPUAN'
+
+        # Family relationship: the first keyword found (in this order) wins
+        hubungan_keywords = {
+            'kepala': 'KEPALA KELUARGA',
+            'istri': 'ISTRI',
+            'suami': 'SUAMI',
+            'anak': 'ANAK',
+            'menantu': 'MENANTU',
+            'cucu': 'CUCU',
+            'orang tua': 'ORANG TUA',
+            'mertua': 'MERTUA',
+        }
+
+        for keyword, value in hubungan_keywords.items():
+            if keyword in lowered:
+                member['hubungan'] = value
+                break
+
+        return member
+
+    def _post_process(self, result: Dict) -> Dict:
+        """Validate No. KK and upper-case the text fields of *result* in place; returns *result*."""
+        # No. KK must be exactly 16 digits once non-digits are removed
+        if result['no_kk'] and not re.match(r'^\d{16}$', result['no_kk']):
+            cleaned = re.sub(r'\D', '', result['no_kk'])
+            if len(cleaned) == 16:
+                result['no_kk'] = cleaned
+            else:
+                result['no_kk'] = None
+
+        # Upper-case the text fields
+        for field in ['nama_kepala_keluarga', 'alamat', 'kel_desa', 'kecamatan',
+                      'kabupaten_kota', 'provinsi']:
+            if result[field]:
+                result[field] = result[field].upper()
+
+        return result
+
+
+if __name__ == "__main__":
+    # Smoke test: run the extractor over a hand-written KK sample and print
+    # every scalar field plus the number of family members found.
+    sample_lines = [
+        'KARTU KELUARGA',
+        'No. 3204012345678901',
+        'Nama Kepala Keluarga : JOHN DOE',
+        'Alamat : JL. MERDEKA NO. 123',
+        'RT/RW : 001/002',
+        'Desa/Kelurahan : SUKAMAJU',
+        'Kecamatan : SUKASARI',
+        'Kabupaten/Kota : BANDUNG',
+        'Provinsi : JAWA BARAT',
+        'Kode Pos : 40154',
+    ]
+    # Lines are spaced 20 units apart, starting at y = 10
+    sample_ocr = [
+        {'text': line, 'y_center': 10 + 20 * row}
+        for row, line in enumerate(sample_lines)
+    ]
+
+    fields = KKExtractor().extract(sample_ocr)
+
+    for name, value in fields.items():
+        if name == 'anggota_keluarga':
+            continue
+        print(f"{name}: {value}")
+
+    print(f"\nAnggota Keluarga: {len(fields['anggota_keluarga'])} orang")
--- a/ktp.jpeg
+++ b/ktp.jpeg
--- a/ktp_extractor.py
+++ b/ktp_extractor.py
@@ -0,0 +1,602 @@
+"""
+KTP Field Extractor
+Ekstraksi data terstruktur dari hasil OCR KTP Indonesia
+Mendukung berbagai format output OCR (full-width colon, standard colon, tanpa colon)
+"""
+
+import re
+from typing import Dict, Optional, List
+
+
+class KTPExtractor:
+    """Ekstrak field dari hasil OCR KTP"""
+    
+    # Colon variants that OCR may produce (standard, full-width, etc.)
+    COLON_PATTERN = r'[:\：]'
+    
+    # Keywords that identify gender
+    MALE_KEYWORDS = ['laki', 'pria', 'male']
+    FEMALE_KEYWORDS = ['perempuan', 'wanita', 'female']
+    
+    # Valid religions
+    AGAMA_LIST = ['islam', 'kristen', 'katolik', 'hindu', 'budha', 'buddha', 'konghucu']
+    
+    # Common occupations
+    PEKERJAAN_LIST = ['pelajar', 'mahasiswa', 'pegawai', 'swasta', 'pns', 'wiraswasta', 
+                      'buruh', 'petani', 'nelayan', 'karyawan', 'ibu rumah tangga', 
+                      'tidak bekerja', 'lainnya', 'mengurus rumah tangga']
+    
+    # KTP Zone Template (normalized coordinates: x_min, y_min, x_max, y_max)
+    # Based on standard KTP layout
+    # Neighbouring zones overlap; _get_zone() returns the first zone, in the
+    # order listed here, that contains the point.
+    ZONES = {
+        'header_provinsi':  (0.15, 0.00, 0.85, 0.07),  # PROVINSI header
+        'header_kabupaten': (0.15, 0.05, 0.85, 0.13),  # KABUPATEN header
+        'nik':              (0.02, 0.10, 0.70, 0.22),  # NIK area
+        'nama':             (0.02, 0.18, 0.70, 0.28),  # Nama area
+        'ttl':              (0.02, 0.25, 0.70, 0.36),  # Tempat/Tgl Lahir
+        'jenis_kelamin':    (0.02, 0.33, 0.45, 0.42),  # Jenis Kelamin (left)
+        'gol_darah':        (0.40, 0.33, 0.70, 0.42),  # Gol Darah (right of jenis)
+        'alamat':           (0.02, 0.38, 0.70, 0.50),  # Alamat
+        'rt_rw':            (0.02, 0.46, 0.70, 0.54),  # RT/RW
+        'kel_desa':         (0.02, 0.51, 0.70, 0.60),  # Kel/Desa
+        'kecamatan':        (0.02, 0.57, 0.70, 0.66),  # Kecamatan
+        'agama':            (0.02, 0.63, 0.70, 0.72),  # Agama
+        'status':           (0.02, 0.69, 0.70, 0.78),  # Status Perkawinan
+        'pekerjaan':        (0.02, 0.75, 0.70, 0.84),  # Pekerjaan
+        'wni':              (0.02, 0.81, 0.70, 0.90),  # Kewarganegaraan
+        'berlaku':          (0.02, 0.87, 0.70, 0.96),  # Berlaku Hingga
+        'foto':             (0.68, 0.10, 0.98, 0.55),  # Foto (right side)
+        'penerbitan':       (0.65, 0.58, 0.98, 0.98),  # Place & date of issue
+    }
+    
+    def __init__(self):
+        """Start with zero image dimensions."""
+        self.image_height = 0
+        self.image_width = 0
+    
+    def _get_zone(self, x_center: float, y_center: float, img_width: int, img_height: int) -> Optional[str]:
+        """Return the name of the first zone in ZONES that contains the point.
+
+        The pixel coordinates are divided by the image size before being
+        compared with the zone rectangles (bounds inclusive). Returns None
+        when either image dimension is 0 or no zone contains the point.
+        """
+        if 0 in (img_width, img_height):
+            return None
+
+        # Position relative to the image size
+        rel_x = x_center / img_width
+        rel_y = y_center / img_height
+
+        hits = (
+            label
+            for label, (left, top, right, bottom) in self.ZONES.items()
+            if left <= rel_x <= right and top <= rel_y <= bottom
+        )
+        return next(hits, None)
+    
+    def _extract_value_from_text(self, text: str) -> str:
+        """Return the value part of a ``label: value`` string.
+
+        The text is split at the first colon (standard or full-width) and the
+        stripped remainder is returned. Without a colon the whole text is
+        returned, stripped.
+        """
+        # maxsplit is passed by keyword: passing it positionally to re.split()
+        # is deprecated since Python 3.13.
+        parts = re.split(r'[：:]', text, maxsplit=1)
+        if len(parts) > 1:
+            return parts[1].strip()
+        return text.strip()
+    
+    def _detect_image_size(self, ocr_results: List[Dict]) -> tuple:
+        """Estimate (width, height) of the image from the OCR bounding boxes.
+
+        Takes the largest x and y over all points of every 'bbox' that has at
+        least four points, adds a 5% margin and truncates to int. Falls back
+        to (1000, 640) when no positive x coordinate is found.
+        """
+        boxes = (entry.get('bbox', []) for entry in ocr_results)
+        corners = [
+            corner
+            for box in boxes
+            if box and len(box) >= 4
+            for corner in box
+            if len(corner) >= 2
+        ]
+
+        # The leading 0 is the floor used when there are no usable corners
+        widest = max([0] + [corner[0] for corner in corners])
+        tallest = max([0] + [corner[1] for corner in corners])
+
+        if widest > 0:
+            # Add some margin
+            return (int(widest * 1.05), int(tallest * 1.05))
+        return (1000, 640)
+    
+    def _extract_by_zones(self, zone_texts: Dict[str, List[str]], result: Dict):
+        """Fill *result* from texts that were grouped by template zone.
+
+        Args:
+            zone_texts: Mapping of zone name to the OCR texts assigned to it.
+            result: Field dict to update in place. The keys written here are
+                'provinsi', 'kabupaten_kota', 'nama', 'jenis_kelamin',
+                'gol_darah', 'alamat' and 'tanggal_penerbitan' (which must
+                already be present); birth place/date are delegated to
+                _parse_ttl().
+
+        Returns:
+            None. Fields for which no zone yields a value are left untouched.
+        """
+
+        # PROVINSI from header
+        if 'header_provinsi' in zone_texts:
+            for text in zone_texts['header_provinsi']:
+                if 'provinsi' in text.lower():
+                    val = re.sub(r'(?i)provinsi\s*', '', text).strip()
+                    if val:
+                        result['provinsi'] = val.upper()
+                    break
+
+        # KABUPATEN/KOTA from header
+        if 'header_kabupaten' in zone_texts:
+            for text in zone_texts['header_kabupaten']:
+                text_lower = text.lower()
+                if 'kabupaten' in text_lower or 'kota' in text_lower:
+                    val = re.sub(r'(?i)(kabupaten|kota)\s*', '', text).strip()
+                    if val:
+                        result['kabupaten_kota'] = val.upper()
+                    else:
+                        # Nothing left after removing the label: keep the line
+                        result['kabupaten_kota'] = text.upper()
+                    break
+
+        # NAMA from nama zone (skip label line)
+        if 'nama' in zone_texts:
+            for text in zone_texts['nama']:
+                text_lower = text.lower()
+                if 'nama' not in text_lower and len(text) > 2:
+                    # A line without the label is taken as the name itself
+                    result['nama'] = text.upper()
+                    break
+                elif 'nama' in text_lower:
+                    # "Nama : VALUE" on one line; keep scanning because a
+                    # later unlabelled line takes precedence
+                    val = self._extract_value_from_text(text)
+                    if val and 'nama' not in val.lower():
+                        result['nama'] = val.upper()
+
+        # TTL (place/date of birth) from ttl zone
+        if 'ttl' in zone_texts:
+            for text in zone_texts['ttl']:
+                if 'tempat' in text.lower() or 'lahir' in text.lower():
+                    val = self._extract_value_from_text(text)
+                    if val:
+                        self._parse_ttl(val, result)
+                        break
+
+        # JENIS KELAMIN (gender)
+        if 'jenis_kelamin' in zone_texts:
+            for text in zone_texts['jenis_kelamin']:
+                text_lower = text.lower()
+                if 'laki' in text_lower:
+                    result['jenis_kelamin'] = 'LAKI-LAKI'
+                    break
+                elif 'perempuan' in text_lower:
+                    result['jenis_kelamin'] = 'PEREMPUAN'
+                    break
+
+        # GOL DARAH (blood type)
+        if 'gol_darah' in zone_texts:
+            for text in zone_texts['gol_darah']:
+                # Only accept a standalone token (A, B, AB or O with optional
+                # +/-). Without the letter look-arounds the "o" of "Gol" or
+                # the "a" of "Darah" in the label would be read as the type.
+                gol_match = re.search(
+                    r'(?<![A-Za-z])((?:AB|A|B|O)[+\-]?)(?![A-Za-z])',
+                    text,
+                    re.IGNORECASE,
+                )
+                if gol_match:
+                    result['gol_darah'] = gol_match.group(1).upper()
+                    break
+
+        # ALAMAT (address)
+        if 'alamat' in zone_texts:
+            for text in zone_texts['alamat']:
+                if 'alamat' not in text.lower() or len(zone_texts['alamat']) == 1:
+                    val = self._extract_value_from_text(text) if 'alamat' in text.lower() else text
+                    if val and 'alamat' not in val.lower():
+                        result['alamat'] = val.upper()
+                        break
+
+        # PENERBITAN area (place & date of issue share one zone)
+        if 'penerbitan' in zone_texts:
+            for text in zone_texts['penerbitan']:
+                # Look for a DD-MM-YYYY or DD/MM/YYYY date; first one wins
+                date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', text)
+                if date_match and result['tanggal_penerbitan'] is None:
+                    result['tanggal_penerbitan'] = date_match.group(1)
+    
+    def extract(self, ocr_results: List[Dict]) -> Dict[str, Optional[str]]:
+        """
+        Ekstrak field KTP dari hasil OCR dengan template-based zone detection
+        
+        Args:
+            ocr_results: List hasil dari OCREngine.extract_text()
+            
+        Returns:
+            Dict dengan field KTP
+        """
+        result = {
+            'nik': None,
+            'nama': None,
+            'tempat_lahir': None,
+            'tanggal_lahir': None,
+            'jenis_kelamin': None,
+            'gol_darah': None,
+            'alamat': None,
+            'rt_rw': None,
+            'kel_desa': None,
+            'kecamatan': None,
+            'agama': None,
+            'status_perkawinan': None,
+            'pekerjaan': None,
+            'kewarganegaraan': None,
+            'berlaku_hingga': None,
+            'provinsi': None,
+            'kabupaten_kota': None,
+            'tanggal_penerbitan': None,
+        }
+        
+        # Detect image dimensions from bounding boxes
+        img_width, img_height = self._detect_image_size(ocr_results)
+        
+        # Assign zones to each OCR result
+        zone_texts = {}  # zone_name -> list of texts
+        for r in ocr_results:
+            x_center = r.get('x_center', 0)
+            y_center = r.get('y_center', 0)
+            zone = self._get_zone(x_center, y_center, img_width, img_height)
+            if zone:
+                if zone not in zone_texts:
+                    zone_texts[zone] = []
+                zone_texts[zone].append(r['text'])
+        
+        # Debug: print zone assignments
+        print("\n[DEBUG KTPExtractor] Zone assignments:")
+        for zone, texts in zone_texts.items():
+            print(f"  {zone}: {texts}")
+        
+        # Extract fields using zone-based approach
+        self._extract_by_zones(zone_texts, result)
+        
+        # Gabungkan semua teks untuk fallback pattern matching
+        texts = [r['text'].strip() for r in ocr_results]
+        all_text = '\n'.join(texts)
+        
+        # Ekstrak NIK (16 digit) - bisa ada di mana saja
+        nik_match = re.search(r'\b(\d{16})\b', all_text)
+        if nik_match:
+            result['nik'] = nik_match.group(1)
+            print(f"  -> NIK found: {result['nik']}")
+        
+        # Fallback: Parse line by line for fields not found by zone
+        for i, text in enumerate(texts):
+            text_lower = text.lower()
+            
+            # Normalize colons
+            text_normalized = re.sub(self.COLON_PATTERN, ':', text)
+            text_norm_lower = text_normalized.lower()
+            
+            # ===== PROVINSI =====
+            if 'provinsi' in text_lower and result['provinsi'] is None:
+                val = self._extract_after_label(text_normalized, 'provinsi')
+                if val:
+                    result['provinsi'] = val.upper()
+                elif i + 1 < len(texts) and 'provinsi' not in texts[i+1].lower():
+                    # Mungkin value di line berikutnya
+                    result['provinsi'] = texts[i+1].strip().upper()
+            
+            # ===== KABUPATEN/KOTA =====
+            if ('kabupaten' in text_lower or 'kota' in text_lower or 'jakarta' in text_lower) and result['kabupaten_kota'] is None:
+                if 'provinsi' not in text_lower:  # Bukan bagian dari provinsi
+                    val = self._extract_after_label(text_normalized, 'kabupaten|kota')
+                    if val:
+                        result['kabupaten_kota'] = val.upper()
+                    else:
+                        result['kabupaten_kota'] = text.strip().upper()
+            
+            # ===== NAMA =====
+            if 'nama' in text_lower and result['nama'] is None:
+                val = self._extract_after_label(text_normalized, 'nama')
+                if val and len(val) > 2:
+                    result['nama'] = val.upper()
+                elif i + 1 < len(texts):
+                    # Nama di line berikutnya
+                    next_text = texts[i+1].strip()
+                    if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']):
+                        result['nama'] = next_text.upper()
+            
+            # ===== TEMPAT/TANGGAL LAHIR =====
+            # Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels
+            if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower):
+                if result['tempat_lahir'] is None or result['tanggal_lahir'] is None:
+                    # Extract value after label using full-width or standard colon
+                    ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
+                    if ttl:
+                        self._parse_ttl(ttl, result)
+                    elif '：' in text or ':' in text:
+                        # Value is after colon but _extract_after_label didn't catch it
+                        parts = re.split(r'[：:]', text, 1)
+                        if len(parts) > 1 and parts[1].strip():
+                            self._parse_ttl(parts[1].strip(), result)
+                    elif i + 1 < len(texts):
+                        # TTL di line berikutnya
+                        next_text = texts[i+1].strip()
+                        if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']):
+                            self._parse_ttl(next_text, result)
+            
+            # ===== JENIS KELAMIN =====
+            if any(kw in text_lower for kw in self.MALE_KEYWORDS):
+                if result['jenis_kelamin'] is None:
+                    result['jenis_kelamin'] = 'LAKI-LAKI'
+            elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS):
+                if result['jenis_kelamin'] is None:
+                    result['jenis_kelamin'] = 'PEREMPUAN'
+            
+            # ===== GOLONGAN DARAH =====
+            if 'darah' in text_lower or 'gol.' in text_lower:
+                # Try to find blood type on same line
+                gol_match = re.search(r'(?:gol|darah)[.\s:：]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
+                if gol_match and result['gol_darah'] is None:
+                    result['gol_darah'] = gol_match.group(1).upper()
+                elif result['gol_darah'] is None and i + 1 < len(texts):
+                    # Blood type might be on next line (real KTP pattern)
+                    next_text = texts[i+1].strip()
+                    if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE):
+                        result['gol_darah'] = next_text.upper()
+            # Standalone blood type (e.g., just "O" or "A+" on its own line)
+            if result['gol_darah'] is None:
+                if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
+                    result['gol_darah'] = text.strip().upper()
+            
+            # ===== ALAMAT =====
+            if 'alamat' in text_lower and result['alamat'] is None:
+                val = self._extract_after_label(text_normalized, 'alamat')
+                if val:
+                    result['alamat'] = val.upper()
+                elif i + 1 < len(texts):
+                    result['alamat'] = texts[i+1].strip().upper()
+            
+            # ===== RT/RW =====
+            rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
+            if rt_rw_match:
+                result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
+            
+            # ===== KELURAHAN/DESA =====
+            if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower:
+                if result['kel_desa'] is None:
+                    val = self._extract_after_label(text_normalized, 'kel|desa')
+                    if val:
+                        result['kel_desa'] = val.upper()
+                    elif i + 1 < len(texts):
+                        result['kel_desa'] = texts[i+1].strip().upper()
+            
+            # ===== KECAMATAN =====
+            if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower):
+                if result['kecamatan'] is None:
+                    val = self._extract_after_label(text_normalized, 'kecamatan|kec')
+                    if val:
+                        result['kecamatan'] = val.upper()
+                    elif i + 1 < len(texts):
+                        # Value on next line (real KTP pattern)
+                        next_text = texts[i+1].strip()
+                        if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['agama', 'status', 'pekerjaan']):
+                            result['kecamatan'] = next_text.upper()
+            
+            # ===== AGAMA =====
+            if 'agama' in text_lower:
+                val = self._extract_after_label(text_normalized, 'agama')
+                if val and result['agama'] is None:
+                    result['agama'] = val.upper()
+                elif result['agama'] is None and i + 1 < len(texts):
+                    # Value on next line (real KTP pattern)
+                    next_text = texts[i+1].strip().upper()
+                    if next_text in ['ISLAM', 'KRISTEN', 'KATOLIK', 'HINDU', 'BUDHA', 'BUDDHA', 'KONGHUCU']:
+                        result['agama'] = next_text
+            else:
+                # Check if line contains only agama name
+                for agama in self.AGAMA_LIST:
+                    if agama in text_lower and len(text) < 20:
+                        if result['agama'] is None:
+                            result['agama'] = text.strip().upper()
+                            break
+            
+            # ===== STATUS PERKAWINAN =====
+            if 'kawin' in text_lower:
+                if result['status_perkawinan'] is None:
+                    val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan')
+                    if val:
+                        result['status_perkawinan'] = val.upper()
+                    elif 'belum' in text_lower:
+                        result['status_perkawinan'] = 'BELUM KAWIN'
+                    elif 'kawin' in text_lower and 'cerai' not in text_lower:
+                        result['status_perkawinan'] = 'KAWIN'
+                    elif 'cerai hidup' in text_lower:
+                        result['status_perkawinan'] = 'CERAI HIDUP'
+                    elif 'cerai mati' in text_lower:
+                        result['status_perkawinan'] = 'CERAI MATI'
+            
+            # ===== PEKERJAAN =====
+            if 'pekerjaan' in text_lower:
+                val = self._extract_after_label(text_normalized, 'pekerjaan')
+                if val and result['pekerjaan'] is None:
+                    result['pekerjaan'] = val.upper()
+                elif result['pekerjaan'] is None and i + 1 < len(texts):
+                    # Value on next line (real KTP pattern)
+                    next_text = texts[i+1].strip()
+                    if len(next_text) > 2 and 'kewarganegaraan' not in next_text.lower():
+                        result['pekerjaan'] = next_text.upper()
+            else:
+                # Check if line contains pekerjaan keyword
+                for pekerjaan in self.PEKERJAAN_LIST:
+                    if pekerjaan in text_lower and len(text) < 30:
+                        if result['pekerjaan'] is None:
+                            result['pekerjaan'] = text.strip().upper()
+                            break
+            
+            # ===== KEWARGANEGARAAN =====
+            if 'wni' in text_lower:
+                result['kewarganegaraan'] = 'WNI'
+            elif 'wna' in text_lower:
+                result['kewarganegaraan'] = 'WNA'
+            elif 'warga' in text_lower and result['kewarganegaraan'] is None:
+                val = self._extract_after_label(text_normalized, 'kewarganegaraan|warga')
+                if val:
+                    result['kewarganegaraan'] = val.upper()
+            
+            # ===== BERLAKU HINGGA =====
+            if 'berlaku' in text_lower or 'seumur' in text_lower:
+                if result['berlaku_hingga'] is None:
+                    if 'seumur' in text_lower or 'hidup' in text_lower:
+                        result['berlaku_hingga'] = 'SEUMUR HIDUP'
+                    else:
+                        val = self._extract_after_label(text_normalized, 'berlaku')
+                        if val:
+                            result['berlaku_hingga'] = val.upper()
+            
+            # ===== TANGGAL PENERBITAN (biasanya format DD-MM-YYYY di akhir) =====
+            # Look for date that is NOT tanggal lahir (different date)
+            if result['tanggal_penerbitan'] is None:
+                # Match date format at end of text or standalone date
+                date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip())
+                if date_match:
+                    found_date = date_match.group(1)
+                    # Make sure it's not the same as tanggal_lahir
+                    if result['tanggal_lahir'] != found_date:
+                        # Likely penerbitan if after berlaku_hingga was found
+                        if result['berlaku_hingga'] or i > len(texts) * 0.7:
+                            result['tanggal_penerbitan'] = found_date
+        
+        # Post-processing
+        result = self._post_process(result)
+        
+        return result
+    
+    def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]:
+        """Ekstrak nilai setelah label (supports various separators)"""
+        patterns = [
+            rf'(?:{label_pattern})\s*:\s*(.+)',  # label: value
+            rf'(?:{label_pattern})\s+([A-Z0-9].+)',  # label VALUE (uppercase start)
+        ]
+        
+        for pattern in patterns:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                value = match.group(1).strip()
+                # Remove trailing colon or label fragment
+                value = re.sub(r'^[:\s]+', '', value)
+                value = re.sub(r'\s*:\s*$', '', value)
+                if value and len(value) > 1:
+                    return value
+        
+        return None
+    
+    def _parse_ttl(self, ttl_text: str, result: Dict):
+        """Parse tempat/tanggal lahir dari text"""
+        ttl_text = ttl_text.strip()
+        
+        # Normalize dates where OCR missed dashes:
+        # "05 08 1978" -> "05-08-1978"
+        # "05 08-1978" -> "05-08-1978"  
+        # "05-08 1978" -> "05-08-1978"
+        ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
+        ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
+        ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
+        
+        # Handle 8-digit date without separator: "05081978" -> "05-08-1978"
+        date_8digit = re.search(r'(\d{8})', ttl_text)
+        if date_8digit:
+            d = date_8digit.group(1)
+            formatted = f"{d[:2]}-{d[2:4]}-{d[4:]}"
+            ttl_text = ttl_text.replace(d, formatted)
+        
+        # Handle merged city+date like "JAKARTA05-08-1978" - add space before digits
+        ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE)
+        
+        # Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY"
+        date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text)
+        if date_match:
+            result['tanggal_lahir'] = date_match.group(1)
+            # Tempat adalah bagian sebelum tanggal
+            place = ttl_text[:date_match.start()].strip(' ,:-/')
+            # Clean up label remnants
+            place = re.sub(r'^(tempat|tgl|lahir|：|:)[/\s:：]*', '', place, flags=re.IGNORECASE).strip()
+            if place and len(place) > 2:
+                result['tempat_lahir'] = place.upper()
+        else:
+            # Coba split by comma
+            parts = ttl_text.split(',')
+            if len(parts) >= 2:
+                result['tempat_lahir'] = parts[0].strip().upper()
+                result['tanggal_lahir'] = parts[1].strip()
+            elif len(parts) == 1 and len(ttl_text) > 2:
+                result['tempat_lahir'] = ttl_text.upper()
+    
+    def _post_process(self, result: Dict) -> Dict:
+        """Post-processing hasil ekstraksi"""
+        # Validasi NIK (harus 16 digit)
+        if result['nik'] and not re.match(r'^\d{16}$', result['nik']):
+            cleaned = re.sub(r'\D', '', result['nik'])
+            if len(cleaned) == 16:
+                result['nik'] = cleaned
+            else:
+                result['nik'] = None
+        
+        # Clean all string values - remove leading colons and extra whitespace
+        for field in result:
+            if result[field] and isinstance(result[field], str):
+                val = result[field]
+                # Remove leading colons (standard and full-width)
+                val = re.sub(r'^[\s:：]+', '', val)
+                # Remove trailing colons
+                val = re.sub(r'[\s:：]+$', '', val)
+                # Remove double spaces
+                val = re.sub(r'\s+', ' ', val)
+                result[field] = val.strip()
+        
+        # Bersihkan label dari values
+        for field in ['nama', 'alamat', 'tempat_lahir', 'kel_desa', 'kecamatan', 'agama', 'pekerjaan']:
+            if result[field]:
+                # Remove common labels yang ter-capture
+                result[field] = re.sub(
+                    r'^(NAMA|ALAMAT|TEMPAT|LAHIR|TGL|KEL|DESA|KELURAHAN|KECAMATAN|KEC|AGAMA|PEKERJAAN|STATUS)[\s:：]*', 
+                    '', result[field], flags=re.IGNORECASE
+                ).strip()
+        
+        # Fix status perkawinan yang masih mengandung label
+        if result['status_perkawinan']:
+            sp = result['status_perkawinan']
+            sp = re.sub(r'^(STATUS|PERKAWINAN)[\s:：]*', '', sp, flags=re.IGNORECASE).strip()
+            result['status_perkawinan'] = sp
+        
+        # Fix berlaku hingga
+        if result['berlaku_hingga']:
+            bh = result['berlaku_hingga']
+            bh = re.sub(r'^(BERLAKU|HINGGA)[\s:：]*', '', bh, flags=re.IGNORECASE).strip()
+            if bh.upper() == 'HIDUP' or 'SEUMUR' in bh.upper():
+                result['berlaku_hingga'] = 'SEUMUR HIDUP'
+            else:
+                result['berlaku_hingga'] = bh
+        
+        # Fix merged kabupaten/kota names (e.g., JAKARTASELATAN -> JAKARTA SELATAN)
+        if result['kabupaten_kota']:
+            kk = result['kabupaten_kota']
+            # Add space before directional words
+            kk = re.sub(r'(JAKARTA|BANDUNG|SURABAYA|SEMARANG|MEDAN|BEKASI|TANGERANG|DEPOK|BOGOR)(SELATAN|UTARA|BARAT|TIMUR|PUSAT|TENGAH)', 
+                        r'\1 \2', kk, flags=re.IGNORECASE)
+            # Common merged patterns
+            kk = re.sub(r'(KOTA|KABUPATEN)([A-Z])', r'\1 \2', kk, flags=re.IGNORECASE)
+            result['kabupaten_kota'] = kk.upper()
+        
+        # Fix merged provinsi names
+        if result['provinsi']:
+            prov = result['provinsi']
+            prov = re.sub(r'(DKI|DI)(JAKARTA|YOGYAKARTA)', r'\1 \2', prov, flags=re.IGNORECASE)
+            prov = re.sub(r'(JAWA|KALIMANTAN|SULAWESI|SUMATERA|NUSA TENGGARA)(BARAT|TIMUR|TENGAH|SELATAN|UTARA)', 
+                          r'\1 \2', prov, flags=re.IGNORECASE)
+            result['provinsi'] = prov.upper()
+        
+        # Fix merged alamat/address (e.g., JLKECAPIV -> JL KECAPI V)
+        if result['alamat']:
+            alamat = result['alamat']
+            # Add space after common street prefixes
+            alamat = re.sub(r'^(JL|JLN|JALAN|GG|GANG|NO|BLOK)([A-Z])', r'\1 \2', alamat, flags=re.IGNORECASE)
+            # Add space before Roman numerals at the end (I, II, III, IV, V, VI, VII, VIII, IX, X)
+            alamat = re.sub(r'([A-Z])([IVX]+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
+            # Add space before single digits/numbers at end
+            alamat = re.sub(r'([A-Z])(\d+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
+            # Fix common patterns: "NO123" -> "NO 123", "BLOKA" -> "BLOK A"
+            alamat = re.sub(r'\b(NO|BLOK)(\d+|[A-Z])\b', r'\1 \2', alamat, flags=re.IGNORECASE)
+            result['alamat'] = alamat.upper()
+        
+        return result
+
+
+if __name__ == "__main__":
+    # Test
+    sample_ocr = [
+        {'text': 'PROVINSI JAWA BARAT'},
+        {'text': 'KABUPATEN BANDUNG'},
+        {'text': 'NIK : 3204012345678901'},
+        {'text': 'Nama : JOHN DOE'},
+        {'text': 'Tempat/Tgl Lahir : BANDUNG, 01-01-1990'},
+        {'text': 'Jenis Kelamin : LAKI-LAKI'},
+        {'text': 'Alamat : JL. MERDEKA NO. 123'},
+        {'text': 'RT/RW : 001/002'},
+        {'text': 'Kel/Desa : SUKAMAJU'},
+        {'text': 'Kecamatan : SUKASARI'},
+        {'text': 'Agama : ISLAM'},
+        {'text': 'Status Perkawinan : BELUM KAWIN'},
+        {'text': 'Pekerjaan : KARYAWAN SWASTA'},
+        {'text': 'Kewarganegaraan : WNI'},
+        {'text': 'Berlaku Hingga : SEUMUR HIDUP'},
+    ]
+    
+    extractor = KTPExtractor()
+    result = extractor.extract(sample_ocr)
+    
+    for key, value in result.items():
+        print(f"{key}: {value}")
--- a/ocr_engine.py
+++ b/ocr_engine.py
@@ -0,0 +1,153 @@
+"""
+OCR Engine menggunakan PaddleOCR 3.x
+Untuk membaca teks dari gambar dokumen Indonesia (KTP, KK)
+"""
+
+from paddleocr import PaddleOCR
+import cv2
+import numpy as np
+from PIL import Image
+
+
+class OCREngine:
+    def __init__(self):
+        """Inisialisasi PaddleOCR 3.x dengan konfigurasi untuk dokumen Indonesia"""
+        self.ocr = PaddleOCR(
+            use_doc_orientation_classify=True,   # Deteksi rotasi (0°/90°/180°/270°)
+            use_doc_unwarping=True,              # Koreksi perspektif (trapezium → persegi)
+            use_textline_orientation=True,       # Orientasi per baris teks
+        )
+    
+    def preprocess_image(self, image_path: str) -> np.ndarray:
+        """
+        Preprocessing gambar untuk hasil OCR lebih baik
+        - Resize jika terlalu besar
+        - Enhance contrast
+        """
+        img = cv2.imread(image_path)
+        if img is None:
+            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")
+        
+        # Resize jika terlalu besar (max 2000px)
+        max_dim = 2000
+        height, width = img.shape[:2]
+        if max(height, width) > max_dim:
+            scale = max_dim / max(height, width)
+            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
+        
+        # Convert ke grayscale untuk preprocessing
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        
+        # Enhance contrast menggunakan CLAHE
+        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        enhanced = clahe.apply(gray)
+        
+        # Convert kembali ke BGR untuk PaddleOCR
+        enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
+        
+        return enhanced_bgr
+    
+    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
+        """
+        Ekstraksi teks dari gambar menggunakan PaddleOCR 3.x API
+        
+        Args:
+            image_path: Path ke file gambar
+            preprocess: Apakah melakukan preprocessing
+            
+        Returns:
+            List of dict dengan keys: 'text', 'confidence', 'bbox'
+        """
+        try:
+            # Jalankan OCR dengan API baru (predict)
+            result = self.ocr.predict(input=image_path)
+            
+            if not result:
+                return []
+            
+            extracted = []
+            
+            # Parse hasil dari PaddleOCR 3.x
+            for res in result:
+                # Akses data dari result object
+                if hasattr(res, 'rec_texts') and hasattr(res, 'rec_scores') and hasattr(res, 'dt_polys'):
+                    texts = res.rec_texts if res.rec_texts else []
+                    scores = res.rec_scores if res.rec_scores else []
+                    polys = res.dt_polys if res.dt_polys else []
+                    
+                    for i, text in enumerate(texts):
+                        confidence = scores[i] if i < len(scores) else 0.0
+                        bbox = polys[i].tolist() if i < len(polys) and hasattr(polys[i], 'tolist') else []
+                        
+                        # Calculate center for sorting
+                        if bbox and len(bbox) >= 4:
+                            y_center = (bbox[0][1] + bbox[2][1]) / 2
+                            x_center = (bbox[0][0] + bbox[2][0]) / 2
+                        else:
+                            y_center = 0
+                            x_center = 0
+                        
+                        extracted.append({
+                            'text': text,
+                            'confidence': float(confidence),
+                            'bbox': bbox,
+                            'y_center': y_center,
+                            'x_center': x_center,
+                        })
+                # Fallback: try dict-like access
+                elif hasattr(res, '__getitem__'):
+                    try:
+                        texts = res.get('rec_texts', res.get('texts', []))
+                        scores = res.get('rec_scores', res.get('scores', []))
+                        
+                        for i, text in enumerate(texts):
+                            confidence = scores[i] if i < len(scores) else 0.0
+                            extracted.append({
+                                'text': text,
+                                'confidence': float(confidence),
+                                'bbox': [],
+                                'y_center': i * 10,  # Simple ordering fallback
+                                'x_center': 0,
+                            })
+                    except Exception:
+                        pass
+            
+            # Sort berdasarkan posisi Y (atas ke bawah)
+            if extracted:
+                extracted.sort(key=lambda x: (x['y_center'], x['x_center']))
+            
+            return extracted
+            
+        except Exception as e:
+            print(f"Error OCR: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+    
+    def get_raw_text(self, image_path: str) -> str:
+        """
+        Mendapatkan semua teks dari gambar sebagai string
+        """
+        results = self.extract_text(image_path)
+        return '\n'.join([r['text'] for r in results])
+
+
+# Singleton instance
+_ocr_engine = None
+
+def get_ocr_engine() -> OCREngine:
+    """Get singleton OCR engine instance"""
+    global _ocr_engine
+    if _ocr_engine is None:
+        _ocr_engine = OCREngine()
+    return _ocr_engine
+
+
+if __name__ == "__main__":
+    # Test OCR
+    import sys
+    if len(sys.argv) > 1:
+        engine = get_ocr_engine()
+        results = engine.extract_text(sys.argv[1])
+        for r in results:
+            print(f"[{r['confidence']:.2f}] {r['text']}")
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+paddlepaddle
+paddleocr
+flask
+pillow
+opencv-python
--- a/static/style.css
+++ b/static/style.css
@@ -0,0 +1,538 @@
+/* OCR KTP/KK - Modern Dark Theme */
+
+/* Design tokens: colours, shadows and radii that the rules in this sheet
+   read through var(). */
+:root {
+    --bg-primary: #0f0f1a;
+    --bg-secondary: #1a1a2e;
+    --bg-tertiary: #252540;
+    --accent-primary: #6366f1;
+    --accent-secondary: #818cf8;
+    --accent-gradient: linear-gradient(135deg, #6366f1 0%, #a855f7 100%);
+    --text-primary: #f1f5f9;
+    --text-secondary: #94a3b8;
+    --text-muted: #64748b;
+    --success: #22c55e;
+    --error: #ef4444;
+    --warning: #f59e0b;
+    --border: #334155;
+    --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
+    --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4);
+    --radius: 12px;
+    --radius-lg: 16px;
+}
+
+/* Global reset: no default margin/padding, and sizes include padding and border. */
+* {
+    margin: 0;
+    padding: 0;
+    box-sizing: border-box;
+}
+
+/* Page base: dark background, light text, at least one viewport tall. */
+body {
+    font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
+    background: var(--bg-primary);
+    color: var(--text-primary);
+    min-height: 100vh;
+    line-height: 1.6;
+}
+
+/* Centered content column, capped at 800px wide. */
+.container {
+    max-width: 800px;
+    margin: 0 auto;
+    padding: 2rem 1rem;
+}
+
+/* Header */
+header {
+    text-align: center;
+    margin-bottom: 2rem;
+}
+
+/* Gradient-filled title: the gradient is painted as the background, clipped
+   to the glyph shapes, and the text fill is made transparent so it shows. */
+header h1 {
+    font-size: 2.5rem;
+    font-weight: 700;
+    background: var(--accent-gradient);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    margin-bottom: 0.5rem;
+}
+
+.subtitle {
+    color: var(--text-secondary);
+    font-size: 1.1rem;
+}
+
+/* Upload Section */
+.upload-section {
+    background: var(--bg-secondary);
+    border-radius: var(--radius-lg);
+    padding: 2rem;
+    box-shadow: var(--shadow-lg);
+    margin-bottom: 2rem;
+}
+
+/* Document Type Selector */
+/* Buttons sit side by side and share the row width equally (flex: 1). */
+.doc-type-selector {
+    display: flex;
+    gap: 1rem;
+    margin-bottom: 1.5rem;
+}
+
+/* The 2px transparent border keeps the button the same size when the
+   .active state below gives the border a colour. */
+.doc-btn {
+    flex: 1;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 0.5rem;
+    padding: 1rem;
+    background: var(--bg-tertiary);
+    border: 2px solid transparent;
+    border-radius: var(--radius);
+    color: var(--text-secondary);
+    font-size: 1rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.3s ease;
+}
+
+.doc-btn:hover {
+    background: var(--bg-primary);
+    color: var(--text-primary);
+}
+
+/* Currently selected document type. */
+.doc-btn.active {
+    background: var(--accent-gradient);
+    color: white;
+    border-color: var(--accent-secondary);
+}
+
+.doc-btn .icon {
+    font-size: 1.5rem;
+}
+
+/* Dropzone */
+/* Dashed drop target for the image upload. */
+.dropzone {
+    border: 2px dashed var(--border);
+    border-radius: var(--radius);
+    padding: 3rem 2rem;
+    text-align: center;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    background: var(--bg-tertiary);
+    position: relative;
+    overflow: hidden;
+}
+
+/* Same highlight on hover and on .dragover (presumably toggled by the page
+   script while a file is dragged over; the script is not part of this sheet). */
+.dropzone:hover,
+.dropzone.dragover {
+    border-color: var(--accent-primary);
+    background: rgba(99, 102, 241, 0.1);
+}
+
+/* Prompt contents stacked vertically and centered. */
+.dropzone-content {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    gap: 0.5rem;
+}
+
+.upload-icon {
+    font-size: 4rem;
+    margin-bottom: 0.5rem;
+}
+
+.dropzone p {
+    color: var(--text-secondary);
+}
+
+.dropzone .hint {
+    color: var(--text-muted);
+    font-size: 0.875rem;
+}
+
+/* Button-styled element used to pick a file. */
+.file-btn {
+    display: inline-block;
+    padding: 0.75rem 1.5rem;
+    background: var(--accent-gradient);
+    color: white;
+    border-radius: var(--radius);
+    font-weight: 600;
+    cursor: pointer;
+    margin: 0.5rem 0;
+    transition: transform 0.2s ease;
+}
+
+.file-btn:hover {
+    transform: scale(1.05);
+}
+
+.file-types {
+    font-size: 0.75rem;
+    color: var(--text-muted);
+}
+
+/* Image preview: never wider than its container, at most 400px tall. */
+.preview-image {
+    max-width: 100%;
+    max-height: 400px;
+    border-radius: var(--radius);
+    cursor: pointer;
+}
+
+/* Process Button */
+.process-btn {
+    width: 100%;
+    padding: 1rem;
+    margin-top: 1.5rem;
+    background: var(--accent-gradient);
+    border: none;
+    border-radius: var(--radius);
+    color: white;
+    font-size: 1.1rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    box-shadow: var(--shadow);
+}
+
+/* Lift effect on hover, skipped while the button is disabled. */
+.process-btn:hover:not(:disabled) {
+    transform: translateY(-2px);
+    box-shadow: var(--shadow-lg);
+}
+
+.process-btn:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
+}
+
+/* Results Section */
+/* Result card; plays the slideUp animation defined below. */
+.results-section {
+    background: var(--bg-secondary);
+    border-radius: var(--radius-lg);
+    padding: 2rem;
+    box-shadow: var(--shadow-lg);
+    animation: slideUp 0.3s ease;
+}
+
+/* Fade in while moving up 20px into the final position. */
+@keyframes slideUp {
+    from {
+        opacity: 0;
+        transform: translateY(20px);
+    }
+
+    to {
+        opacity: 1;
+        transform: translateY(0);
+    }
+}
+
+/* Title on the left, action buttons on the right; wraps when space runs out. */
+.results-header {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    margin-bottom: 1.5rem;
+    flex-wrap: wrap;
+    gap: 1rem;
+}
+
+.results-header h2 {
+    font-size: 1.5rem;
+}
+
+.results-actions {
+    display: flex;
+    gap: 0.5rem;
+}
+
+.action-btn {
+    padding: 0.5rem 1rem;
+    background: var(--bg-tertiary);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    color: var(--text-primary);
+    font-size: 0.875rem;
+    cursor: pointer;
+    transition: all 0.2s ease;
+}
+
+.action-btn:hover {
+    background: var(--accent-primary);
+    border-color: var(--accent-primary);
+}
+
+/* Secondary variant: same button without a filled background. */
+.action-btn.secondary {
+    background: transparent;
+}
+
+/* Results Table */
+.results-table {
+    width: 100%;
+    border-collapse: collapse;
+}
+
+.results-table th,
+.results-table td {
+    padding: 0.875rem 1rem;
+    text-align: left;
+    border-bottom: 1px solid var(--border);
+}
+
+.results-table th {
+    background: var(--bg-tertiary);
+    color: var(--text-secondary);
+    font-weight: 600;
+    font-size: 0.875rem;
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+}
+
+/* Round only the outer top corners of the header row. */
+.results-table th:first-child {
+    border-radius: var(--radius) 0 0 0;
+}
+
+.results-table th:last-child {
+    border-radius: 0 var(--radius) 0 0;
+}
+
+/* Label column takes 40% of the table width. */
+.field-label {
+    color: var(--text-secondary);
+    font-weight: 500;
+    width: 40%;
+}
+
+.field-value {
+    color: var(--text-primary);
+    font-weight: 600;
+}
+
+/* Faint accent tint on the hovered row. */
+.results-table tr:hover {
+    background: rgba(99, 102, 241, 0.05);
+}
+
+/* Editable Fields */
+.editable-field {
+    width: 100%;
+    padding: 0.5rem 0.75rem;
+    background: var(--bg-tertiary);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    color: var(--text-primary);
+    font-size: 0.95rem;
+    font-weight: 600;
+    font-family: inherit;
+    transition: all 0.2s ease;
+}
+
+/* The default outline is removed; an accent border plus a soft box-shadow
+   ring marks focus instead. */
+.editable-field:focus {
+    outline: none;
+    border-color: var(--accent-primary);
+    background: var(--bg-secondary);
+    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
+}
+
+.editable-field::placeholder {
+    color: var(--text-muted);
+    font-weight: 400;
+}
+
+/* Region Dropdown Styles */
+/* Row holding a field and its toggle button side by side. */
+.region-field-wrapper {
+    display: flex;
+    gap: 0.5rem;
+    align-items: center;
+}
+
+/* The input/select takes the remaining width of the row. */
+.region-field-wrapper input,
+.region-field-wrapper select {
+    flex: 1;
+}
+
+.region-dropdown {
+    width: 100%;
+    padding: 0.5rem 0.75rem;
+    background: var(--bg-tertiary);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    color: var(--text-primary);
+    font-size: 0.95rem;
+    font-family: inherit;
+    cursor: pointer;
+}
+
+/* NOTE(review): the outline is removed here and only the border colour
+   signals focus; unlike .editable-field:focus there is no focus ring. */
+.region-dropdown:focus {
+    outline: none;
+    border-color: var(--accent-primary);
+}
+
+/* Toggle button; flex-shrink: 0 keeps it from being squeezed by the field. */
+.dropdown-toggle {
+    padding: 0.5rem 0.75rem;
+    background: var(--bg-tertiary);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    color: var(--text-secondary);
+    cursor: pointer;
+    transition: all 0.2s ease;
+    flex-shrink: 0;
+}
+
+.dropdown-toggle:hover {
+    background: var(--accent-primary);
+    color: white;
+}
+
+/* Confirmed state shown in the success colour. */
+.dropdown-toggle.confirmed {
+    background: var(--success);
+    color: white;
+    border-color: var(--success);
+}
+
+/* Validation Indicators */
+.validation-status {
+    margin-left: 0.5rem;
+    font-size: 0.875rem;
+}
+
+.validation-status.valid-field {
+    color: var(--success);
+}
+
+/* Invalid values use the warning colour, not the error colour. */
+.validation-status.invalid-field {
+    color: var(--warning);
+}
+
+.editable-field.valid-field {
+    border-color: var(--success);
+}
+
+.editable-field.invalid-field {
+    border-color: var(--warning);
+}
+
+.suggestion-text {
+    font-size: 0.75rem;
+    color: var(--text-muted);
+    margin-top: 0.25rem;
+    font-style: italic;
+}
+
+/* Raw Text Section */
+/* Separated from the content above by a top border. */
+.raw-text-section {
+    margin-top: 1.5rem;
+    padding-top: 1.5rem;
+    border-top: 1px solid var(--border);
+}
+
+.raw-text-section h3 {
+    font-size: 1rem;
+    color: var(--text-secondary);
+    margin-bottom: 1rem;
+}
+
+/* Monospace block: long lines wrap, and content taller than 300px scrolls. */
+.raw-text-section pre {
+    background: var(--bg-primary);
+    padding: 1rem;
+    border-radius: var(--radius);
+    font-family: 'Consolas', monospace;
+    font-size: 0.875rem;
+    color: var(--text-secondary);
+    white-space: pre-wrap;
+    word-wrap: break-word;
+    max-height: 300px;
+    overflow-y: auto;
+}
+
+/* Error Section */
+.error-section {
+    margin-top: 1rem;
+}
+
+/* Error box: translucent tint of the error colour with a solid error border. */
+.error-content {
+    background: rgba(239, 68, 68, 0.1);
+    border: 1px solid var(--error);
+    border-radius: var(--radius);
+    padding: 1rem;
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+}
+
+.error-icon {
+    font-size: 1.5rem;
+}
+
+.error-content p {
+    color: var(--error);
+}
+
+/* Footer */
+footer {
+    text-align: center;
+    margin-top: 2rem;
+    padding-top: 1rem;
+    border-top: 1px solid var(--border);
+}
+
+footer p {
+    color: var(--text-muted);
+    font-size: 0.875rem;
+}
+
+/* Links are underlined only on hover. */
+footer a {
+    color: var(--accent-secondary);
+    text-decoration: none;
+}
+
+footer a:hover {
+    text-decoration: underline;
+}
+
+/* Responsive */
+/* Overrides for viewports up to 600px wide: tighter padding, smaller title,
+   and stacked (column) layouts for the selector and the results header. */
+@media (max-width: 600px) {
+    .container {
+        padding: 1rem;
+    }
+
+    header h1 {
+        font-size: 2rem;
+    }
+
+    .upload-section,
+    .results-section {
+        padding: 1.5rem;
+    }
+
+    .doc-type-selector {
+        flex-direction: column;
+    }
+
+    .results-header {
+        flex-direction: column;
+        align-items: flex-start;
+    }
+
+    .results-actions {
+        width: 100%;
+        justify-content: flex-start;
+    }
+
+    .field-label {
+        width: 45%;
+    }
+}
+
+/* Scrollbar */
+/* These use ::-webkit-scrollbar pseudo-elements, so they only apply in
+   engines that support that prefix. */
+::-webkit-scrollbar {
+    width: 8px;
+    height: 8px;
+}
+
+::-webkit-scrollbar-track {
+    background: var(--bg-tertiary);
+}
+
+::-webkit-scrollbar-thumb {
+    background: var(--border);
+    border-radius: 4px;
+}
+
+::-webkit-scrollbar-thumb:hover {
+    background: var(--text-muted);
+}
--- a/templates/index.html
+++ b/templates/index.html
@@ -0,0 +1,570 @@
+<!DOCTYPE html>
+<html lang="id">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>OCR KTP/KK - Pembaca Dokumen Indonesia</title>
+    <link rel="stylesheet" href="/static/style.css">
+</head>
+
+<body>
+    <div class="container">
+        <header>
+            <h1>📄 OCR KTP/KK</h1>
+            <p class="subtitle">Pembaca Dokumen Indonesia Offline</p>
+        </header>
+
+        <main>
+            <!-- Upload Section -->
+            <section class="upload-section">
+                <div class="doc-type-selector">
+                    <button class="doc-btn active" data-type="ktp">
+                        <span class="icon">🪪</span>
+                        KTP
+                    </button>
+                    <button class="doc-btn" data-type="kk">
+                        <span class="icon">👨‍👩‍👧‍👦</span>
+                        Kartu Keluarga
+                    </button>
+                </div>
+
+                <!-- Dropzone: shows the upload prompt until a file is chosen, then the preview image. -->
+                <div class="dropzone" id="dropzone">
+                    <div class="dropzone-content">
+                        <div class="upload-icon">📷</div>
+                        <p>Drag & drop gambar di sini</p>
+                        <p class="hint">atau</p>
+                        <label class="file-btn">
+                            Pilih File
+                            <!-- accept lists exactly the formats named in the hint below, which are
+                                 the ones the server accepts, so the picker does not offer files
+                                 that the upload would reject. -->
+                            <input type="file" id="fileInput" accept="image/png,image/jpeg,image/bmp,image/webp" hidden>
+                        </label>
+                        <p class="file-types">PNG, JPG, JPEG, BMP, WEBP (max 16MB)</p>
+                    </div>
+                    <!-- alt text is required on <img>; src is filled in by the script once a file is chosen. -->
+                    <img id="preview" class="preview-image" alt="Pratinjau gambar yang dipilih" style="display: none;">
+                </div>
+
+                <button id="processBtn" class="process-btn" disabled>
+                    <span class="btn-text">🔍 Proses OCR</span>
+                    <span class="btn-loading" style="display: none;">⏳ Memproses...</span>
+                </button>
+            </section>
+
+            <!-- Results Section -->
+            <section class="results-section" id="resultsSection" style="display: none;">
+                <div class="results-header">
+                    <h2>📋 Hasil Ekstraksi</h2>
+                    <div class="results-actions">
+                        <button class="action-btn" id="copyBtn" title="Copy JSON">📋 Copy</button>
+                        <button class="action-btn" id="exportBtn" title="Export JSON">💾 Export</button>
+                        <button class="action-btn secondary" id="toggleRaw">📝 Raw Text</button>
+                    </div>
+                </div>
+
+                <div class="results-content">
+                    <table class="results-table" id="resultsTable">
+                        <thead>
+                            <tr>
+                                <th>Field</th>
+                                <th>Nilai</th>
+                            </tr>
+                        </thead>
+                        <tbody id="resultsBody">
+                        </tbody>
+                    </table>
+
+                    <div class="raw-text-section" id="rawTextSection" style="display: none;">
+                        <h3>Raw OCR Text</h3>
+                        <pre id="rawText"></pre>
+                    </div>
+                </div>
+            </section>
+
+            <!-- Error Section -->
+            <section class="error-section" id="errorSection" style="display: none;">
+                <div class="error-content">
+                    <span class="error-icon">⚠️</span>
+                    <p id="errorMessage"></p>
+                </div>
+            </section>
+        </main>
+
+        <footer>
+            <p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> • Data
+                diproses secara lokal</p>
+        </footer>
+    </div>
+
+    <script>
+        // State shared by the handlers below.
+        // selectedFile: the File picked or dropped by the user; null when nothing is selected.
+        let selectedFile = null;
+        // docType: document type sent to /upload; follows the active .doc-btn's data-type.
+        let docType = 'ktp';
+        // extractedData: result.data from the last successful upload; the editable
+        // fields write into it and Copy/Export serialize it.
+        let extractedData = null;
+
+        // Element references, looked up once when the script runs.
+        const dropzone = document.getElementById('dropzone');
+        const fileInput = document.getElementById('fileInput');
+        const preview = document.getElementById('preview');
+        const processBtn = document.getElementById('processBtn');
+        const resultsSection = document.getElementById('resultsSection');
+        const resultsBody = document.getElementById('resultsBody');
+        const rawText = document.getElementById('rawText');
+        const rawTextSection = document.getElementById('rawTextSection');
+        const errorSection = document.getElementById('errorSection');
+        const errorMessage = document.getElementById('errorMessage');
+        const docBtns = document.querySelectorAll('.doc-btn');
+
+        // Human-readable labels for the result table, keyed by the field names
+        // found in the extracted data. Keys without an entry are displayed as-is.
+        const fieldLabels = {
+            // KTP (identity card) fields
+            'nik': 'NIK',
+            'nama': 'Nama',
+            'tempat_lahir': 'Tempat Lahir',
+            'tanggal_lahir': 'Tanggal Lahir',
+            'jenis_kelamin': 'Jenis Kelamin',
+            'gol_darah': 'Gol. Darah',
+            'alamat': 'Alamat',
+            'rt_rw': 'RT/RW',
+            'kel_desa': 'Kel/Desa',
+            'kecamatan': 'Kecamatan',
+            'agama': 'Agama',
+            'status_perkawinan': 'Status Perkawinan',
+            'pekerjaan': 'Pekerjaan',
+            'kewarganegaraan': 'Kewarganegaraan',
+            'berlaku_hingga': 'Berlaku Hingga',
+            'provinsi': 'Provinsi',
+            'kabupaten_kota': 'Kabupaten/Kota',
+            'tanggal_penerbitan': 'Tanggal Penerbitan',
+            // KK (family card) fields
+            'no_kk': 'No. KK',
+            'nama_kepala_keluarga': 'Kepala Keluarga',
+            'kode_pos': 'Kode Pos',
+            'anggota_keluarga': 'Jumlah Anggota'
+        };
+
+        // Doc type selection
+        docBtns.forEach(btn => {
+            btn.addEventListener('click', () => {
+                docBtns.forEach(b => b.classList.remove('active'));
+                btn.classList.add('active');
+                docType = btn.dataset.type;
+            });
+        });
+
+        // Drag & drop
+        dropzone.addEventListener('dragover', (e) => {
+            e.preventDefault();
+            dropzone.classList.add('dragover');
+        });
+
+        dropzone.addEventListener('dragleave', () => {
+            dropzone.classList.remove('dragover');
+        });
+
+        dropzone.addEventListener('drop', (e) => {
+            e.preventDefault();
+            dropzone.classList.remove('dragover');
+            const files = e.dataTransfer.files;
+            if (files.length > 0) {
+                handleFile(files[0]);
+            }
+        });
+
+        // File input
+        fileInput.addEventListener('change', (e) => {
+            if (e.target.files.length > 0) {
+                handleFile(e.target.files[0]);
+            }
+        });
+
+        // Click on dropzone
+        dropzone.addEventListener('click', (e) => {
+            if (e.target === dropzone || e.target.closest('.dropzone-content')) {
+                fileInput.click();
+            }
+        });
+
+        function handleFile(file) {
+            if (!file.type.startsWith('image/')) {
+                showError('File harus berupa gambar');
+                return;
+            }
+
+            if (file.size > 16 * 1024 * 1024) {
+                showError('Ukuran file maksimal 16MB');
+                return;
+            }
+
+            selectedFile = file;
+
+            // Show preview
+            const reader = new FileReader();
+            reader.onload = (e) => {
+                preview.src = e.target.result;
+                preview.style.display = 'block';
+                dropzone.querySelector('.dropzone-content').style.display = 'none';
+            };
+            reader.readAsDataURL(file);
+
+            processBtn.disabled = false;
+            hideError();
+            resultsSection.style.display = 'none';
+        }
+
+        // Process button
+        processBtn.addEventListener('click', async () => {
+            if (!selectedFile) return;
+
+            const btnText = processBtn.querySelector('.btn-text');
+            const btnLoading = processBtn.querySelector('.btn-loading');
+
+            processBtn.disabled = true;
+            btnText.style.display = 'none';
+            btnLoading.style.display = 'inline';
+
+            try {
+                const formData = new FormData();
+                formData.append('file', selectedFile);
+                formData.append('doc_type', docType);
+
+                const response = await fetch('/upload', {
+                    method: 'POST',
+                    body: formData
+                });
+
+                const result = await response.json();
+
+                if (result.success) {
+                    extractedData = result.data;
+                    displayResults(result);
+                    hideError();
+                } else {
+                    showError(result.error);
+                    resultsSection.style.display = 'none';
+                }
+            } catch (error) {
+                showError('Terjadi kesalahan: ' + error.message);
+            } finally {
+                processBtn.disabled = false;
+                btnText.style.display = 'inline';
+                btnLoading.style.display = 'none';
+            }
+        });
+
+        // Region fields that use dropdowns - in hierarchical order
+        // (each level's option list depends on the code selected one level above).
+        const regionFields = ['provinsi', 'kabupaten_kota', 'kecamatan', 'kel_desa'];
+        // Client-side cache of region lists fetched from the /api/* endpoints.
+        // provinces is a flat list; the other three are keyed by the parent's code.
+        let regionData = {
+            provinces: [],
+            regencies: {},
+            districts: {},
+            villages: {}
+        };
+        // Per-field validation info keyed by field name. Entries are read and
+        // written as { valid, code, suggestion }; null until a validation succeeds.
+        let validationResult = null;
+
+        // Define field display order. Keys not listed here are shown after these.
+        const fieldOrder = [
+            // Location hierarchy first
+            'provinsi', 'kabupaten_kota', 'kecamatan', 'kel_desa',
+            // Identity
+            'nik', 'nama', 'tempat_lahir', 'tanggal_lahir', 'jenis_kelamin', 'gol_darah',
+            // Address
+            'alamat', 'rt_rw',
+            // Other info
+            'agama', 'status_perkawinan', 'pekerjaan', 'kewarganegaraan', 'berlaku_hingga',
+            // Issue date
+            'tanggal_penerbitan',
+            // KK specific
+            'no_kk', 'nama_kepala_keluarga', 'kode_pos', 'anggota_keluarga'
+        ];
+
+        async function displayResults(result) {
+            resultsBody.innerHTML = '';
+            const data = result.data;
+            extractedData = data;
+
+            // Validate region data first
+            await validateRegionData(data);
+
+            // Sort keys by fieldOrder
+            const sortedKeys = Object.keys(data).sort((a, b) => {
+                const indexA = fieldOrder.indexOf(a);
+                const indexB = fieldOrder.indexOf(b);
+                if (indexA === -1 && indexB === -1) return 0;
+                if (indexA === -1) return 1;
+                if (indexB === -1) return -1;
+                return indexA - indexB;
+            });
+
+            for (const key of sortedKeys) {
+                const value = data[key];
+                if (key === 'anggota_keluarga') {
+                    const count = Array.isArray(value) ? value.length : 0;
+                    addResultRow('Jumlah Anggota', count + ' orang', null, false);
+                } else if (regionFields.includes(key)) {
+                    // Region field with dropdown
+                    const label = fieldLabels[key] || key;
+                    await addRegionRow(label, value || '', key);
+                } else {
+                    const label = fieldLabels[key] || key;
+                    addResultRow(label, value || '', key, true);
+                }
+            }
+
+            rawText.textContent = result.raw_text;
+            resultsSection.style.display = 'block';
+            resultsSection.scrollIntoView({ behavior: 'smooth' });
+        }
+
+        async function validateRegionData(data) {
+            try {
+                const response = await fetch('/api/validate-region', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify(data)
+                });
+                const result = await response.json();
+                if (result.success) {
+                    validationResult = result.validation;
+                }
+            } catch (e) {
+                console.error('Validation error:', e);
+            }
+        }
+
+        async function addRegionRow(label, value, key) {
+            const row = document.createElement('tr');
+            const validation = validationResult?.[key];
+            const isValid = validation?.valid;
+            const suggestion = validation?.suggestion;
+
+            // Status indicator
+            const statusIcon = isValid ? '✓' : (value ? '⚠' : '');
+            const statusClass = isValid ? 'valid-field' : (value ? 'invalid-field' : '');
+
+            row.innerHTML = `
+                <td class="field-label">
+                    ${label}
+                    <span class="validation-status ${statusClass}">${statusIcon}</span>
+                </td>
+                <td class="field-value">
+                    <div class="region-field-wrapper">
+                        <input type="text" class="editable-field ${statusClass}" data-key="${key}" 
+                               value="${suggestion || value || ''}" placeholder="Ketik atau pilih...">
+                        <select class="region-dropdown" data-key="${key}" style="display: none;">
+                            <option value="">-- Pilih --</option>
+                        </select>
+                        <button type="button" class="dropdown-toggle" data-key="${key}" title="Pilih dari daftar">▼</button>
+                    </div>
+                    ${suggestion && suggestion !== value ? `<div class="suggestion-text">Saran: ${suggestion}</div>` : ''}
+                </td>
+            `;
+
+            const input = row.querySelector('input');
+            const select = row.querySelector('select');
+            const toggleBtn = row.querySelector('.dropdown-toggle');
+
+            // Input change
+            input.addEventListener('input', (e) => {
+                if (extractedData) {
+                    extractedData[key] = e.target.value;
+                }
+            });
+
+            // Toggle dropdown
+            toggleBtn.addEventListener('click', async () => {
+                if (select.style.display === 'none') {
+                    await loadDropdownOptions(key, select);
+                    select.style.display = 'block';
+                    input.style.display = 'none';
+                } else {
+                    select.style.display = 'none';
+                    input.style.display = 'block';
+                }
+            });
+
+            // Select change
+            select.addEventListener('change', (e) => {
+                const selectedOption = e.target.options[e.target.selectedIndex];
+                const selectedCode = selectedOption.value;
+                const selectedName = selectedOption.text !== '-- Pilih --' ? selectedOption.text : '';
+
+                input.value = selectedName;
+                if (extractedData) {
+                    extractedData[key] = selectedName;
+                }
+
+                // Update validation result with selected code for cascading
+                if (!validationResult) validationResult = {};
+                validationResult[key] = {
+                    valid: !!selectedCode,
+                    code: selectedCode,
+                    suggestion: selectedName
+                };
+
+                select.style.display = 'none';
+                input.style.display = 'block';
+
+                // Change toggle button to checkmark if valid selection
+                if (selectedCode) {
+                    toggleBtn.textContent = '✓';
+                    toggleBtn.classList.add('confirmed');
+                    input.classList.remove('invalid-field');
+                    input.classList.add('valid-field');
+                } else {
+                    toggleBtn.textContent = '▼';
+                    toggleBtn.classList.remove('confirmed');
+                }
+
+                // Clear dependent fields and their codes
+                clearDependentFields(key);
+            });
+
+            resultsBody.appendChild(row);
+        }
+
+        async function loadDropdownOptions(key, select) {
+            select.innerHTML = '<option value="">Loading...</option>';
+
+            try {
+                let data = [];
+
+                if (key === 'provinsi') {
+                    if (!regionData.provinces.length) {
+                        const res = await fetch('/api/provinces');
+                        const json = await res.json();
+                        regionData.provinces = json.data || [];
+                    }
+                    data = regionData.provinces;
+                } else if (key === 'kabupaten_kota') {
+                    const provCode = validationResult?.provinsi?.code;
+                    if (provCode) {
+                        if (!regionData.regencies[provCode]) {
+                            const res = await fetch(`/api/regencies/${provCode}`);
+                            const json = await res.json();
+                            regionData.regencies[provCode] = json.data || [];
+                        }
+                        data = regionData.regencies[provCode];
+                    }
+                } else if (key === 'kecamatan') {
+                    const regCode = validationResult?.kabupaten_kota?.code;
+                    if (regCode) {
+                        if (!regionData.districts[regCode]) {
+                            const res = await fetch(`/api/districts/${regCode}`);
+                            const json = await res.json();
+                            regionData.districts[regCode] = json.data || [];
+                        }
+                        data = regionData.districts[regCode];
+                    }
+                } else if (key === 'kel_desa') {
+                    const distCode = validationResult?.kecamatan?.code;
+                    if (distCode) {
+                        if (!regionData.villages[distCode]) {
+                            const res = await fetch(`/api/villages/${distCode}`);
+                            const json = await res.json();
+                            regionData.villages[distCode] = json.data || [];
+                        }
+                        data = regionData.villages[distCode];
+                    }
+                }
+
+                select.innerHTML = '<option value="">-- Pilih --</option>';
+                data.forEach(item => {
+                    const option = document.createElement('option');
+                    option.value = item.code;
+                    option.textContent = item.name;
+                    select.appendChild(option);
+                });
+            } catch (e) {
+                select.innerHTML = '<option value="">Error loading data</option>';
+            }
+        }
+
+        function clearDependentFields(key) {
+            const dependents = {
+                'provinsi': ['kabupaten_kota', 'kecamatan', 'kel_desa'],
+                'kabupaten_kota': ['kecamatan', 'kel_desa'],
+                'kecamatan': ['kel_desa']
+            };
+
+            (dependents[key] || []).forEach(depKey => {
+                const input = document.querySelector(`input[data-key="${depKey}"]`);
+                if (input) input.value = '';
+                if (extractedData) extractedData[depKey] = '';
+                // Clear validation code for cascading
+                if (validationResult && validationResult[depKey]) {
+                    validationResult[depKey] = { valid: false, code: null, suggestion: null };
+                }
+            });
+        }
+
+        function addResultRow(label, value, key, editable = true) {
+            const row = document.createElement('tr');
+            if (editable && key) {
+                row.innerHTML = `
+                    <td class="field-label">${label}</td>
+                    <td class="field-value">
+                        <input type="text" class="editable-field" data-key="${key}" value="${value || ''}" placeholder="Klik untuk edit...">
+                    </td>
+                `;
+                const input = row.querySelector('input');
+                input.addEventListener('input', (e) => {
+                    if (extractedData && key) {
+                        extractedData[key] = e.target.value;
+                    }
+                });
+            } else {
+                row.innerHTML = `
+                    <td class="field-label">${label}</td>
+                    <td class="field-value">${value || '-'}</td>
+                `;
+            }
+            resultsBody.appendChild(row);
+        }
+
+        // Toggle raw text
+        document.getElementById('toggleRaw').addEventListener('click', () => {
+            const isVisible = rawTextSection.style.display !== 'none';
+            rawTextSection.style.display = isVisible ? 'none' : 'block';
+        });
+
+        // Copy to clipboard
+        document.getElementById('copyBtn').addEventListener('click', () => {
+            if (extractedData) {
+                navigator.clipboard.writeText(JSON.stringify(extractedData, null, 2))
+                    .then(() => alert('Data berhasil disalin!'));
+            }
+        });
+
+        // Export JSON
+        document.getElementById('exportBtn').addEventListener('click', () => {
+            if (extractedData) {
+                const blob = new Blob([JSON.stringify(extractedData, null, 2)], { type: 'application/json' });
+                const url = URL.createObjectURL(blob);
+                const a = document.createElement('a');
+                a.href = url;
+                a.download = `${docType}_data.json`;
+                a.click();
+                URL.revokeObjectURL(url);
+            }
+        });
+
+        /**
+         * Show the error banner with the given message.
+         *
+         * @param {string} message - Text to display; assigned via textContent, so it is not parsed as HTML.
+         */
+        function showError(message) {
+            errorMessage.textContent = message;
+            errorSection.style.display = 'block';
+        }
+
+        /** Hide the error banner; the last message text is left in place. */
+        function hideError() {
+            errorSection.style.display = 'none';
+        }
+
+        // Reset on new file selection
+        preview.addEventListener('click', () => {
+            preview.style.display = 'none';
+            dropzone.querySelector('.dropzone-content').style.display = 'flex';
+            selectedFile = null;
+            processBtn.disabled = true;
+            fileInput.value = '';
+        });
+    </script>
+</body>
+
+</html>