OCR dengan ZONA
This commit is contained in:
BIN
__pycache__/kk_extractor.cpython-313.pyc
Normal file
BIN
__pycache__/kk_extractor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/ktp_extractor.cpython-313.pyc
Normal file
BIN
__pycache__/ktp_extractor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/ocr_engine.cpython-313.pyc
Normal file
BIN
__pycache__/ocr_engine.cpython-313.pyc
Normal file
Binary file not shown.
253
app.py
Normal file
253
app.py
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
"""
|
||||||
|
Flask Web Server untuk OCR KTP/KK
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from flask import Flask, render_template, request, jsonify
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
|
from ocr_engine import get_ocr_engine
|
||||||
|
from ktp_extractor import KTPExtractor
|
||||||
|
from kk_extractor import KKExtractor
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Application object and configuration
# ---------------------------------------------------------------------------
app = Flask(__name__)

# Uploaded images are staged in an "uploads" directory next to this module.
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
# File extensions accepted by the upload endpoint.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
# 16 MB upper bound, stored in Flask's MAX_CONTENT_LENGTH setting.
MAX_CONTENT_LENGTH = 16 * 1024 * 1024

app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
)

# Create the staging directory up front if it does not exist yet.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# One shared extractor instance per supported document type.
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_file(filename):
    """Tell whether *filename* ends in one of the accepted image extensions.

    The extension is the text after the last dot, compared case-insensitively
    against ``ALLOWED_EXTENSIONS``. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    _, extension = filename.rsplit('.', 1)
    return extension.lower() in ALLOWED_EXTENSIONS
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload, run OCR on it and return the extracted fields.

    Expects a multipart form with a ``file`` part and an optional ``doc_type``
    field (``'ktp'`` by default; any other value is processed with the KK
    extractor).

    Returns:
        A JSON response. On success it carries ``success``, ``doc_type``,
        ``data`` (extractor output), ``raw_text`` and ``ocr_count``. On failure
        it carries ``success: False`` and an ``error`` message, with status 400
        for rejected input / unreadable images and 500 for unexpected errors.
    """
    # Local import: only needed here, to build collision-free staging names.
    import uuid

    try:
        # Validate the request before touching the disk.
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400

        file = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')

        if file.filename == '':
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Stage the upload under a server-generated unique name. Using the
        # client's file name lets two concurrent uploads with the same name
        # overwrite (and then delete) each other's image, and sanitising a
        # client name can leave nothing usable. Only the already validated
        # extension is kept.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f"{uuid.uuid4().hex}.{extension}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        try:
            # Saving happens inside the guarded region so that a partially
            # written file is removed as well.
            file.save(filepath)

            # Run OCR
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)

            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Pick the extractor that matches the document type.
            if doc_type == 'ktp':
                extracted = ktp_extractor.extract(ocr_results)
            else:
                extracted = kk_extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging.
            raw_text = '\n'.join([r['text'] for r in ocr_results])

            # DEBUG: dump the raw OCR lines to stdout.
            # NOTE(review): this prints personal data from the ID document to
            # the server console/logs - consider removing it outside development.
            print("\n" + "="*50)
            print("DEBUG: Raw OCR Results")
            print("="*50)
            for i, r in enumerate(ocr_results):
                print(f"[{i}] {r['text']}")
            print("="*50 + "\n")

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })

        finally:
            # Remove the staged file after processing (personal data safety).
            if os.path.exists(filepath):
                os.remove(filepath)

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# Region Data API (using wilayah.id)
|
||||||
|
# ============================================
|
||||||
|
import requests
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
WILAYAH_API_BASE = "https://wilayah.id/api"


@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Download and decode one region endpoint, raising on any failure.

    Failures are raised rather than returned so that ``lru_cache`` stores
    successful responses only; a cached ``None`` would turn a single transient
    network error into a lasting failure for that endpoint.
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code} for {endpoint}")
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data for *endpoint*, caching successful responses.

    Args:
        endpoint: Path below ``WILAYAH_API_BASE``, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
        Failures are not cached, so the next call tries again.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except Exception as e:
        print(f"Error fetching region data: {e}")
        return None


# Keep the cache-management helpers reachable under the public name.
fetch_region_data.cache_clear = _fetch_region_data_cached.cache_clear
fetch_region_data.cache_info = _fetch_region_data_cached.cache_info
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the text, trims surrounding whitespace and removes every dot
    and space. A falsy *name* (``None`` or ``""``) yields ``""``.
    """
    if not name:
        return ""
    return name.upper().strip().replace(".", "").replace(" ", "")


def find_best_match(search_name, items, key='name'):
    """Find the item whose *key* value best matches *search_name*.

    Matching works on normalized names (see ``normalize_name``): an exact
    match wins; otherwise the first item whose name contains the search text,
    or is contained in it, is returned.

    Args:
        search_name: Name to look for (typically OCR output).
        items: Iterable of dicts to search.
        key: Dict key holding each item's name.

    Returns:
        The matching item, or ``None`` when nothing matches, when the inputs
        are empty, or when *search_name* normalizes to an empty string.
    """
    if not search_name or not items:
        return None

    search_norm = normalize_name(search_name)
    if not search_norm:
        # Input such as "." or " " carries no comparable text; an empty string
        # is a substring of everything and would match the first item.
        return None

    # Normalize every candidate once and reuse it for both passes.
    candidates = [(normalize_name(item.get(key, '')), item) for item in items]

    # Pass 1: exact match
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Pass 2: containment in either direction
    for item_norm, item in candidates:
        if not item_norm:
            # Items with a missing/empty name must not match: '' is contained
            # in every search string.
            continue
        if search_norm in item_norm or item_norm in search_norm:
            return item

    return None
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/api/provinces')
def get_provinces():
    """Return the province list, or an empty ``data`` list with status 500."""
    payload = fetch_region_data("provinces.json")
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Return the cities/regencies of one province.

    Responds with an empty ``data`` list and status 500 when no data is
    available for *province_code*.
    """
    payload = fetch_region_data(f"regencies/{province_code}.json")
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Return the districts of one regency.

    Responds with an empty ``data`` list and status 500 when no data is
    available for *regency_code*.
    """
    payload = fetch_region_data(f"districts/{regency_code}.json")
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Return the villages of one district.

    Responds with an empty ``data`` list and status 500 when no data is
    available for *district_code*.
    """
    payload = fetch_region_data(f"villages/{district_code}.json")
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR'd region names against the region database.

    The JSON body may carry ``provinsi``, ``kabupaten_kota``, ``kecamatan``
    and ``kel_desa``. Levels are checked top-down; each level is looked up
    only inside the match found for the level above, and checking stops at
    the first level that cannot be fetched or matched.

    Returns:
        ``{'success': True, 'validation': {...}}`` where every level maps to
        ``{'valid', 'code', 'suggestion'}``; status 400 when the body is not a
        JSON object; status 500 with the error text on unexpected failures.
    """
    try:
        # silent=True yields None instead of raising on a missing or invalid
        # body, so malformed requests get a 400 rather than a 500.
        ocr_data = request.get_json(silent=True)
        if not isinstance(ocr_data, dict):
            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

        # (field in the request, endpoint prefix of the next level down)
        levels = (
            ('provinsi', 'regencies'),
            ('kabupaten_kota', 'districts'),
            ('kecamatan', 'villages'),
            ('kel_desa', None),
        )

        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in levels
        }

        endpoint = "provinces.json"
        for field, child_prefix in levels:
            region_data = fetch_region_data(endpoint)
            if not region_data or 'data' not in region_data:
                break

            match = find_best_match(ocr_data.get(field), region_data['data'])
            if not match:
                # Without a match there is no parent code for the next level.
                break

            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}
            if child_prefix is not None:
                endpoint = f"{child_prefix}/{match['code']}.json"

        return jsonify({'success': True, 'validation': result})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/health')
def health():
    """Health check endpoint: always answers with ``{'status': 'ok'}``."""
    payload = {'status': 'ok'}
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Startup banner for people running the server by hand.
    print("="*50)
    print("OCR KTP/KK Application")
    print("="*50)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print("="*50)

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port run code on
    # this machine. Opt in explicitly with FLASK_DEBUG=1 during development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|
||||||
235
kk_extractor.py
Normal file
235
kk_extractor.py
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
"""
|
||||||
|
KK (Kartu Keluarga) Field Extractor
|
||||||
|
Ekstraksi data terstruktur dari hasil OCR KK Indonesia
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
|
||||||
|
|
||||||
|
class KKExtractor:
    """Extract structured fields from the OCR output of a Kartu Keluarga (KK)."""

    def __init__(self):
        pass

    def extract(self, ocr_results: List[Dict]) -> Dict:
        """Extract KK fields from OCR results.

        Args:
            ocr_results: List of results from ``OCREngine.extract_text()``.
                Each entry must have a ``'text'`` key; ``'y_center'`` is read
                when present and defaults to 0.

        Returns:
            Dict with the KK header fields (``None`` when not found) and
            ``'anggota_keluarga'``, a list with one dict per family member
            found after the member-table header.
        """
        all_text = '\n'.join([r['text'] for r in ocr_results])

        result = {
            'no_kk': None,
            'nama_kepala_keluarga': None,
            'alamat': None,
            'rt_rw': None,
            'kel_desa': None,
            'kecamatan': None,
            'kabupaten_kota': None,
            'provinsi': None,
            'kode_pos': None,
            'anggota_keluarga': [],
        }

        # KK number: first standalone 16-digit run anywhere in the text.
        kk_match = re.search(r'\b(\d{16})\b', all_text)
        if kk_match:
            result['no_kk'] = kk_match.group(1)

        # State for detecting the family-member table.
        in_table = False
        table_start_y = None

        for i, ocr in enumerate(ocr_results):
            text = ocr['text'].strip()
            text_lower = text.lower()
            y_pos = ocr.get('y_center', 0)

            # Province
            if 'provinsi' in text_lower and result['provinsi'] is None:
                result['provinsi'] = self._extract_value(text, 'provinsi')

            # Regency / city; falls back to the whole line when no value
            # follows the label.
            if ('kabupaten' in text_lower or 'kota' in text_lower) and result['kabupaten_kota'] is None:
                val = self._extract_value(text, 'kabupaten') or self._extract_value(text, 'kota')
                if val:
                    result['kabupaten_kota'] = val
                else:
                    result['kabupaten_kota'] = text

            # District
            if 'kecamatan' in text_lower and result['kecamatan'] is None:
                result['kecamatan'] = self._extract_value(text, 'kecamatan')

            # Village
            if ('kelurahan' in text_lower or 'desa' in text_lower) and result['kel_desa'] is None:
                result['kel_desa'] = self._extract_value(text, 'kelurahan') or self._extract_value(text, 'desa')

            # Labelled KK number: look for 16 digits on this line, then on
            # the next one.
            if 'no' in text_lower and ('kk' in text_lower or 'kartu' in text_lower):
                match = re.search(r'(\d{16})', text)
                if match:
                    result['no_kk'] = match.group(1)
                elif i + 1 < len(ocr_results):
                    next_text = ocr_results[i + 1]['text']
                    match = re.search(r'(\d{16})', next_text)
                    if match:
                        result['no_kk'] = match.group(1)

            # Name of the head of the family
            if 'kepala' in text_lower and 'keluarga' in text_lower:
                result['nama_kepala_keluarga'] = self._extract_value(text, 'keluarga')
                if not result['nama_kepala_keluarga'] and i + 1 < len(ocr_results):
                    # The name may sit on the following line, unless that
                    # line looks like another labelled field.
                    next_text = ocr_results[i + 1]['text'].strip()
                    if not any(kw in next_text.lower() for kw in ['alamat', 'rt', 'rw', 'provinsi']):
                        result['nama_kepala_keluarga'] = next_text

            # Address
            if 'alamat' in text_lower and result['alamat'] is None:
                result['alamat'] = self._extract_value(text, 'alamat')

            # RT/RW
            rt_rw_match = re.search(r'rt\s*/?\s*rw\s*[:\s]*(\d+)\s*/\s*(\d+)', text_lower)
            if rt_rw_match:
                result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"

            # Postal code
            if 'kode' in text_lower and 'pos' in text_lower:
                match = re.search(r'(\d{5})', text)
                if match:
                    result['kode_pos'] = match.group(1)

            # Header row of the family-member table
            if self._is_table_header(text_lower):
                in_table = True
                table_start_y = y_pos
                continue

            # Member rows. Compare against None explicitly: a header at
            # y == 0 (also the default when 'y_center' is absent) is a valid
            # position and must not switch member extraction off.
            if in_table and table_start_y is not None:
                member = self._extract_member(text, ocr_results, i)
                if member:
                    result['anggota_keluarga'].append(member)

        # Post-processing
        result = self._post_process(result)

        return result

    def _extract_value(self, text: str, field: str) -> Optional[str]:
        """Return the value that follows the *field* label in *text*.

        Tries ``<field>...: value`` first, then ``<field>... value``
        (case-insensitive). Returns ``None`` when neither form yields a
        non-empty value.
        """
        patterns = [
            rf'{field}[a-z]*\s*:\s*(.+)',
            rf'{field}[a-z]*\s+(.+)',
        ]

        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                value = match.group(1).strip()
                # Drop any colon/whitespace left in front of the value.
                value = re.sub(r'^[:\s]+', '', value)
                if value:
                    return value
        return None

    def _is_table_header(self, text: str) -> bool:
        """Tell whether *text* (already lower-cased by the caller) looks like
        the member-table header: at least two header keywords occur in it."""
        header_keywords = ['no', 'nama lengkap', 'nik', 'jenis kelamin', 'hubungan']
        count = sum(1 for kw in header_keywords if kw in text)
        return count >= 2

    def _extract_member(self, text: str, all_results: List[Dict], current_idx: int) -> Optional[Dict]:
        """Extract one family member from a table row.

        Args:
            text: OCR text of the row.
            all_results: Full OCR result list (currently unused).
            current_idx: Index of this row in *all_results* (currently unused).

        Returns:
            A member dict, or ``None`` when the row holds no standalone
            16-digit NIK. Only ``nik``, ``jenis_kelamin`` and ``hubungan`` are
            filled in here; the remaining keys stay ``None``.
        """
        # A row counts as a member only when it contains a NIK.
        nik_match = re.search(r'\b(\d{16})\b', text)
        if not nik_match:
            return None

        member = {
            'nik': nik_match.group(1),
            'nama': None,
            'jenis_kelamin': None,
            'tempat_lahir': None,
            'tanggal_lahir': None,
            'hubungan': None,
        }

        # Gender: full word, or a standalone "l" / "p" token.
        if 'laki' in text.lower() or ' l ' in f' {text.lower()} ':
            member['jenis_kelamin'] = 'LAKI-LAKI'
        elif 'perempuan' in text.lower() or ' p ' in f' {text.lower()} ':
            member['jenis_kelamin'] = 'PEREMPUAN'

        # Family relationship: first keyword (in this order) found in the row.
        hubungan_keywords = {
            'kepala': 'KEPALA KELUARGA',
            'istri': 'ISTRI',
            'suami': 'SUAMI',
            'anak': 'ANAK',
            'menantu': 'MENANTU',
            'cucu': 'CUCU',
            'orang tua': 'ORANG TUA',
            'mertua': 'MERTUA',
        }

        for keyword, value in hubungan_keywords.items():
            if keyword in text.lower():
                member['hubungan'] = value
                break

        return member

    def _post_process(self, result: Dict) -> Dict:
        """Clean up the extraction result in place and return it.

        Reduces the KK number to its digits (or ``None`` when that does not
        leave exactly 16) and upper-cases the textual header fields.
        """
        # Validate the KK number
        if result['no_kk'] and not re.match(r'^\d{16}$', result['no_kk']):
            cleaned = re.sub(r'\D', '', result['no_kk'])
            if len(cleaned) == 16:
                result['no_kk'] = cleaned
            else:
                result['no_kk'] = None

        # Upper-case text fields
        for field in ['nama_kepala_keluarga', 'alamat', 'kel_desa', 'kecamatan',
                      'kabupaten_kota', 'provinsi']:
            if result[field]:
                result[field] = result[field].upper()

        return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run the extractor on a hand-written sample and print
    # what it finds.
    sample_texts = (
        'KARTU KELUARGA',
        'No. 3204012345678901',
        'Nama Kepala Keluarga : JOHN DOE',
        'Alamat : JL. MERDEKA NO. 123',
        'RT/RW : 001/002',
        'Desa/Kelurahan : SUKAMAJU',
        'Kecamatan : SUKASARI',
        'Kabupaten/Kota : BANDUNG',
        'Provinsi : JAWA BARAT',
        'Kode Pos : 40154',
    )
    # y_center starts at 10 and grows by 20 per line.
    sample_ocr = [
        {'text': line, 'y_center': 10 + 20 * row}
        for row, line in enumerate(sample_texts)
    ]

    result = KKExtractor().extract(sample_ocr)

    for key, value in result.items():
        if key == 'anggota_keluarga':
            continue
        print(f"{key}: {value}")

    print(f"\nAnggota Keluarga: {len(result['anggota_keluarga'])} orang")
|
||||||
602
ktp_extractor.py
Normal file
602
ktp_extractor.py
Normal file
@@ -0,0 +1,602 @@
|
|||||||
|
"""
|
||||||
|
KTP Field Extractor
|
||||||
|
Ekstraksi data terstruktur dari hasil OCR KTP Indonesia
|
||||||
|
Mendukung berbagai format output OCR (full-width colon, standard colon, tanpa colon)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
|
||||||
|
|
||||||
|
class KTPExtractor:
|
||||||
|
"""Ekstrak field dari hasil OCR KTP"""
|
||||||
|
|
||||||
|
# Pattern colon yang berbeda-beda (standard, full-width, dll)
|
||||||
|
COLON_PATTERN = r'[:\:]'
|
||||||
|
|
||||||
|
# Keywords untuk jenis kelamin
|
||||||
|
MALE_KEYWORDS = ['laki', 'pria', 'male']
|
||||||
|
FEMALE_KEYWORDS = ['perempuan', 'wanita', 'female']
|
||||||
|
|
||||||
|
# Agama yang valid
|
||||||
|
AGAMA_LIST = ['islam', 'kristen', 'katolik', 'hindu', 'budha', 'buddha', 'konghucu']
|
||||||
|
|
||||||
|
# Pekerjaan umum
|
||||||
|
PEKERJAAN_LIST = ['pelajar', 'mahasiswa', 'pegawai', 'swasta', 'pns', 'wiraswasta',
|
||||||
|
'buruh', 'petani', 'nelayan', 'karyawan', 'ibu rumah tangga',
|
||||||
|
'tidak bekerja', 'lainnya', 'mengurus rumah tangga']
|
||||||
|
|
||||||
|
# KTP Zone Template (normalized coordinates: x_min, y_min, x_max, y_max)
|
||||||
|
# Based on standard KTP layout
|
||||||
|
ZONES = {
|
||||||
|
'header_provinsi': (0.15, 0.00, 0.85, 0.07), # PROVINSI header
|
||||||
|
'header_kabupaten': (0.15, 0.05, 0.85, 0.13), # KABUPATEN header
|
||||||
|
'nik': (0.02, 0.10, 0.70, 0.22), # NIK area
|
||||||
|
'nama': (0.02, 0.18, 0.70, 0.28), # Nama area
|
||||||
|
'ttl': (0.02, 0.25, 0.70, 0.36), # Tempat/Tgl Lahir
|
||||||
|
'jenis_kelamin': (0.02, 0.33, 0.45, 0.42), # Jenis Kelamin (left)
|
||||||
|
'gol_darah': (0.40, 0.33, 0.70, 0.42), # Gol Darah (right of jenis)
|
||||||
|
'alamat': (0.02, 0.38, 0.70, 0.50), # Alamat
|
||||||
|
'rt_rw': (0.02, 0.46, 0.70, 0.54), # RT/RW
|
||||||
|
'kel_desa': (0.02, 0.51, 0.70, 0.60), # Kel/Desa
|
||||||
|
'kecamatan': (0.02, 0.57, 0.70, 0.66), # Kecamatan
|
||||||
|
'agama': (0.02, 0.63, 0.70, 0.72), # Agama
|
||||||
|
'status': (0.02, 0.69, 0.70, 0.78), # Status Perkawinan
|
||||||
|
'pekerjaan': (0.02, 0.75, 0.70, 0.84), # Pekerjaan
|
||||||
|
'wni': (0.02, 0.81, 0.70, 0.90), # Kewarganegaraan
|
||||||
|
'berlaku': (0.02, 0.87, 0.70, 0.96), # Berlaku Hingga
|
||||||
|
'foto': (0.68, 0.10, 0.98, 0.55), # Foto (right side)
|
||||||
|
'penerbitan': (0.65, 0.58, 0.98, 0.98), # Tempat & Tanggal penerbitan
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.image_width = 0
|
||||||
|
self.image_height = 0
|
||||||
|
|
||||||
|
def _get_zone(self, x_center: float, y_center: float, img_width: int, img_height: int) -> Optional[str]:
|
||||||
|
"""Determine which zone a text belongs to based on normalized coordinates"""
|
||||||
|
if img_width == 0 or img_height == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Normalize coordinates
|
||||||
|
x_norm = x_center / img_width
|
||||||
|
y_norm = y_center / img_height
|
||||||
|
|
||||||
|
for zone_name, (x_min, y_min, x_max, y_max) in self.ZONES.items():
|
||||||
|
if x_min <= x_norm <= x_max and y_min <= y_norm <= y_max:
|
||||||
|
return zone_name
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _extract_value_from_text(self, text: str) -> str:
|
||||||
|
"""Extract value part from label:value text"""
|
||||||
|
# Split by colon (standard or full-width)
|
||||||
|
parts = re.split(r'[::]', text, 1)
|
||||||
|
if len(parts) > 1:
|
||||||
|
return parts[1].strip()
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
def _detect_image_size(self, ocr_results: List[Dict]) -> tuple:
|
||||||
|
"""Detect image dimensions from bounding boxes"""
|
||||||
|
max_x, max_y = 0, 0
|
||||||
|
for r in ocr_results:
|
||||||
|
bbox = r.get('bbox', [])
|
||||||
|
if bbox and len(bbox) >= 4:
|
||||||
|
for point in bbox:
|
||||||
|
if len(point) >= 2:
|
||||||
|
max_x = max(max_x, point[0])
|
||||||
|
max_y = max(max_y, point[1])
|
||||||
|
# Add some margin
|
||||||
|
return (int(max_x * 1.05), int(max_y * 1.05)) if max_x > 0 else (1000, 640)
|
||||||
|
|
||||||
|
def _extract_by_zones(self, zone_texts: Dict[str, List[str]], result: Dict):
|
||||||
|
"""Extract fields based on zone assignments"""
|
||||||
|
|
||||||
|
# PROVINSI from header
|
||||||
|
if 'header_provinsi' in zone_texts:
|
||||||
|
for text in zone_texts['header_provinsi']:
|
||||||
|
if 'provinsi' in text.lower():
|
||||||
|
val = re.sub(r'(?i)provinsi\s*', '', text).strip()
|
||||||
|
if val:
|
||||||
|
result['provinsi'] = val.upper()
|
||||||
|
break
|
||||||
|
|
||||||
|
# KABUPATEN/KOTA from header
|
||||||
|
if 'header_kabupaten' in zone_texts:
|
||||||
|
for text in zone_texts['header_kabupaten']:
|
||||||
|
text_lower = text.lower()
|
||||||
|
if 'kabupaten' in text_lower or 'kota' in text_lower:
|
||||||
|
val = re.sub(r'(?i)(kabupaten|kota)\s*', '', text).strip()
|
||||||
|
if val:
|
||||||
|
result['kabupaten_kota'] = val.upper()
|
||||||
|
else:
|
||||||
|
result['kabupaten_kota'] = text.upper()
|
||||||
|
break
|
||||||
|
|
||||||
|
# NAMA from nama zone (skip label line)
|
||||||
|
if 'nama' in zone_texts:
|
||||||
|
for text in zone_texts['nama']:
|
||||||
|
text_lower = text.lower()
|
||||||
|
if 'nama' not in text_lower and len(text) > 2:
|
||||||
|
result['nama'] = text.upper()
|
||||||
|
break
|
||||||
|
elif 'nama' in text_lower:
|
||||||
|
val = self._extract_value_from_text(text)
|
||||||
|
if val and 'nama' not in val.lower():
|
||||||
|
result['nama'] = val.upper()
|
||||||
|
|
||||||
|
# TTL from ttl zone
|
||||||
|
if 'ttl' in zone_texts:
|
||||||
|
for text in zone_texts['ttl']:
|
||||||
|
if 'tempat' in text.lower() or 'lahir' in text.lower():
|
||||||
|
val = self._extract_value_from_text(text)
|
||||||
|
if val:
|
||||||
|
self._parse_ttl(val, result)
|
||||||
|
break
|
||||||
|
|
||||||
|
# JENIS KELAMIN
|
||||||
|
if 'jenis_kelamin' in zone_texts:
|
||||||
|
for text in zone_texts['jenis_kelamin']:
|
||||||
|
text_lower = text.lower()
|
||||||
|
if 'laki' in text_lower:
|
||||||
|
result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||||
|
break
|
||||||
|
elif 'perempuan' in text_lower:
|
||||||
|
result['jenis_kelamin'] = 'PEREMPUAN'
|
||||||
|
break
|
||||||
|
|
||||||
|
# GOL DARAH
|
||||||
|
if 'gol_darah' in zone_texts:
|
||||||
|
for text in zone_texts['gol_darah']:
|
||||||
|
gol_match = re.search(r'([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
|
||||||
|
if gol_match:
|
||||||
|
result['gol_darah'] = gol_match.group(1).upper()
|
||||||
|
break
|
||||||
|
|
||||||
|
# ALAMAT
|
||||||
|
if 'alamat' in zone_texts:
|
||||||
|
for text in zone_texts['alamat']:
|
||||||
|
if 'alamat' not in text.lower() or len(zone_texts['alamat']) == 1:
|
||||||
|
val = self._extract_value_from_text(text) if 'alamat' in text.lower() else text
|
||||||
|
if val and 'alamat' not in val.lower():
|
||||||
|
result['alamat'] = val.upper()
|
||||||
|
break
|
||||||
|
|
||||||
|
# PENERBITAN area (tempat & tanggal dalam satu zona)
|
||||||
|
if 'penerbitan' in zone_texts:
|
||||||
|
for text in zone_texts['penerbitan']:
|
||||||
|
# Look for date
|
||||||
|
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', text)
|
||||||
|
if date_match and result['tanggal_penerbitan'] is None:
|
||||||
|
result['tanggal_penerbitan'] = date_match.group(1)
|
||||||
|
|
||||||
|
def extract(self, ocr_results: List[Dict]) -> Dict[str, Optional[str]]:
    """
    Extract KTP fields from OCR results.

    Runs in two passes: first a template-based zone pass
    (``_extract_by_zones``), then a line-by-line pattern-matching pass over
    the OCR texts. Most line-by-line rules only fill a field that is still
    None; the exceptions (NIK, RT/RW, WNI/WNA) overwrite unconditionally.
    The dict is finally passed through ``_post_process``.

    Args:
        ocr_results: List of results from OCREngine.extract_text(). Every
            item must have a 'text' key; 'x_center' and 'y_center' are read
            with a default of 0 when missing.

    Returns:
        Dict with the KTP fields (see the keys initialised below), as
        returned by ``_post_process``.
    """
    result = {
        'nik': None,
        'nama': None,
        'tempat_lahir': None,
        'tanggal_lahir': None,
        'jenis_kelamin': None,
        'gol_darah': None,
        'alamat': None,
        'rt_rw': None,
        'kel_desa': None,
        'kecamatan': None,
        'agama': None,
        'status_perkawinan': None,
        'pekerjaan': None,
        'kewarganegaraan': None,
        'berlaku_hingga': None,
        'provinsi': None,
        'kabupaten_kota': None,
        'tanggal_penerbitan': None,
    }

    # Detect image dimensions from bounding boxes
    img_width, img_height = self._detect_image_size(ocr_results)

    # Assign zones to each OCR result
    zone_texts = {}  # zone_name -> list of texts
    for r in ocr_results:
        x_center = r.get('x_center', 0)
        y_center = r.get('y_center', 0)
        zone = self._get_zone(x_center, y_center, img_width, img_height)
        if zone:
            if zone not in zone_texts:
                zone_texts[zone] = []
            zone_texts[zone].append(r['text'])

    # Debug: print zone assignments
    print("\n[DEBUG KTPExtractor] Zone assignments:")
    for zone, texts in zone_texts.items():
        print(f" {zone}: {texts}")

    # Extract fields using zone-based approach
    self._extract_by_zones(zone_texts, result)

    # Join all texts for the fallback pattern matching
    texts = [r['text'].strip() for r in ocr_results]
    all_text = '\n'.join(texts)

    # Extract the NIK (16 digits) - it can appear anywhere.
    # This overwrites any NIK the zone pass may have stored.
    nik_match = re.search(r'\b(\d{16})\b', all_text)
    if nik_match:
        result['nik'] = nik_match.group(1)
        print(f" -> NIK found: {result['nik']}")

    # Fallback: Parse line by line for fields not found by zone
    for i, text in enumerate(texts):
        text_lower = text.lower()

        # Normalize colons
        text_normalized = re.sub(self.COLON_PATTERN, ':', text)
        # NOTE(review): text_norm_lower is never read in this method.
        text_norm_lower = text_normalized.lower()

        # ===== PROVINSI (province) =====
        if 'provinsi' in text_lower and result['provinsi'] is None:
            val = self._extract_after_label(text_normalized, 'provinsi')
            if val:
                result['provinsi'] = val.upper()
            elif i + 1 < len(texts) and 'provinsi' not in texts[i+1].lower():
                # The value may be on the next line
                result['provinsi'] = texts[i+1].strip().upper()

        # ===== KABUPATEN/KOTA (regency/city) =====
        if ('kabupaten' in text_lower or 'kota' in text_lower or 'jakarta' in text_lower) and result['kabupaten_kota'] is None:
            if 'provinsi' not in text_lower:  # Not part of the province line
                val = self._extract_after_label(text_normalized, 'kabupaten|kota')
                if val:
                    result['kabupaten_kota'] = val.upper()
                else:
                    # No label/value split found: keep the whole line
                    result['kabupaten_kota'] = text.strip().upper()

        # ===== NAMA (name) =====
        if 'nama' in text_lower and result['nama'] is None:
            val = self._extract_after_label(text_normalized, 'nama')
            if val and len(val) > 2:
                result['nama'] = val.upper()
            elif i + 1 < len(texts):
                # Name is on the next line, unless that line looks like
                # the following label
                next_text = texts[i+1].strip()
                if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']):
                    result['nama'] = next_text.upper()

        # ===== TEMPAT/TANGGAL LAHIR (place/date of birth) =====
        # Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels
        if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower):
            if result['tempat_lahir'] is None or result['tanggal_lahir'] is None:
                # Extract value after label using full-width or standard colon
                ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
                if ttl:
                    self._parse_ttl(ttl, result)
                # NOTE(review): both operands below test the same ASCII colon
                # in this copy; one was presumably the full-width colon
                # (U+FF1A) - confirm against the original file.
                elif ':' in text or ':' in text:
                    # Value is after colon but _extract_after_label didn't catch it
                    parts = re.split(r'[::]', text, 1)
                    if len(parts) > 1 and parts[1].strip():
                        self._parse_ttl(parts[1].strip(), result)
                elif i + 1 < len(texts):
                    # Place/date of birth is on the next line
                    next_text = texts[i+1].strip()
                    if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']):
                        self._parse_ttl(next_text, result)

        # ===== JENIS KELAMIN (gender) =====
        if any(kw in text_lower for kw in self.MALE_KEYWORDS):
            if result['jenis_kelamin'] is None:
                result['jenis_kelamin'] = 'LAKI-LAKI'
        elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS):
            if result['jenis_kelamin'] is None:
                result['jenis_kelamin'] = 'PEREMPUAN'

        # ===== GOLONGAN DARAH (blood type) =====
        if 'darah' in text_lower or 'gol.' in text_lower:
            # Try to find blood type on same line
            gol_match = re.search(r'(?:gol|darah)[.\s::]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
            if gol_match and result['gol_darah'] is None:
                result['gol_darah'] = gol_match.group(1).upper()
            elif result['gol_darah'] is None and i + 1 < len(texts):
                # Blood type might be on next line (real KTP pattern)
                next_text = texts[i+1].strip()
                if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE):
                    result['gol_darah'] = next_text.upper()
        # Standalone blood type (e.g., just "O" or "A+" on its own line)
        if result['gol_darah'] is None:
            if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
                result['gol_darah'] = text.strip().upper()

        # ===== ALAMAT (address) =====
        if 'alamat' in text_lower and result['alamat'] is None:
            val = self._extract_after_label(text_normalized, 'alamat')
            if val:
                result['alamat'] = val.upper()
            elif i + 1 < len(texts):
                # No value on the label line: take the next line as-is
                result['alamat'] = texts[i+1].strip().upper()

        # ===== RT/RW =====
        # Any "ddd/ddd" on any line is taken; a later match overwrites an
        # earlier one.
        rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
        if rt_rw_match:
            result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"

        # ===== KELURAHAN/DESA (urban village / village) =====
        # 'kelamin' is excluded so the "Jenis Kelamin" label does not match 'kel'
        if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower:
            if result['kel_desa'] is None:
                val = self._extract_after_label(text_normalized, 'kel|desa')
                if val:
                    result['kel_desa'] = val.upper()
                elif i + 1 < len(texts):
                    result['kel_desa'] = texts[i+1].strip().upper()

        # ===== KECAMATAN (district) =====
        if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower):
            if result['kecamatan'] is None:
                val = self._extract_after_label(text_normalized, 'kecamatan|kec')
                if val:
                    result['kecamatan'] = val.upper()
                elif i + 1 < len(texts):
                    # Value on next line (real KTP pattern)
                    next_text = texts[i+1].strip()
                    if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['agama', 'status', 'pekerjaan']):
                        result['kecamatan'] = next_text.upper()

        # ===== AGAMA (religion) =====
        if 'agama' in text_lower:
            val = self._extract_after_label(text_normalized, 'agama')
            if val and result['agama'] is None:
                result['agama'] = val.upper()
            elif result['agama'] is None and i + 1 < len(texts):
                # Value on next line (real KTP pattern)
                next_text = texts[i+1].strip().upper()
                if next_text in ['ISLAM', 'KRISTEN', 'KATOLIK', 'HINDU', 'BUDHA', 'BUDDHA', 'KONGHUCU']:
                    result['agama'] = next_text
        else:
            # Check if line contains only agama name
            for agama in self.AGAMA_LIST:
                if agama in text_lower and len(text) < 20:
                    if result['agama'] is None:
                        result['agama'] = text.strip().upper()
                    break

        # ===== STATUS PERKAWINAN (marital status) =====
        if 'kawin' in text_lower:
            if result['status_perkawinan'] is None:
                val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan')
                if val:
                    result['status_perkawinan'] = val.upper()
                elif 'belum' in text_lower:
                    result['status_perkawinan'] = 'BELUM KAWIN'
                elif 'kawin' in text_lower and 'cerai' not in text_lower:
                    result['status_perkawinan'] = 'KAWIN'
                # NOTE(review): this section only runs when the line contains
                # 'kawin', so a line reading just "CERAI HIDUP" / "CERAI MATI"
                # never reaches the two branches below.
                elif 'cerai hidup' in text_lower:
                    result['status_perkawinan'] = 'CERAI HIDUP'
                elif 'cerai mati' in text_lower:
                    result['status_perkawinan'] = 'CERAI MATI'

        # ===== PEKERJAAN (occupation) =====
        if 'pekerjaan' in text_lower:
            val = self._extract_after_label(text_normalized, 'pekerjaan')
            if val and result['pekerjaan'] is None:
                result['pekerjaan'] = val.upper()
            elif result['pekerjaan'] is None and i + 1 < len(texts):
                # Value on next line (real KTP pattern)
                next_text = texts[i+1].strip()
                if len(next_text) > 2 and 'kewarganegaraan' not in next_text.lower():
                    result['pekerjaan'] = next_text.upper()
        else:
            # Check if line contains pekerjaan keyword
            for pekerjaan in self.PEKERJAAN_LIST:
                if pekerjaan in text_lower and len(text) < 30:
                    if result['pekerjaan'] is None:
                        result['pekerjaan'] = text.strip().upper()
                    break

        # ===== KEWARGANEGARAAN (citizenship) =====
        # 'wni' / 'wna' are substring tests and overwrite any earlier value.
        if 'wni' in text_lower:
            result['kewarganegaraan'] = 'WNI'
        elif 'wna' in text_lower:
            result['kewarganegaraan'] = 'WNA'
        elif 'warga' in text_lower and result['kewarganegaraan'] is None:
            val = self._extract_after_label(text_normalized, 'kewarganegaraan|warga')
            if val:
                result['kewarganegaraan'] = val.upper()

        # ===== BERLAKU HINGGA (valid until) =====
        if 'berlaku' in text_lower or 'seumur' in text_lower:
            if result['berlaku_hingga'] is None:
                if 'seumur' in text_lower or 'hidup' in text_lower:
                    result['berlaku_hingga'] = 'SEUMUR HIDUP'
                else:
                    val = self._extract_after_label(text_normalized, 'berlaku')
                    if val:
                        result['berlaku_hingga'] = val.upper()

        # ===== TANGGAL PENERBITAN (issue date; usually DD-MM-YYYY at the end) =====
        # Look for date that is NOT tanggal lahir (different date)
        if result['tanggal_penerbitan'] is None:
            # Match date format at end of text or standalone date
            date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip())
            if date_match:
                found_date = date_match.group(1)
                # Make sure it's not the same as tanggal_lahir
                if result['tanggal_lahir'] != found_date:
                    # Likely penerbitan if berlaku_hingga was already found,
                    # or if this line is in the last ~30% of the OCR lines
                    if result['berlaku_hingga'] or i > len(texts) * 0.7:
                        result['tanggal_penerbitan'] = found_date

    # Post-processing
    result = self._post_process(result)

    return result
|
||||||
|
|
||||||
|
def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]:
|
||||||
|
"""Ekstrak nilai setelah label (supports various separators)"""
|
||||||
|
patterns = [
|
||||||
|
rf'(?:{label_pattern})\s*:\s*(.+)', # label: value
|
||||||
|
rf'(?:{label_pattern})\s+([A-Z0-9].+)', # label VALUE (uppercase start)
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
value = match.group(1).strip()
|
||||||
|
# Remove trailing colon or label fragment
|
||||||
|
value = re.sub(r'^[:\s]+', '', value)
|
||||||
|
value = re.sub(r'\s*:\s*$', '', value)
|
||||||
|
if value and len(value) > 1:
|
||||||
|
return value
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _parse_ttl(self, ttl_text: str, result: Dict):
|
||||||
|
"""Parse tempat/tanggal lahir dari text"""
|
||||||
|
ttl_text = ttl_text.strip()
|
||||||
|
|
||||||
|
# Normalize dates where OCR missed dashes:
|
||||||
|
# "05 08 1978" -> "05-08-1978"
|
||||||
|
# "05 08-1978" -> "05-08-1978"
|
||||||
|
# "05-08 1978" -> "05-08-1978"
|
||||||
|
ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
|
||||||
|
ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
|
||||||
|
ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
|
||||||
|
|
||||||
|
# Handle 8-digit date without separator: "05081978" -> "05-08-1978"
|
||||||
|
date_8digit = re.search(r'(\d{8})', ttl_text)
|
||||||
|
if date_8digit:
|
||||||
|
d = date_8digit.group(1)
|
||||||
|
formatted = f"{d[:2]}-{d[2:4]}-{d[4:]}"
|
||||||
|
ttl_text = ttl_text.replace(d, formatted)
|
||||||
|
|
||||||
|
# Handle merged city+date like "JAKARTA05-08-1978" - add space before digits
|
||||||
|
ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
# Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY"
|
||||||
|
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text)
|
||||||
|
if date_match:
|
||||||
|
result['tanggal_lahir'] = date_match.group(1)
|
||||||
|
# Tempat adalah bagian sebelum tanggal
|
||||||
|
place = ttl_text[:date_match.start()].strip(' ,:-/')
|
||||||
|
# Clean up label remnants
|
||||||
|
place = re.sub(r'^(tempat|tgl|lahir|:|:)[/\s::]*', '', place, flags=re.IGNORECASE).strip()
|
||||||
|
if place and len(place) > 2:
|
||||||
|
result['tempat_lahir'] = place.upper()
|
||||||
|
else:
|
||||||
|
# Coba split by comma
|
||||||
|
parts = ttl_text.split(',')
|
||||||
|
if len(parts) >= 2:
|
||||||
|
result['tempat_lahir'] = parts[0].strip().upper()
|
||||||
|
result['tanggal_lahir'] = parts[1].strip()
|
||||||
|
elif len(parts) == 1 and len(ttl_text) > 2:
|
||||||
|
result['tempat_lahir'] = ttl_text.upper()
|
||||||
|
|
||||||
|
def _post_process(self, result: Dict) -> Dict:
|
||||||
|
"""Post-processing hasil ekstraksi"""
|
||||||
|
# Validasi NIK (harus 16 digit)
|
||||||
|
if result['nik'] and not re.match(r'^\d{16}$', result['nik']):
|
||||||
|
cleaned = re.sub(r'\D', '', result['nik'])
|
||||||
|
if len(cleaned) == 16:
|
||||||
|
result['nik'] = cleaned
|
||||||
|
else:
|
||||||
|
result['nik'] = None
|
||||||
|
|
||||||
|
# Clean all string values - remove leading colons and extra whitespace
|
||||||
|
for field in result:
|
||||||
|
if result[field] and isinstance(result[field], str):
|
||||||
|
val = result[field]
|
||||||
|
# Remove leading colons (standard and full-width)
|
||||||
|
val = re.sub(r'^[\s::]+', '', val)
|
||||||
|
# Remove trailing colons
|
||||||
|
val = re.sub(r'[\s::]+$', '', val)
|
||||||
|
# Remove double spaces
|
||||||
|
val = re.sub(r'\s+', ' ', val)
|
||||||
|
result[field] = val.strip()
|
||||||
|
|
||||||
|
# Bersihkan label dari values
|
||||||
|
for field in ['nama', 'alamat', 'tempat_lahir', 'kel_desa', 'kecamatan', 'agama', 'pekerjaan']:
|
||||||
|
if result[field]:
|
||||||
|
# Remove common labels yang ter-capture
|
||||||
|
result[field] = re.sub(
|
||||||
|
r'^(NAMA|ALAMAT|TEMPAT|LAHIR|TGL|KEL|DESA|KELURAHAN|KECAMATAN|KEC|AGAMA|PEKERJAAN|STATUS)[\s::]*',
|
||||||
|
'', result[field], flags=re.IGNORECASE
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
# Fix status perkawinan yang masih mengandung label
|
||||||
|
if result['status_perkawinan']:
|
||||||
|
sp = result['status_perkawinan']
|
||||||
|
sp = re.sub(r'^(STATUS|PERKAWINAN)[\s::]*', '', sp, flags=re.IGNORECASE).strip()
|
||||||
|
result['status_perkawinan'] = sp
|
||||||
|
|
||||||
|
# Fix berlaku hingga
|
||||||
|
if result['berlaku_hingga']:
|
||||||
|
bh = result['berlaku_hingga']
|
||||||
|
bh = re.sub(r'^(BERLAKU|HINGGA)[\s::]*', '', bh, flags=re.IGNORECASE).strip()
|
||||||
|
if bh.upper() == 'HIDUP' or 'SEUMUR' in bh.upper():
|
||||||
|
result['berlaku_hingga'] = 'SEUMUR HIDUP'
|
||||||
|
else:
|
||||||
|
result['berlaku_hingga'] = bh
|
||||||
|
|
||||||
|
# Fix merged kabupaten/kota names (e.g., JAKARTASELATAN -> JAKARTA SELATAN)
|
||||||
|
if result['kabupaten_kota']:
|
||||||
|
kk = result['kabupaten_kota']
|
||||||
|
# Add space before directional words
|
||||||
|
kk = re.sub(r'(JAKARTA|BANDUNG|SURABAYA|SEMARANG|MEDAN|BEKASI|TANGERANG|DEPOK|BOGOR)(SELATAN|UTARA|BARAT|TIMUR|PUSAT|TENGAH)',
|
||||||
|
r'\1 \2', kk, flags=re.IGNORECASE)
|
||||||
|
# Common merged patterns
|
||||||
|
kk = re.sub(r'(KOTA|KABUPATEN)([A-Z])', r'\1 \2', kk, flags=re.IGNORECASE)
|
||||||
|
result['kabupaten_kota'] = kk.upper()
|
||||||
|
|
||||||
|
# Fix merged provinsi names
|
||||||
|
if result['provinsi']:
|
||||||
|
prov = result['provinsi']
|
||||||
|
prov = re.sub(r'(DKI|DI)(JAKARTA|YOGYAKARTA)', r'\1 \2', prov, flags=re.IGNORECASE)
|
||||||
|
prov = re.sub(r'(JAWA|KALIMANTAN|SULAWESI|SUMATERA|NUSA TENGGARA)(BARAT|TIMUR|TENGAH|SELATAN|UTARA)',
|
||||||
|
r'\1 \2', prov, flags=re.IGNORECASE)
|
||||||
|
result['provinsi'] = prov.upper()
|
||||||
|
|
||||||
|
# Fix merged alamat/address (e.g., JLKECAPIV -> JL KECAPI V)
|
||||||
|
if result['alamat']:
|
||||||
|
alamat = result['alamat']
|
||||||
|
# Add space after common street prefixes
|
||||||
|
alamat = re.sub(r'^(JL|JLN|JALAN|GG|GANG|NO|BLOK)([A-Z])', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||||
|
# Add space before Roman numerals at the end (I, II, III, IV, V, VI, VII, VIII, IX, X)
|
||||||
|
alamat = re.sub(r'([A-Z])([IVX]+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||||
|
# Add space before single digits/numbers at end
|
||||||
|
alamat = re.sub(r'([A-Z])(\d+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||||
|
# Fix common patterns: "NO123" -> "NO 123", "BLOKA" -> "BLOK A"
|
||||||
|
alamat = re.sub(r'\b(NO|BLOK)(\d+|[A-Z])\b', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||||
|
result['alamat'] = alamat.upper()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Smoke test: feed hand-written OCR lines (text only, no coordinates)
    # through the extractor and print every resulting field.
    sample_lines = (
        'PROVINSI JAWA BARAT',
        'KABUPATEN BANDUNG',
        'NIK : 3204012345678901',
        'Nama : JOHN DOE',
        'Tempat/Tgl Lahir : BANDUNG, 01-01-1990',
        'Jenis Kelamin : LAKI-LAKI',
        'Alamat : JL. MERDEKA NO. 123',
        'RT/RW : 001/002',
        'Kel/Desa : SUKAMAJU',
        'Kecamatan : SUKASARI',
        'Agama : ISLAM',
        'Status Perkawinan : BELUM KAWIN',
        'Pekerjaan : KARYAWAN SWASTA',
        'Kewarganegaraan : WNI',
        'Berlaku Hingga : SEUMUR HIDUP',
    )
    sample_ocr = [{'text': line} for line in sample_lines]

    extractor = KTPExtractor()
    result = extractor.extract(sample_ocr)

    for key, value in result.items():
        print(f"{key}: {value}")
|
||||||
153
ocr_engine.py
Normal file
153
ocr_engine.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
"""
|
||||||
|
OCR Engine menggunakan PaddleOCR 3.x
|
||||||
|
Untuk membaca teks dari gambar dokumen Indonesia (KTP, KK)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from paddleocr import PaddleOCR
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
class OCREngine:
    """Wrapper around PaddleOCR 3.x that returns recognized lines as plain dicts."""

    def __init__(self):
        """Initialise PaddleOCR 3.x with the options used for Indonesian documents."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # Detect rotation (0/90/180/270 degrees)
            use_doc_unwarping=True,  # Perspective correction (trapezoid -> rectangle)
            use_textline_orientation=True,  # Per-text-line orientation
        )

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """
        Preprocess an image for better OCR results.

        - Downscale so the longest side is at most 2000 px
        - Enhance contrast with CLAHE on the grayscale image

        Args:
            image_path: Path to the image file.

        Returns:
            The enhanced image as a 3-channel BGR array.

        Raises:
            ValueError: If the image cannot be read.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Resize if too large (max 2000px)
        max_dim = 2000
        height, width = img.shape[:2]
        if max(height, width) > max_dim:
            scale = max_dim / max(height, width)
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # Convert to grayscale for preprocessing
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Enhance contrast using CLAHE
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Convert back to BGR for PaddleOCR
        enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)

        return enhanced_bgr

    @staticmethod
    def _result_field(res, *names):
        """Return the first non-None field of ``res`` among ``names`` (attribute or mapping key), else []."""
        for name in names:
            value = getattr(res, name, None)
            if value is None and hasattr(res, 'get'):
                value = res.get(name)
            if value is not None:
                return value
        return []

    @staticmethod
    def _to_bbox(poly) -> list:
        """Convert one polygon (numpy array or nested sequence) to a plain list of points."""
        if poly is None:
            return []
        if hasattr(poly, 'tolist'):
            return poly.tolist()
        return list(poly)

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """
        Extract text from an image using the PaddleOCR 3.x ``predict`` API.

        Args:
            image_path: Path to the image file.
            preprocess: When True, the image is first run through
                ``preprocess_image`` and the resulting array is given to
                PaddleOCR instead of the path.

        Returns:
            List of dicts with keys 'text', 'confidence', 'bbox', 'y_center'
            and 'x_center', sorted top-to-bottom then left-to-right. An empty
            list is returned when nothing is recognized or when any error
            occurs (the error is printed, not raised).
        """
        try:
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path

            # Run OCR with the new API (predict)
            result = self.ocr.predict(input=ocr_input)

            if not result:
                return []

            extracted = []

            # Parse the PaddleOCR 3.x results. Fields are looked up both as
            # attributes and as mapping keys so that either result style
            # keeps its bounding boxes.
            for res in result:
                texts = self._result_field(res, 'rec_texts', 'texts')
                scores = self._result_field(res, 'rec_scores', 'scores')
                # Prefer rec_polys: per the PaddleOCR docs it is filtered the
                # same way as rec_texts, whereas dt_polys holds every detected
                # box and may not line up index-for-index.
                polys = self._result_field(res, 'rec_polys', 'dt_polys')
                has_polys = len(polys) > 0

                for i, text in enumerate(texts):
                    confidence = scores[i] if i < len(scores) else 0.0
                    bbox = self._to_bbox(polys[i]) if i < len(polys) else []

                    # Calculate center for sorting (midpoint of the diagonal)
                    if len(bbox) >= 4:
                        y_center = (bbox[0][1] + bbox[2][1]) / 2
                        x_center = (bbox[0][0] + bbox[2][0]) / 2
                    elif has_polys:
                        y_center = 0
                        x_center = 0
                    else:
                        # No geometry at all: keep recognition order
                        y_center = i * 10
                        x_center = 0

                    extracted.append({
                        'text': text,
                        'confidence': float(confidence),
                        'bbox': bbox,
                        'y_center': y_center,
                        'x_center': x_center,
                    })

            # Sort by Y position (top to bottom), then X
            if extracted:
                extracted.sort(key=lambda x: (x['y_center'], x['x_center']))

            return extracted

        except Exception as e:
            # Deliberate best-effort boundary: report and return no results
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str) -> str:
        """Return all recognized text from the image as one newline-joined string."""
        results = self.extract_text(image_path)
        return '\n'.join([r['text'] for r in results])
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level cache for the shared engine; filled on first use
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the shared OCREngine, constructing it on the first call."""
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine
    _ocr_engine = OCREngine()
    return _ocr_engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual check: OCR the image given as the first CLI argument and print
    # each recognized line with its confidence.
    import sys

    if len(sys.argv) > 1:
        engine = get_ocr_engine()
        results = engine.extract_text(sys.argv[1])
        for r in results:
            print(f"[{r['confidence']:.2f}] {r['text']}")
|
||||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
paddlepaddle
|
||||||
|
paddleocr
|
||||||
|
flask
|
||||||
|
pillow
|
||||||
|
opencv-python
|
||||||
538
static/style.css
Normal file
538
static/style.css
Normal file
@@ -0,0 +1,538 @@
|
|||||||
|
/* OCR KTP/KK - Modern Dark Theme */
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--bg-primary: #0f0f1a;
|
||||||
|
--bg-secondary: #1a1a2e;
|
||||||
|
--bg-tertiary: #252540;
|
||||||
|
--accent-primary: #6366f1;
|
||||||
|
--accent-secondary: #818cf8;
|
||||||
|
--accent-gradient: linear-gradient(135deg, #6366f1 0%, #a855f7 100%);
|
||||||
|
--text-primary: #f1f5f9;
|
||||||
|
--text-secondary: #94a3b8;
|
||||||
|
--text-muted: #64748b;
|
||||||
|
--success: #22c55e;
|
||||||
|
--error: #ef4444;
|
||||||
|
--warning: #f59e0b;
|
||||||
|
--border: #334155;
|
||||||
|
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
|
||||||
|
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4);
|
||||||
|
--radius: 12px;
|
||||||
|
--radius-lg: 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
* {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
|
||||||
|
background: var(--bg-primary);
|
||||||
|
color: var(--text-primary);
|
||||||
|
min-height: 100vh;
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
.container {
|
||||||
|
max-width: 800px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 2rem 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Header */
|
||||||
|
header {
|
||||||
|
text-align: center;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
header h1 {
|
||||||
|
font-size: 2.5rem;
|
||||||
|
font-weight: 700;
|
||||||
|
background: var(--accent-gradient);
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
|
background-clip: text;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.subtitle {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 1.1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Upload Section */
|
||||||
|
.upload-section {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-radius: var(--radius-lg);
|
||||||
|
padding: 2rem;
|
||||||
|
box-shadow: var(--shadow-lg);
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Document Type Selector */
|
||||||
|
.doc-type-selector {
|
||||||
|
display: flex;
|
||||||
|
gap: 1rem;
|
||||||
|
margin-bottom: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.doc-btn {
|
||||||
|
flex: 1;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
padding: 1rem;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
border: 2px solid transparent;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 1rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.doc-btn:hover {
|
||||||
|
background: var(--bg-primary);
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.doc-btn.active {
|
||||||
|
background: var(--accent-gradient);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--accent-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.doc-btn .icon {
|
||||||
|
font-size: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dropzone */
|
||||||
|
.dropzone {
|
||||||
|
border: 2px dashed var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 3rem 2rem;
|
||||||
|
text-align: center;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
background: var(--bg-tertiary);
|
||||||
|
position: relative;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dropzone:hover,
|
||||||
|
.dropzone.dragover {
|
||||||
|
border-color: var(--accent-primary);
|
||||||
|
background: rgba(99, 102, 241, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dropzone-content {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.upload-icon {
|
||||||
|
font-size: 4rem;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dropzone p {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dropzone .hint {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.75rem 1.5rem;
|
||||||
|
background: var(--accent-gradient);
|
||||||
|
color: white;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
transition: transform 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-btn:hover {
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-types {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.preview-image {
|
||||||
|
max-width: 100%;
|
||||||
|
max-height: 400px;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Process Button */
|
||||||
|
.process-btn {
|
||||||
|
width: 100%;
|
||||||
|
padding: 1rem;
|
||||||
|
margin-top: 1.5rem;
|
||||||
|
background: var(--accent-gradient);
|
||||||
|
border: none;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
color: white;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
box-shadow: var(--shadow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.process-btn:hover:not(:disabled) {
|
||||||
|
transform: translateY(-2px);
|
||||||
|
box-shadow: var(--shadow-lg);
|
||||||
|
}
|
||||||
|
|
||||||
|
.process-btn:disabled {
|
||||||
|
opacity: 0.5;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------- Results section ---------- */
.results-section {
    padding: 2rem;
    border-radius: var(--radius-lg);
    background: var(--bg-secondary);
    box-shadow: var(--shadow-lg);
    animation: slideUp 0.3s ease;
}

/* Entrance animation: fade in while sliding up 20px */
@keyframes slideUp {
    0% {
        opacity: 0;
        transform: translateY(20px);
    }

    100% {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Title on the left, action buttons on the right; wraps on narrow widths */
.results-header {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    justify-content: space-between;
    gap: 1rem;
    margin-bottom: 1.5rem;
}

.results-header h2 {
    font-size: 1.5rem;
}

.results-actions {
    display: flex;
    gap: 0.5rem;
}

.action-btn {
    padding: 0.5rem 1rem;
    border: 1px solid var(--border);
    border-radius: var(--radius);
    background: var(--bg-tertiary);
    color: var(--text-primary);
    font-size: 0.875rem;
    cursor: pointer;
    transition: all 0.2s ease;
}

.action-btn:hover {
    border-color: var(--accent-primary);
    background: var(--accent-primary);
}

/* Declared after :hover with equal specificity, so it also wins while hovered */
.action-btn.secondary {
    background: transparent;
}
|
||||||
|
|
||||||
|
/* ---------- Results table ---------- */
.results-table {
    border-collapse: collapse;
    width: 100%;
}

.results-table th,
.results-table td {
    padding: 0.875rem 1rem;
    border-bottom: 1px solid var(--border);
    text-align: left;
}

.results-table th {
    background: var(--bg-tertiary);
    color: var(--text-secondary);
    font-size: 0.875rem;
    font-weight: 600;
    letter-spacing: 0.05em;
    text-transform: uppercase;
}

/* Round only the outer top corners of the header row */
.results-table th:first-child {
    border-radius: var(--radius) 0 0 0;
}

.results-table th:last-child {
    border-radius: 0 var(--radius) 0 0;
}

.field-label {
    width: 40%;
    color: var(--text-secondary);
    font-weight: 500;
}

.field-value {
    color: var(--text-primary);
    font-weight: 600;
}

.results-table tr:hover {
    background: rgba(99, 102, 241, 0.05);
}
|
||||||
|
|
||||||
|
/* ---------- Editable fields ---------- */
.editable-field {
    width: 100%;
    padding: 0.5rem 0.75rem;
    border: 1px solid var(--border);
    border-radius: 6px;
    background: var(--bg-tertiary);
    color: var(--text-primary);
    font-family: inherit;
    font-size: 0.95rem;
    font-weight: 600;
    transition: all 0.2s ease;
}

/* The native outline is replaced by an accent border plus a soft ring */
.editable-field:focus {
    outline: none;
    border-color: var(--accent-primary);
    background: var(--bg-secondary);
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}

.editable-field::placeholder {
    color: var(--text-muted);
    font-weight: 400;
}
|
||||||
|
|
||||||
|
/* ---------- Region dropdown ---------- */
/* Row holding the text input, the <select> and the toggle button */
.region-field-wrapper {
    display: flex;
    align-items: center;
    gap: 0.5rem;
}

/* Input and select share whatever width the toggle button leaves */
.region-field-wrapper input,
.region-field-wrapper select {
    flex: 1;
}

.region-dropdown {
    width: 100%;
    padding: 0.5rem 0.75rem;
    border: 1px solid var(--border);
    border-radius: 6px;
    background: var(--bg-tertiary);
    color: var(--text-primary);
    font-family: inherit;
    font-size: 0.95rem;
    cursor: pointer;
}
|
||||||
|
|
||||||
|
/* The native outline is removed, so a visible replacement is mandatory for
   keyboard users; the ring matches the one used by .editable-field:focus. */
.region-dropdown:focus {
    outline: none;
    border-color: var(--accent-primary);
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
|
||||||
|
|
||||||
|
/* Button that switches a region row between free text and the select list */
.dropdown-toggle {
    flex-shrink: 0;
    padding: 0.5rem 0.75rem;
    border: 1px solid var(--border);
    border-radius: 6px;
    background: var(--bg-tertiary);
    color: var(--text-secondary);
    cursor: pointer;
    transition: all 0.2s ease;
}

.dropdown-toggle:hover {
    background: var(--accent-primary);
    color: white;
}

/* Applied by the script once a value has been picked from the list */
.dropdown-toggle.confirmed {
    border-color: var(--success);
    background: var(--success);
    color: white;
}
|
||||||
|
|
||||||
|
/* ---------- Validation indicators ---------- */
/* Icon shown next to a field label */
.validation-status {
    margin-left: 0.5rem;
    font-size: 0.875rem;
}

.validation-status.valid-field {
    color: var(--success);
}

.validation-status.invalid-field {
    color: var(--warning);
}

/* The same state classes tint the border of the matching input */
.editable-field.valid-field {
    border-color: var(--success);
}

.editable-field.invalid-field {
    border-color: var(--warning);
}

.suggestion-text {
    margin-top: 0.25rem;
    color: var(--text-muted);
    font-size: 0.75rem;
    font-style: italic;
}
|
||||||
|
|
||||||
|
/* ---------- Raw text section ---------- */
.raw-text-section {
    margin-top: 1.5rem;
    padding-top: 1.5rem;
    border-top: 1px solid var(--border);
}

.raw-text-section h3 {
    margin-bottom: 1rem;
    color: var(--text-secondary);
    font-size: 1rem;
}

/* Wrapped monospace dump that scrolls vertically past 300px */
.raw-text-section pre {
    max-height: 300px;
    overflow-y: auto;
    padding: 1rem;
    border-radius: var(--radius);
    background: var(--bg-primary);
    color: var(--text-secondary);
    font-family: 'Consolas', monospace;
    font-size: 0.875rem;
    white-space: pre-wrap;
    word-wrap: break-word;
}
|
||||||
|
|
||||||
|
/* ---------- Error section ---------- */
.error-section {
    margin-top: 1rem;
}

/* Tinted box with the icon and message laid out on one row */
.error-content {
    display: flex;
    align-items: center;
    gap: 0.75rem;
    padding: 1rem;
    border: 1px solid var(--error);
    border-radius: var(--radius);
    background: rgba(239, 68, 68, 0.1);
}

.error-icon {
    font-size: 1.5rem;
}

.error-content p {
    color: var(--error);
}
|
||||||
|
|
||||||
|
/* ---------- Footer ---------- */
footer {
    margin-top: 2rem;
    padding-top: 1rem;
    border-top: 1px solid var(--border);
    text-align: center;
}

footer p {
    font-size: 0.875rem;
    color: var(--text-muted);
}

footer a {
    text-decoration: none;
    color: var(--accent-secondary);
}

footer a:hover {
    text-decoration: underline;
}
|
||||||
|
|
||||||
|
/* ---------- Responsive: viewports up to 600px wide ---------- */
@media (max-width: 600px) {
    .container {
        padding: 1rem;
    }

    header h1 {
        font-size: 2rem;
    }

    .upload-section,
    .results-section {
        padding: 1.5rem;
    }

    /* Stack the document-type buttons */
    .doc-type-selector {
        flex-direction: column;
    }

    /* Title above the action buttons, both left-aligned */
    .results-header {
        align-items: flex-start;
        flex-direction: column;
    }

    .results-actions {
        justify-content: flex-start;
        width: 100%;
    }

    .field-label {
        width: 45%;
    }
}
|
||||||
|
|
||||||
|
/* ---------- Scrollbar ---------- */
/* WebKit/Blink engines: pseudo-element styling */
::-webkit-scrollbar {
    width: 8px;
    height: 8px;
}

::-webkit-scrollbar-track {
    background: var(--bg-tertiary);
}

::-webkit-scrollbar-thumb {
    background: var(--border);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--text-muted);
}

/* Engines without ::-webkit-scrollbar (e.g. Firefox) get the standard
   properties. The @supports guard keeps these away from browsers that
   support both, where the standard properties would take precedence over
   the pseudo-element rules above. */
@supports not selector(::-webkit-scrollbar) {
    * {
        scrollbar-width: thin;
        scrollbar-color: var(--border) var(--bg-tertiary);
    }
}
|
||||||
570
templates/index.html
Normal file
570
templates/index.html
Normal file
@@ -0,0 +1,570 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="id">
|
||||||
|
|
||||||
|
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OCR KTP/KK - Pembaca Dokumen Indonesia</title>
    <!-- Resolved through Flask so the stylesheet URL stays correct when the
         app is mounted under a URL prefix or the static path is changed. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<header>
|
||||||
|
<h1>📄 OCR KTP/KK</h1>
|
||||||
|
<p class="subtitle">Pembaca Dokumen Indonesia Offline</p>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<main>
|
||||||
|
<!-- Upload Section -->
|
||||||
|
<section class="upload-section">
|
||||||
|
<div class="doc-type-selector">
|
||||||
|
<button class="doc-btn active" data-type="ktp">
|
||||||
|
<span class="icon">🪪</span>
|
||||||
|
KTP
|
||||||
|
</button>
|
||||||
|
<button class="doc-btn" data-type="kk">
|
||||||
|
<span class="icon">👨👩👧👦</span>
|
||||||
|
Kartu Keluarga
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Drop target: shows the prompt until a file is chosen, then the preview image -->
<div class="dropzone" id="dropzone">
    <div class="dropzone-content">
        <div class="upload-icon">📷</div>
        <p>Drag &amp; drop gambar di sini</p>
        <p class="hint">atau</p>
        <label class="file-btn">
            Pilih File
            <input type="file" id="fileInput" accept="image/*" hidden>
        </label>
        <p class="file-types">PNG, JPG, JPEG, BMP, WEBP (max 16MB)</p>
    </div>
    <!-- alt is required on <img>; src is filled in by the script once a file is read -->
    <img id="preview" class="preview-image" alt="Pratinjau dokumen yang dipilih" style="display: none;">
</div>
|
||||||
|
|
||||||
|
<button id="processBtn" class="process-btn" disabled>
|
||||||
|
<span class="btn-text">🔍 Proses OCR</span>
|
||||||
|
<span class="btn-loading" style="display: none;">⏳ Memproses...</span>
|
||||||
|
</button>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Results: hidden until OCR succeeds; rows are injected into #resultsBody by the script -->
<section class="results-section" id="resultsSection" style="display: none;">
    <div class="results-header">
        <h2>📋 Hasil Ekstraksi</h2>
        <div class="results-actions">
            <button class="action-btn" id="copyBtn" title="Copy JSON">📋 Copy</button>
            <button class="action-btn" id="exportBtn" title="Export JSON">💾 Export</button>
            <button class="action-btn secondary" id="toggleRaw">📝 Raw Text</button>
        </div>
    </div>

    <div class="results-content">
        <table class="results-table" id="resultsTable">
            <thead>
                <tr>
                    <th>Field</th>
                    <th>Nilai</th>
                </tr>
            </thead>
            <tbody id="resultsBody">
            </tbody>
        </table>

        <!-- Raw OCR output, toggled by #toggleRaw -->
        <div class="raw-text-section" id="rawTextSection" style="display: none;">
            <h3>Raw OCR Text</h3>
            <pre id="rawText"></pre>
        </div>
    </div>
</section>
|
||||||
|
|
||||||
|
<!-- Error banner: hidden by default; role="alert" makes assistive technology
     announce the message when the script fills it in and reveals it -->
<section class="error-section" id="errorSection" role="alert" style="display: none;">
    <div class="error-content">
        <span class="error-icon" aria-hidden="true">⚠️</span>
        <p id="errorMessage"></p>
    </div>
</section>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer>
    <!-- rel="noopener noreferrer" stops the page opened in the new tab from
         reaching back into this one through window.opener -->
    <p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank" rel="noopener noreferrer">PaddleOCR</a> • Data
        diproses secara lokal</p>
</footer>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Page state shared by the handlers below
let selectedFile = null,          // File currently staged for upload
    docType = 'ktp',              // document type sent with the upload ('ktp' or 'kk')
    extractedData = null;         // latest extracted fields, kept in sync with user edits
|
||||||
|
|
||||||
|
// DOM references, looked up once at load time
const dropzone = document.querySelector('#dropzone');
const fileInput = document.querySelector('#fileInput');
const preview = document.querySelector('#preview');
const processBtn = document.querySelector('#processBtn');

const resultsSection = document.querySelector('#resultsSection');
const resultsBody = document.querySelector('#resultsBody');
const rawText = document.querySelector('#rawText');
const rawTextSection = document.querySelector('#rawTextSection');

const errorSection = document.querySelector('#errorSection');
const errorMessage = document.querySelector('#errorMessage');

const docBtns = document.querySelectorAll('.doc-btn');
|
||||||
|
|
||||||
|
// Display labels for the extracted field keys
const fieldLabels = {
    // KTP
    nik: 'NIK',
    nama: 'Nama',
    tempat_lahir: 'Tempat Lahir',
    tanggal_lahir: 'Tanggal Lahir',
    jenis_kelamin: 'Jenis Kelamin',
    gol_darah: 'Gol. Darah',
    alamat: 'Alamat',
    rt_rw: 'RT/RW',
    kel_desa: 'Kel/Desa',
    kecamatan: 'Kecamatan',
    agama: 'Agama',
    status_perkawinan: 'Status Perkawinan',
    pekerjaan: 'Pekerjaan',
    kewarganegaraan: 'Kewarganegaraan',
    berlaku_hingga: 'Berlaku Hingga',
    provinsi: 'Provinsi',
    kabupaten_kota: 'Kabupaten/Kota',
    tanggal_penerbitan: 'Tanggal Penerbitan',
    // KK
    no_kk: 'No. KK',
    nama_kepala_keluarga: 'Kepala Keluarga',
    kode_pos: 'Kode Pos',
    anggota_keluarga: 'Jumlah Anggota'
};
|
||||||
|
|
||||||
|
// Document type selection: the clicked button becomes the only `active` one
// and its data-type becomes the current docType.
for (const button of docBtns) {
    button.addEventListener('click', () => {
        for (const other of docBtns) {
            other.classList.remove('active');
        }
        button.classList.add('active');
        docType = button.dataset.type;
    });
}
|
||||||
|
|
||||||
|
// Drag & drop: highlight while a drag hovers, then hand the first dropped file on
dropzone.addEventListener('dragover', (event) => {
    // preventDefault is what marks the element as a valid drop target
    event.preventDefault();
    dropzone.classList.add('dragover');
});

dropzone.addEventListener('dragleave', () => {
    dropzone.classList.remove('dragover');
});

dropzone.addEventListener('drop', (event) => {
    event.preventDefault();
    dropzone.classList.remove('dragover');

    const dropped = event.dataTransfer.files;
    if (dropped.length === 0) return;
    handleFile(dropped[0]);
});
|
||||||
|
|
||||||
|
// File picker: forward the first chosen file, if any
fileInput.addEventListener('change', (event) => {
    const chosen = event.target.files;
    if (chosen.length === 0) return;
    handleFile(chosen[0]);
});
|
||||||
|
|
||||||
|
// Click anywhere on the dropzone prompt to open the file picker.
dropzone.addEventListener('click', (e) => {
    // The "Pilih File" <label> wraps the file input and opens the picker
    // through its own native activation. Clicks on the label, and the
    // synthetic click the input receives, bubble up to here, so calling
    // fileInput.click() for them would request the picker a second time.
    if (e.target.closest('.file-btn')) return;

    if (e.target === dropzone || e.target.closest('.dropzone-content')) {
        fileInput.click();
    }
});
|
||||||
|
|
||||||
|
/**
 * Validate a user-supplied file and stage it for OCR.
 *
 * Rejects files whose MIME type is not `image/*` or that exceed 16 MB (an
 * error is shown and the previous selection is kept). Otherwise stores the
 * file in `selectedFile`, loads a preview, enables the process button and
 * hides any previous error and results.
 *
 * @param {File} file - file taken from the picker or from a drop event
 */
function handleFile(file) {
    if (!file.type.startsWith('image/')) {
        showError('File harus berupa gambar');
        return;
    }

    if (file.size > 16 * 1024 * 1024) {
        showError('Ukuran file maksimal 16MB');
        return;
    }

    selectedFile = file;

    // Show preview
    const reader = new FileReader();
    reader.onload = (e) => {
        // Reads complete asynchronously: ignore the result if a different
        // file was selected (or the selection was reset) in the meantime,
        // otherwise the preview would not match the file that gets uploaded.
        if (selectedFile !== file) return;
        preview.src = e.target.result;
        preview.style.display = 'block';
        dropzone.querySelector('.dropzone-content').style.display = 'none';
    };
    reader.onerror = () => {
        if (selectedFile !== file) return;
        // An unreadable file cannot be previewed; unstage it
        selectedFile = null;
        processBtn.disabled = true;
        showError('Gagal membaca file');
    };
    reader.readAsDataURL(file);

    processBtn.disabled = false;
    hideError();
    resultsSection.style.display = 'none';
}
|
||||||
|
|
||||||
|
// Process button: upload the staged file and render the OCR result.
// The button shows its loading label and stays disabled until the request
// and the rendering have both finished.
processBtn.addEventListener('click', async () => {
    if (!selectedFile) return;

    const btnText = processBtn.querySelector('.btn-text');
    const btnLoading = processBtn.querySelector('.btn-loading');

    processBtn.disabled = true;
    btnText.style.display = 'none';
    btnLoading.style.display = 'inline';

    try {
        const formData = new FormData();
        formData.append('file', selectedFile);
        formData.append('doc_type', docType);

        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            // Error responses are not always JSON (e.g. an HTML error page);
            // report the HTTP status instead of a JSON syntax error.
            throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
        }

        if (response.ok && result.success) {
            extractedData = result.data;
            // Awaited so rendering errors land in the catch below and the
            // button is not re-enabled while rows are still being added.
            await displayResults(result);
            hideError();
        } else {
            showError(result.error || `HTTP ${response.status}`);
            resultsSection.style.display = 'none';
        }
    } catch (error) {
        showError('Terjadi kesalahan: ' + error.message);
        // Do not leave a previous or half-rendered result next to the error
        resultsSection.style.display = 'none';
    } finally {
        // Stay disabled if the selection was reset while the request ran
        processBtn.disabled = !selectedFile;
        btnText.style.display = 'inline';
        btnLoading.style.display = 'none';
    }
});
|
||||||
|
|
||||||
|
// Fields rendered with a dropdown, listed from the top of the hierarchy down
const regionFields = [
    'provinsi',
    'kabupaten_kota',
    'kecamatan',
    'kel_desa'
];

// Cache of lists already fetched: provinces as one array, the other levels
// keyed by the code of their parent region
let regionData = { provinces: [], regencies: {}, districts: {}, villages: {} };

// Per-field validation info for the document currently shown (null until set)
let validationResult = null;
|
||||||
|
|
||||||
|
// Order in which fields appear in the results table
const fieldOrder = [
    // Location hierarchy first
    'provinsi',
    'kabupaten_kota',
    'kecamatan',
    'kel_desa',
    // Identity
    'nik',
    'nama',
    'tempat_lahir',
    'tanggal_lahir',
    'jenis_kelamin',
    'gol_darah',
    // Address
    'alamat',
    'rt_rw',
    // Other info
    'agama',
    'status_perkawinan',
    'pekerjaan',
    'kewarganegaraan',
    'berlaku_hingga',
    // Issue date
    'tanggal_penerbitan',
    // KK specific
    'no_kk',
    'nama_kepala_keluarga',
    'kode_pos',
    'anggota_keluarga'
];
|
||||||
|
|
||||||
|
/**
 * Render an OCR response into the results table and reveal the section.
 *
 * Region validation runs first so region rows can show their status. Keys
 * listed in `fieldOrder` come first, in that order; unknown keys follow.
 *
 * @param {{data: Object, raw_text: string}} result - response from /upload
 */
async function displayResults(result) {
    resultsBody.innerHTML = '';
    const data = result.data;
    extractedData = data;

    await validateRegionData(data);

    const rankOf = (name) => fieldOrder.indexOf(name);
    const orderedKeys = Object.keys(data).sort((left, right) => {
        const leftRank = rankOf(left);
        const rightRank = rankOf(right);
        const leftKnown = leftRank !== -1;
        const rightKnown = rightRank !== -1;

        if (leftKnown && rightKnown) return leftRank - rightRank;
        if (leftKnown) return -1;   // known keys sort ahead of unknown ones
        if (rightKnown) return 1;
        return 0;
    });

    for (const key of orderedKeys) {
        const value = data[key];

        // The member list is summarised as a read-only count
        if (key === 'anggota_keluarga') {
            const memberCount = Array.isArray(value) ? value.length : 0;
            addResultRow('Jumlah Anggota', memberCount + ' orang', null, false);
            continue;
        }

        const label = fieldLabels[key] || key;
        if (regionFields.includes(key)) {
            // Region field with dropdown
            await addRegionRow(label, value || '', key);
        } else {
            addResultRow(label, value || '', key, true);
        }
    }

    rawText.textContent = result.raw_text;
    resultsSection.style.display = 'block';
    resultsSection.scrollIntoView({ behavior: 'smooth' });
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to validate the region fields of `data` and store the
 * outcome in `validationResult`.
 *
 * Best-effort: failures are logged and leave `validationResult` as null,
 * so callers can still render the fields without validation marks.
 *
 * @param {Object} data - extracted fields, sent to /api/validate-region as JSON
 */
async function validateRegionData(data) {
    // Drop the previous document's result first; otherwise a failed request
    // would leave stale statuses and codes attached to the new document.
    validationResult = null;

    try {
        const response = await fetch('/api/validate-region', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(data)
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const result = await response.json();
        if (result.success) {
            validationResult = result.validation;
        }
    } catch (e) {
        console.error('Validation error:', e);
    }
}
|
||||||
|
|
||||||
|
/**
 * Append a region row (province / regency / district / village) to the table.
 *
 * The row holds a free-text input, a hidden <select> that is filled on
 * demand, and a button toggling between the two. The input is pre-filled
 * with the validation suggestion when there is one, else with the OCR value.
 *
 * All dynamic text is assigned through DOM properties, never interpolated
 * into markup: OCR output is untrusted and may contain quotes or tags.
 *
 * NOTE(review): when a suggestion pre-fills the input, `extractedData[key]`
 * keeps the OCR value until the user edits the field — confirm this is intended.
 *
 * @param {string} label - display label of the field
 * @param {string} value - value extracted by OCR (may be empty)
 * @param {string} key   - field key, one of `regionFields`
 * @returns {Promise<void>}
 */
async function addRegionRow(label, value, key) {
    const row = document.createElement('tr');
    const validation = validationResult?.[key];
    const isValid = validation?.valid;
    const suggestion = validation?.suggestion;

    // Status indicator
    const statusIcon = isValid ? '✓' : (value ? '⚠' : '');
    const statusClass = isValid ? 'valid-field' : (value ? 'invalid-field' : '');

    // Static skeleton only; everything dynamic is filled in below
    row.innerHTML = `
        <td class="field-label">
            <span class="validation-status"></span>
        </td>
        <td class="field-value">
            <div class="region-field-wrapper">
                <input type="text" class="editable-field" placeholder="Ketik atau pilih...">
                <select class="region-dropdown" style="display: none;">
                    <option value="">-- Pilih --</option>
                </select>
                <button type="button" class="dropdown-toggle" title="Pilih dari daftar">▼</button>
            </div>
        </td>
    `;

    const labelCell = row.querySelector('.field-label');
    const statusEl = row.querySelector('.validation-status');
    const valueCell = row.querySelector('.field-value');
    const input = row.querySelector('input');
    const select = row.querySelector('select');
    const toggleBtn = row.querySelector('.dropdown-toggle');

    labelCell.insertBefore(document.createTextNode(`${label} `), statusEl);
    statusEl.textContent = statusIcon;
    if (statusClass) {
        statusEl.classList.add(statusClass);
        input.classList.add(statusClass);
    }

    // data-key is what clearDependentFields uses to find the controls
    input.dataset.key = key;
    select.dataset.key = key;
    toggleBtn.dataset.key = key;
    input.value = suggestion || value || '';

    if (suggestion && suggestion !== value) {
        const hint = document.createElement('div');
        hint.className = 'suggestion-text';
        hint.textContent = `Saran: ${suggestion}`;
        valueCell.appendChild(hint);
    }

    // Input change
    input.addEventListener('input', (e) => {
        if (extractedData) {
            extractedData[key] = e.target.value;
        }
    });

    // Toggle dropdown
    toggleBtn.addEventListener('click', async () => {
        if (select.style.display === 'none') {
            await loadDropdownOptions(key, select);
            select.style.display = 'block';
            input.style.display = 'none';
        } else {
            select.style.display = 'none';
            input.style.display = 'block';
        }
    });

    // Select change
    select.addEventListener('change', (e) => {
        const selectedOption = e.target.options[e.target.selectedIndex];
        const selectedCode = selectedOption.value;
        // Only real entries carry a code; placeholder rows ("-- Pilih --",
        // loading or error text) must never be copied into the field.
        const selectedName = selectedCode ? selectedOption.text : '';

        input.value = selectedName;
        if (extractedData) {
            extractedData[key] = selectedName;
        }

        // Update validation result with selected code for cascading
        if (!validationResult) validationResult = {};
        validationResult[key] = {
            valid: !!selectedCode,
            code: selectedCode,
            suggestion: selectedName
        };

        select.style.display = 'none';
        input.style.display = 'block';

        // Change toggle button to checkmark if valid selection
        if (selectedCode) {
            toggleBtn.textContent = '✓';
            toggleBtn.classList.add('confirmed');
            input.classList.remove('invalid-field');
            input.classList.add('valid-field');
        } else {
            toggleBtn.textContent = '▼';
            toggleBtn.classList.remove('confirmed');
            // The field is empty again, so it must not keep the valid border
            input.classList.remove('valid-field');
        }

        // Clear dependent fields and their codes
        clearDependentFields(key);
    });

    resultsBody.appendChild(row);
}
|
||||||
|
|
||||||
|
/**
 * Fill `select` with the regions available for `key`.
 *
 * Provinces are always available. Each lower level needs the code of its
 * parent, read from `validationResult`; without one, only the placeholder
 * option is shown. Fetched lists are cached in `regionData`.
 *
 * @param {string} key - one of 'provinsi', 'kabupaten_kota', 'kecamatan', 'kel_desa'
 * @param {HTMLSelectElement} select - dropdown to populate
 * @returns {Promise<void>}
 */
async function loadDropdownOptions(key, select) {
    select.innerHTML = '<option value="">Loading...</option>';

    // Child level -> cache bucket, API path segment and parent field
    const levels = {
        kabupaten_kota: { cache: regionData.regencies, endpoint: 'regencies', parent: 'provinsi' },
        kecamatan: { cache: regionData.districts, endpoint: 'districts', parent: 'kabupaten_kota' },
        kel_desa: { cache: regionData.villages, endpoint: 'villages', parent: 'kecamatan' }
    };

    // Throws on HTTP errors so a failed request is never cached as an empty list
    const fetchList = async (url) => {
        const res = await fetch(url);
        if (!res.ok) {
            throw new Error(`HTTP ${res.status} for ${url}`);
        }
        const json = await res.json();
        return json.data || [];
    };

    try {
        let data = [];

        if (key === 'provinsi') {
            if (!regionData.provinces.length) {
                regionData.provinces = await fetchList('/api/provinces');
            }
            data = regionData.provinces;
        } else if (Object.prototype.hasOwnProperty.call(levels, key)) {
            const { cache, endpoint, parent } = levels[key];
            const parentCode = validationResult?.[parent]?.code;
            if (parentCode) {
                if (!cache[parentCode]) {
                    cache[parentCode] = await fetchList(`/api/${endpoint}/${encodeURIComponent(parentCode)}`);
                }
                data = cache[parentCode];
            }
        }

        select.innerHTML = '<option value="">-- Pilih --</option>';
        data.forEach(item => {
            const option = document.createElement('option');
            option.value = item.code;
            option.textContent = item.name;
            select.appendChild(option);
        });
    } catch (e) {
        console.error('Failed to load region options:', e);
        select.innerHTML = '<option value="">Error loading data</option>';
    }
}
|
||||||
|
|
||||||
|
/**
 * Reset every region field below `key` in the hierarchy.
 *
 * Called after a region is picked from its dropdown: the lower levels no
 * longer belong to the new parent, so their value, stored data, validation
 * code and visual state are all cleared.
 *
 * @param {string} key - the region field that changed
 */
function clearDependentFields(key) {
    const dependents = {
        'provinsi': ['kabupaten_kota', 'kecamatan', 'kel_desa'],
        'kabupaten_kota': ['kecamatan', 'kel_desa'],
        'kecamatan': ['kel_desa']
    };

    (dependents[key] || []).forEach(depKey => {
        const input = document.querySelector(`input[data-key="${depKey}"]`);
        if (input) {
            input.value = '';
            // An emptied field is neither valid nor invalid
            input.classList.remove('valid-field', 'invalid-field');
        }

        // A tick left over from an earlier pick would suggest the cleared
        // field is still confirmed
        const toggle = document.querySelector(`.dropdown-toggle[data-key="${depKey}"]`);
        if (toggle) {
            toggle.textContent = '▼';
            toggle.classList.remove('confirmed');
        }

        if (extractedData) extractedData[depKey] = '';

        // Clear validation code for cascading
        if (validationResult && validationResult[depKey]) {
            validationResult[depKey] = { valid: false, code: null, suggestion: null };
        }
    });
}
|
||||||
|
|
||||||
|
/**
 * Append a label/value row to the results table.
 *
 * With `editable` true and a `key`, the value is shown in a text input
 * whose edits are written back to `extractedData[key]`. Otherwise the value
 * is plain text ('-' when empty).
 *
 * The row is built with DOM APIs rather than an HTML string: values come
 * from OCR and may contain quotes, `<` or `&`, which would break the markup
 * or inject elements if interpolated.
 *
 * @param {string} label - text for the label cell
 * @param {*} value - value to display
 * @param {?string} key - field key in `extractedData`, or null for read-only rows
 * @param {boolean} [editable=true] - whether to render an input
 */
function addResultRow(label, value, key, editable = true) {
    const row = document.createElement('tr');

    const labelCell = document.createElement('td');
    labelCell.className = 'field-label';
    labelCell.textContent = label;

    const valueCell = document.createElement('td');
    valueCell.className = 'field-value';

    if (editable && key) {
        const input = document.createElement('input');
        input.type = 'text';
        input.className = 'editable-field';
        input.dataset.key = key;
        input.placeholder = 'Klik untuk edit...';
        input.value = value || '';
        input.addEventListener('input', (e) => {
            if (extractedData && key) {
                extractedData[key] = e.target.value;
            }
        });
        valueCell.appendChild(input);
    } else {
        valueCell.textContent = value || '-';
    }

    row.append(labelCell, valueCell);
    resultsBody.appendChild(row);
}
|
||||||
|
|
||||||
|
// Raw text button: flip the raw OCR panel between hidden and shown
document.getElementById('toggleRaw').addEventListener('click', () => {
    if (rawTextSection.style.display !== 'none') {
        rawTextSection.style.display = 'none';
    } else {
        rawTextSection.style.display = 'block';
    }
});
|
||||||
|
|
||||||
|
// Copy the extracted data to the clipboard as pretty-printed JSON.
document.getElementById('copyBtn').addEventListener('click', async () => {
    if (!extractedData) return;

    try {
        // navigator.clipboard is undefined outside secure contexts and
        // writeText can reject (e.g. permission denied); both end up in the
        // catch so the user gets feedback instead of a silent failure.
        await navigator.clipboard.writeText(JSON.stringify(extractedData, null, 2));
        alert('Data berhasil disalin!');
    } catch (err) {
        console.error('Clipboard write failed:', err);
        alert('Gagal menyalin data: ' + err.message);
    }
});
|
||||||
|
|
||||||
|
// Download the extracted data as `<docType>_data.json`.
document.getElementById('exportBtn').addEventListener('click', () => {
    if (!extractedData) return;

    const blob = new Blob([JSON.stringify(extractedData, null, 2)], { type: 'application/json' });
    const url = URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.href = url;
    a.download = `${docType}_data.json`;
    a.style.display = 'none';

    // Attached for the click because some browsers ignore clicks on
    // detached anchors.
    document.body.appendChild(a);
    a.click();
    a.remove();

    // Revoke on a later task: releasing the URL synchronously can cancel
    // the download before the browser has started reading the blob.
    setTimeout(() => URL.revokeObjectURL(url), 0);
});
|
||||||
|
|
||||||
|
/**
 * Show `message` in the error banner and make the banner visible.
 * @param {string} message - text to display
 */
function showError(message) {
    const banner = errorSection.style;
    errorMessage.textContent = message;
    banner.display = 'block';
}
|
||||||
|
|
||||||
|
/** Hide the error banner; its last message text is left in place. */
function hideError() {
    const banner = errorSection.style;
    banner.display = 'none';
}
|
||||||
|
|
||||||
|
// Clicking the preview discards the staged file and brings the prompt back
preview.addEventListener('click', () => {
    const prompt = dropzone.querySelector('.dropzone-content');

    preview.style.display = 'none';
    prompt.style.display = 'flex';

    selectedFile = null;
    processBtn.disabled = true;
    // Clearing the input lets the same file be chosen again and still fire `change`
    fileInput.value = '';
});
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user