OCR dengan ZONA
This commit is contained in:
BIN
__pycache__/kk_extractor.cpython-313.pyc
Normal file
BIN
__pycache__/kk_extractor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/ktp_extractor.cpython-313.pyc
Normal file
BIN
__pycache__/ktp_extractor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/ocr_engine.cpython-313.pyc
Normal file
BIN
__pycache__/ocr_engine.cpython-313.pyc
Normal file
Binary file not shown.
253
app.py
Normal file
253
app.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
Flask Web Server untuk OCR KTP/KK
|
||||
"""
|
||||
|
||||
import os
|
||||
from flask import Flask, render_template, request, jsonify
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from ocr_engine import get_ocr_engine
|
||||
from ktp_extractor import KTPExtractor
|
||||
from kk_extractor import KKExtractor
|
||||
|
||||
# Flask application object shared by every route in this module.
app = Flask(__name__)

# Upload settings.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16 MiB upper bound
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')

app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
)

# Make sure the upload directory exists before the first request arrives.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# One extractor instance per document type, reused across requests.
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()
|
||||
|
||||
|
||||
def allowed_file(filename, allowed_extensions=None):
    """Return True when *filename* ends in a whitelisted extension.

    Args:
        filename: Client-supplied file name. ``None`` and ``""`` are treated
            as "not allowed" instead of raising.
        allowed_extensions: Optional collection of lower-case extensions
            (without the dot). Defaults to the module-level
            ``ALLOWED_EXTENSIONS``.

    Returns:
        bool: True if the text after the last dot, lower-cased, is in the
        whitelist.
    """
    if not filename or '.' not in filename:
        return False
    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS
    return filename.rsplit('.', 1)[1].lower() in allowed_extensions
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||||
|
||||
|
||||
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload, run OCR on it and return the extracted fields.

    Expects a multipart form with a ``file`` part and an optional ``doc_type``
    field (``'ktp'`` by default; any other value is handled by the KK
    extractor).

    Returns:
        A JSON response. On success: ``success``, ``doc_type``, ``data``,
        ``raw_text`` and ``ocr_count``. On failure: ``success=False`` plus an
        ``error`` message, with status 400 for bad input and 500 for
        unexpected errors.
    """
    import uuid  # local import: only this handler needs it

    try:
        # Validate the request.
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400

        file = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')

        # Covers both an empty and a missing (None) filename.
        if not file.filename:
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Store the upload under a server-generated unique name. Using the
        # client name would let two concurrent uploads with the same name
        # overwrite or delete each other's file. The extension has just been
        # checked against the whitelist, so it is safe to reuse.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f"{uuid.uuid4().hex}.{extension}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        try:
            # Saving happens inside the try so a partially written file is
            # also removed by the finally clause.
            file.save(filepath)

            # Run OCR.
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)

            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Pick the extractor for the requested document type.
            if doc_type == 'ktp':
                extracted = ktp_extractor.extract(ocr_results)
            else:
                extracted = kk_extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging.
            raw_text = '\n'.join(r['text'] for r in ocr_results)

            # The OCR text is personal data, so only dump it to stdout when
            # the app runs in debug mode.
            if app.debug:
                print("\n" + "="*50)
                print("DEBUG: Raw OCR Results")
                print("="*50)
                for i, r in enumerate(ocr_results):
                    print(f"[{i}] {r['text']}")
                print("="*50 + "\n")

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })

        finally:
            # Remove the upload after processing (personal-data hygiene).
            if os.path.exists(filepath):
                os.remove(filepath)

    except Exception as e:
        # Top-level boundary: report the failure as JSON instead of an HTML
        # error page.
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
# ============================================
|
||||
# Region Data API (using wilayah.id)
|
||||
# ============================================
|
||||
import requests
|
||||
from functools import lru_cache
|
||||
|
||||
WILAYAH_API_BASE = "https://wilayah.id/api"


@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Fetch and decode one endpoint, raising on any failure.

    Failures are raised rather than returned so that ``lru_cache`` only
    memoizes successful responses.
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code} for {endpoint}")
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data with caching.

    Args:
        endpoint: Path below ``WILAYAH_API_BASE``, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status is not 200, or the body is not valid JSON. Failed lookups are
        not cached, so a later call retries the request.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except Exception as e:
        print(f"Error fetching region data: {e}")
        return None
|
||||
|
||||
|
||||
def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the text, strips surrounding whitespace and removes every
    dot and space. Falsy input yields ``""``.
    """
    if not name:
        return ""
    return name.upper().strip().replace(".", "").replace(" ", "")


def find_best_match(search_name, items, key='name'):
    """Find the item whose name best matches *search_name*.

    Matching is done on normalized names (see ``normalize_name``): an exact
    match wins; otherwise the first item where one name contains the other
    is returned.

    Args:
        search_name: Name to look for (typically OCR output).
        items: Iterable of dicts to search.
        key: Dict key holding each item's name.

    Returns:
        The matching item, or ``None`` when nothing matches, the inputs are
        empty, or the search name normalizes to an empty string.
    """
    if not search_name or not items:
        return None

    search_norm = normalize_name(search_name)
    # An empty needle is a substring of every name; it must not match.
    if not search_norm:
        return None

    # Normalize each candidate once and reuse it for both passes.
    candidates = [(normalize_name(item.get(key, '')), item) for item in items]

    # Pass 1: exact match.
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Pass 2: containment in either direction. Items without a name are
    # skipped because "" is contained in everything.
    for item_norm, item in candidates:
        if item_norm and (search_norm in item_norm or item_norm in search_norm):
            return item

    return None
|
||||
|
||||
|
||||
@app.route('/api/provinces')
def get_provinces():
    """Return the list of provinces as JSON.

    Responds with status 500 and an empty ``data`` list when the lookup
    yields nothing.
    """
    provinces = fetch_region_data("provinces.json")
    if not provinces:
        return jsonify({'data': []}), 500
    return jsonify(provinces)
|
||||
|
||||
|
||||
@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Return the cities/regencies of a province as JSON.

    Args:
        province_code: Region code taken from the URL path.

    Returns:
        The upstream payload; ``{'data': []}`` with status 400 when the code
        contains anything other than ASCII digits and dots, or with status
        500 when the lookup yields nothing.
    """
    # The code is spliced into the upstream URL and used as a cache key, so
    # reject anything that is not a dotted numeric code.
    # NOTE(review): assumes region codes are digits separated by dots - confirm against the upstream API.
    if any(ch not in "0123456789." for ch in province_code):
        return jsonify({'data': []}), 400

    data = fetch_region_data(f"regencies/{province_code}.json")
    if data:
        return jsonify(data)
    return jsonify({'data': []}), 500
|
||||
|
||||
|
||||
@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Return the districts of a regency as JSON.

    Args:
        regency_code: Region code taken from the URL path.

    Returns:
        The upstream payload; ``{'data': []}`` with status 400 when the code
        contains anything other than ASCII digits and dots, or with status
        500 when the lookup yields nothing.
    """
    # The code is spliced into the upstream URL and used as a cache key, so
    # reject anything that is not a dotted numeric code.
    # NOTE(review): assumes region codes are digits separated by dots - confirm against the upstream API.
    if any(ch not in "0123456789." for ch in regency_code):
        return jsonify({'data': []}), 400

    data = fetch_region_data(f"districts/{regency_code}.json")
    if data:
        return jsonify(data)
    return jsonify({'data': []}), 500
|
||||
|
||||
|
||||
@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Return the villages of a district as JSON.

    Args:
        district_code: Region code taken from the URL path.

    Returns:
        The upstream payload; ``{'data': []}`` with status 400 when the code
        contains anything other than ASCII digits and dots, or with status
        500 when the lookup yields nothing.
    """
    # The code is spliced into the upstream URL and used as a cache key, so
    # reject anything that is not a dotted numeric code.
    # NOTE(review): assumes region codes are digits separated by dots - confirm against the upstream API.
    if any(ch not in "0123456789." for ch in district_code):
        return jsonify({'data': []}), 400

    data = fetch_region_data(f"villages/{district_code}.json")
    if data:
        return jsonify(data)
    return jsonify({'data': []}), 500
|
||||
|
||||
|
||||
@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR region names against the region database.

    Expects a JSON object with the optional keys ``provinsi``,
    ``kabupaten_kota``, ``kecamatan`` and ``kel_desa``. Levels are checked
    from province down to village; each level is only looked up inside the
    match found for the level above, and checking stops at the first level
    that cannot be matched.

    Returns:
        JSON ``{'success': True, 'validation': {...}}`` where every level
        maps to ``{'valid', 'code', 'suggestion'}``; status 400 when the body
        is not a JSON object; status 500 on unexpected errors.
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid
        # body, so bad input is reported as a 400 rather than a 500.
        ocr_data = request.get_json(silent=True)
        if not isinstance(ocr_data, dict):
            return jsonify({'success': False, 'error': 'Body request harus berupa objek JSON'}), 400

        # (result field, endpoint template). The template receives the code
        # matched at the previous level; the province endpoint has no
        # placeholder, so formatting leaves it unchanged.
        levels = (
            ('provinsi', 'provinces.json'),
            ('kabupaten_kota', 'regencies/{code}.json'),
            ('kecamatan', 'districts/{code}.json'),
            ('kel_desa', 'villages/{code}.json'),
        )

        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in levels
        }

        parent_code = None
        for field, endpoint in levels:
            payload = fetch_region_data(endpoint.format(code=parent_code))
            if not payload or 'data' not in payload:
                break

            match = find_best_match(ocr_data.get(field), payload['data'])
            if not match:
                break

            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}
            parent_code = match['code']

        return jsonify({'success': True, 'validation': result})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/health')
def health():
    """Health check endpoint: always answers with ``{"status": "ok"}``."""
    payload = {'status': 'ok'}
    return jsonify(payload)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Startup banner.
    print("="*50)
    print("OCR KTP/KK Application")
    print("="*50)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print("="*50)

    # The interactive debugger allows arbitrary code execution, so it must
    # never be reachable from other machines. Debug mode is opt-in through
    # the FLASK_DEBUG environment variable and, when enabled, the server
    # only listens on the loopback interface.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    host = '127.0.0.1' if debug else '0.0.0.0'

    app.run(host=host, port=5000, debug=debug)
|
||||
235
kk_extractor.py
Normal file
235
kk_extractor.py
Normal file
@@ -0,0 +1,235 @@
|
||||
"""
|
||||
KK (Kartu Keluarga) Field Extractor
|
||||
Ekstraksi data terstruktur dari hasil OCR KK Indonesia
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
|
||||
class KKExtractor:
    """Extract structured fields from the OCR output of a Kartu Keluarga (KK).

    The extractor is stateless: every call to ``extract`` works only on the
    OCR results passed in.
    """

    def __init__(self):
        pass

    def extract(self, ocr_results: List[Dict]) -> Dict:
        """Extract KK fields from OCR results.

        Args:
            ocr_results: List of dicts, each with at least a ``'text'`` key,
                in reading order.

        Returns:
            Dict with the keys ``no_kk``, ``nama_kepala_keluarga``,
            ``alamat``, ``rt_rw``, ``kel_desa``, ``kecamatan``,
            ``kabupaten_kota``, ``provinsi``, ``kode_pos`` (each a string or
            ``None``) and ``anggota_keluarga`` (list of member dicts).
        """
        all_text = '\n'.join(r['text'] for r in ocr_results)

        result = {
            'no_kk': None,
            'nama_kepala_keluarga': None,
            'alamat': None,
            'rt_rw': None,
            'kel_desa': None,
            'kecamatan': None,
            'kabupaten_kota': None,
            'provinsi': None,
            'kode_pos': None,
            'anggota_keluarga': [],
        }

        # KK number: first standalone 16-digit run anywhere in the text.
        kk_match = re.search(r'\b(\d{16})\b', all_text)
        if kk_match:
            result['no_kk'] = kk_match.group(1)

        # Becomes True once the member-table header has been seen. This used
        # to be combined with the header's y position, which is falsy when
        # that position is 0 or missing and silently disabled member
        # extraction.
        in_table = False

        for i, ocr in enumerate(ocr_results):
            text = ocr['text'].strip()
            text_lower = text.lower()

            # Province
            if 'provinsi' in text_lower and result['provinsi'] is None:
                result['provinsi'] = self._extract_value(text, 'provinsi')

            # Regency / city
            if ('kabupaten' in text_lower or 'kota' in text_lower) and result['kabupaten_kota'] is None:
                val = self._extract_value(text, 'kabupaten') or self._extract_value(text, 'kota')
                # Fall back to the whole line when no value follows the label.
                result['kabupaten_kota'] = val if val else text

            # District
            if 'kecamatan' in text_lower and result['kecamatan'] is None:
                result['kecamatan'] = self._extract_value(text, 'kecamatan')

            # Village
            if ('kelurahan' in text_lower or 'desa' in text_lower) and result['kel_desa'] is None:
                result['kel_desa'] = self._extract_value(text, 'kelurahan') or self._extract_value(text, 'desa')

            # Labelled KK number: look on this line, then on the next one.
            if 'no' in text_lower and ('kk' in text_lower or 'kartu' in text_lower):
                match = re.search(r'(\d{16})', text)
                if match:
                    result['no_kk'] = match.group(1)
                elif i + 1 < len(ocr_results):
                    next_text = ocr_results[i + 1]['text']
                    match = re.search(r'(\d{16})', next_text)
                    if match:
                        result['no_kk'] = match.group(1)

            # Name of the head of family
            if 'kepala' in text_lower and 'keluarga' in text_lower:
                result['nama_kepala_keluarga'] = self._extract_value(text, 'keluarga')
                if not result['nama_kepala_keluarga'] and i + 1 < len(ocr_results):
                    # The name may be on the following line, unless that
                    # line looks like another label.
                    next_text = ocr_results[i + 1]['text'].strip()
                    if not any(kw in next_text.lower() for kw in ['alamat', 'rt', 'rw', 'provinsi']):
                        result['nama_kepala_keluarga'] = next_text

            # Address
            if 'alamat' in text_lower and result['alamat'] is None:
                result['alamat'] = self._extract_value(text, 'alamat')

            # RT/RW
            rt_rw_match = re.search(r'rt\s*/?\s*rw\s*[:\s]*(\d+)\s*/\s*(\d+)', text_lower)
            if rt_rw_match:
                result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"

            # Postal code
            if 'kode' in text_lower and 'pos' in text_lower:
                match = re.search(r'(\d{5})', text)
                if match:
                    result['kode_pos'] = match.group(1)

            # Header of the family-member table: following lines are rows.
            if self._is_table_header(text_lower):
                in_table = True
                continue

            # Member rows
            if in_table:
                member = self._extract_member(text, ocr_results, i)
                if member:
                    result['anggota_keluarga'].append(member)

        return self._post_process(result)

    def _extract_value(self, text: str, field: str) -> Optional[str]:
        """Return the value that follows the label *field* in *text*.

        The label may carry extra trailing letters and be followed by a
        colon or by plain whitespace. Matching is case-insensitive.

        Returns:
            The stripped value, or ``None`` when the label is absent or no
            value follows it.
        """
        label = re.escape(field)  # treat the label as literal text
        patterns = [
            rf'{label}[a-z]*\s*:\s*(.+)',
            rf'{label}[a-z]*\s+(.+)',
        ]

        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                value = match.group(1).strip()
                # Drop a separator captured by the whitespace-only pattern.
                value = re.sub(r'^[:\s]+', '', value)
                if value:
                    return value
        return None

    def _is_table_header(self, text: str) -> bool:
        """Return True when *text* contains at least two header keywords.

        *text* is expected to be lower-cased already; keywords are matched
        as substrings.
        """
        header_keywords = ['no', 'nama lengkap', 'nik', 'jenis kelamin', 'hubungan']
        count = sum(1 for kw in header_keywords if kw in text)
        return count >= 2

    def _extract_member(self, text: str, all_results: List[Dict], current_idx: int) -> Optional[Dict]:
        """Extract one family member from a table row.

        Args:
            text: Text of the row.
            all_results: Full OCR result list (currently unused).
            current_idx: Index of the row in *all_results* (currently unused).

        Returns:
            A member dict when the row contains a 16-digit NIK, else
            ``None``. Only ``nik``, ``jenis_kelamin`` and ``hubungan`` are
            filled in; the remaining keys stay ``None``.
        """
        nik_match = re.search(r'\b(\d{16})\b', text)
        if not nik_match:
            return None

        member = {
            'nik': nik_match.group(1),
            'nama': None,
            'jenis_kelamin': None,
            'tempat_lahir': None,
            'tanggal_lahir': None,
            'hubungan': None,
        }

        lowered = text.lower()
        padded = f' {lowered} '  # lets a lone "l"/"p" token match at the edges

        # Gender
        if 'laki' in lowered or ' l ' in padded:
            member['jenis_kelamin'] = 'LAKI-LAKI'
        elif 'perempuan' in lowered or ' p ' in padded:
            member['jenis_kelamin'] = 'PEREMPUAN'

        # Family relationship: first keyword (in this order) found in the row.
        hubungan_keywords = {
            'kepala': 'KEPALA KELUARGA',
            'istri': 'ISTRI',
            'suami': 'SUAMI',
            'anak': 'ANAK',
            'menantu': 'MENANTU',
            'cucu': 'CUCU',
            'orang tua': 'ORANG TUA',
            'mertua': 'MERTUA',
        }

        for keyword, value in hubungan_keywords.items():
            if keyword in lowered:
                member['hubungan'] = value
                break

        return member

    def _post_process(self, result: Dict) -> Dict:
        """Clean up the extraction result in place and return it.

        Reduces ``no_kk`` to its digits (or ``None`` when that does not give
        exactly 16 digits) and upper-cases the free-text fields.
        """
        # Validate the KK number.
        if result['no_kk'] and not re.match(r'^\d{16}$', result['no_kk']):
            cleaned = re.sub(r'\D', '', result['no_kk'])
            result['no_kk'] = cleaned if len(cleaned) == 16 else None

        # Upper-case text fields.
        for field in ['nama_kepala_keluarga', 'alamat', 'kel_desa', 'kecamatan',
                      'kabupaten_kota', 'provinsi']:
            if result[field]:
                result[field] = result[field].upper()

        return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run the extractor on a hand-written KK sample and
    # print every scalar field followed by the member count.
    sample_lines = [
        'KARTU KELUARGA',
        'No. 3204012345678901',
        'Nama Kepala Keluarga : JOHN DOE',
        'Alamat : JL. MERDEKA NO. 123',
        'RT/RW : 001/002',
        'Desa/Kelurahan : SUKAMAJU',
        'Kecamatan : SUKASARI',
        'Kabupaten/Kota : BANDUNG',
        'Provinsi : JAWA BARAT',
        'Kode Pos : 40154',
    ]
    # Lines are spaced 20 units apart vertically, starting at y = 10.
    sample_ocr = [
        {'text': line, 'y_center': 10 + 20 * position}
        for position, line in enumerate(sample_lines)
    ]

    extracted = KKExtractor().extract(sample_ocr)

    for field_name in extracted:
        if field_name == 'anggota_keluarga':
            continue
        print(f"{field_name}: {extracted[field_name]}")

    print(f"\nAnggota Keluarga: {len(extracted['anggota_keluarga'])} orang")
|
||||
602
ktp_extractor.py
Normal file
602
ktp_extractor.py
Normal file
@@ -0,0 +1,602 @@
|
||||
"""
|
||||
KTP Field Extractor
|
||||
Ekstraksi data terstruktur dari hasil OCR KTP Indonesia
|
||||
Mendukung berbagai format output OCR (full-width colon, standard colon, tanpa colon)
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
|
||||
class KTPExtractor:
    """Extract fields from the OCR output of an Indonesian KTP."""

    # Colon variants produced by OCR: ASCII ":" and the full-width colon
    # (U+FF1A). The full-width form is written as an escape so it cannot be
    # lost when the file is re-encoded.
    COLON_PATTERN = r'[:\uFF1A]'

    # Keywords that indicate gender.
    MALE_KEYWORDS = ['laki', 'pria', 'male']
    FEMALE_KEYWORDS = ['perempuan', 'wanita', 'female']

    # Recognized religions.
    AGAMA_LIST = ['islam', 'kristen', 'katolik', 'hindu', 'budha', 'buddha', 'konghucu']

    # Common occupations.
    PEKERJAAN_LIST = ['pelajar', 'mahasiswa', 'pegawai', 'swasta', 'pns', 'wiraswasta',
                      'buruh', 'petani', 'nelayan', 'karyawan', 'ibu rumah tangga',
                      'tidak bekerja', 'lainnya', 'mengurus rumah tangga']

    # KTP zone template in normalized coordinates: (x_min, y_min, x_max, y_max).
    # Based on the standard KTP layout. Neighbouring zones overlap.
    ZONES = {
        'header_provinsi': (0.15, 0.00, 0.85, 0.07),   # PROVINSI header
        'header_kabupaten': (0.15, 0.05, 0.85, 0.13),  # KABUPATEN header
        'nik': (0.02, 0.10, 0.70, 0.22),               # NIK area
        'nama': (0.02, 0.18, 0.70, 0.28),              # Name area
        'ttl': (0.02, 0.25, 0.70, 0.36),               # Place/date of birth
        'jenis_kelamin': (0.02, 0.33, 0.45, 0.42),     # Gender (left)
        'gol_darah': (0.40, 0.33, 0.70, 0.42),         # Blood type (right of gender)
        'alamat': (0.02, 0.38, 0.70, 0.50),            # Address
        'rt_rw': (0.02, 0.46, 0.70, 0.54),             # RT/RW
        'kel_desa': (0.02, 0.51, 0.70, 0.60),          # Kel/Desa
        'kecamatan': (0.02, 0.57, 0.70, 0.66),         # Kecamatan
        'agama': (0.02, 0.63, 0.70, 0.72),             # Religion
        'status': (0.02, 0.69, 0.70, 0.78),            # Marital status
        'pekerjaan': (0.02, 0.75, 0.70, 0.84),         # Occupation
        'wni': (0.02, 0.81, 0.70, 0.90),               # Citizenship
        'berlaku': (0.02, 0.87, 0.70, 0.96),           # Valid until
        'foto': (0.68, 0.10, 0.98, 0.55),              # Photo (right side)
        'penerbitan': (0.65, 0.58, 0.98, 0.98),        # Place and date of issue
    }
|
||||
|
||||
def __init__(self):
|
||||
self.image_width = 0
|
||||
self.image_height = 0
|
||||
|
||||
def _get_zone(self, x_center: float, y_center: float, img_width: int, img_height: int) -> Optional[str]:
|
||||
"""Determine which zone a text belongs to based on normalized coordinates"""
|
||||
if img_width == 0 or img_height == 0:
|
||||
return None
|
||||
|
||||
# Normalize coordinates
|
||||
x_norm = x_center / img_width
|
||||
y_norm = y_center / img_height
|
||||
|
||||
for zone_name, (x_min, y_min, x_max, y_max) in self.ZONES.items():
|
||||
if x_min <= x_norm <= x_max and y_min <= y_norm <= y_max:
|
||||
return zone_name
|
||||
return None
|
||||
|
||||
def _extract_value_from_text(self, text: str) -> str:
|
||||
"""Extract value part from label:value text"""
|
||||
# Split by colon (standard or full-width)
|
||||
parts = re.split(r'[::]', text, 1)
|
||||
if len(parts) > 1:
|
||||
return parts[1].strip()
|
||||
return text.strip()
|
||||
|
||||
def _detect_image_size(self, ocr_results: List[Dict]) -> tuple:
|
||||
"""Detect image dimensions from bounding boxes"""
|
||||
max_x, max_y = 0, 0
|
||||
for r in ocr_results:
|
||||
bbox = r.get('bbox', [])
|
||||
if bbox and len(bbox) >= 4:
|
||||
for point in bbox:
|
||||
if len(point) >= 2:
|
||||
max_x = max(max_x, point[0])
|
||||
max_y = max(max_y, point[1])
|
||||
# Add some margin
|
||||
return (int(max_x * 1.05), int(max_y * 1.05)) if max_x > 0 else (1000, 640)
|
||||
|
||||
def _extract_by_zones(self, zone_texts: Dict[str, List[str]], result: Dict):
|
||||
"""Extract fields based on zone assignments"""
|
||||
|
||||
# PROVINSI from header
|
||||
if 'header_provinsi' in zone_texts:
|
||||
for text in zone_texts['header_provinsi']:
|
||||
if 'provinsi' in text.lower():
|
||||
val = re.sub(r'(?i)provinsi\s*', '', text).strip()
|
||||
if val:
|
||||
result['provinsi'] = val.upper()
|
||||
break
|
||||
|
||||
# KABUPATEN/KOTA from header
|
||||
if 'header_kabupaten' in zone_texts:
|
||||
for text in zone_texts['header_kabupaten']:
|
||||
text_lower = text.lower()
|
||||
if 'kabupaten' in text_lower or 'kota' in text_lower:
|
||||
val = re.sub(r'(?i)(kabupaten|kota)\s*', '', text).strip()
|
||||
if val:
|
||||
result['kabupaten_kota'] = val.upper()
|
||||
else:
|
||||
result['kabupaten_kota'] = text.upper()
|
||||
break
|
||||
|
||||
# NAMA from nama zone (skip label line)
|
||||
if 'nama' in zone_texts:
|
||||
for text in zone_texts['nama']:
|
||||
text_lower = text.lower()
|
||||
if 'nama' not in text_lower and len(text) > 2:
|
||||
result['nama'] = text.upper()
|
||||
break
|
||||
elif 'nama' in text_lower:
|
||||
val = self._extract_value_from_text(text)
|
||||
if val and 'nama' not in val.lower():
|
||||
result['nama'] = val.upper()
|
||||
|
||||
# TTL from ttl zone
|
||||
if 'ttl' in zone_texts:
|
||||
for text in zone_texts['ttl']:
|
||||
if 'tempat' in text.lower() or 'lahir' in text.lower():
|
||||
val = self._extract_value_from_text(text)
|
||||
if val:
|
||||
self._parse_ttl(val, result)
|
||||
break
|
||||
|
||||
# JENIS KELAMIN
|
||||
if 'jenis_kelamin' in zone_texts:
|
||||
for text in zone_texts['jenis_kelamin']:
|
||||
text_lower = text.lower()
|
||||
if 'laki' in text_lower:
|
||||
result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
break
|
||||
elif 'perempuan' in text_lower:
|
||||
result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
break
|
||||
|
||||
# GOL DARAH
|
||||
if 'gol_darah' in zone_texts:
|
||||
for text in zone_texts['gol_darah']:
|
||||
gol_match = re.search(r'([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
|
||||
if gol_match:
|
||||
result['gol_darah'] = gol_match.group(1).upper()
|
||||
break
|
||||
|
||||
# ALAMAT
|
||||
if 'alamat' in zone_texts:
|
||||
for text in zone_texts['alamat']:
|
||||
if 'alamat' not in text.lower() or len(zone_texts['alamat']) == 1:
|
||||
val = self._extract_value_from_text(text) if 'alamat' in text.lower() else text
|
||||
if val and 'alamat' not in val.lower():
|
||||
result['alamat'] = val.upper()
|
||||
break
|
||||
|
||||
# PENERBITAN area (tempat & tanggal dalam satu zona)
|
||||
if 'penerbitan' in zone_texts:
|
||||
for text in zone_texts['penerbitan']:
|
||||
# Look for date
|
||||
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', text)
|
||||
if date_match and result['tanggal_penerbitan'] is None:
|
||||
result['tanggal_penerbitan'] = date_match.group(1)
|
||||
|
||||
def extract(self, ocr_results: List[Dict]) -> Dict[str, Optional[str]]:
    """
    Extract KTP fields from OCR results using template-based zone detection.

    Works in two passes: texts are first assigned to layout zones and handed
    to ``_extract_by_zones``; then every text line is scanned with keyword
    heuristics. Most fallback branches only fill fields that are still
    ``None``; NIK, RT/RW and WNI/WNA are assigned whenever they match.

    Args:
        ocr_results: List of dicts from the OCR engine. Each entry is read
            for ``'text'`` and, optionally, ``'x_center'`` / ``'y_center'``
            (default 0) and ``'bbox'`` (via ``_detect_image_size``).

    Returns:
        Dict with the KTP fields, after ``self._post_process`` (defined
        outside this excerpt).
    """
    result = {
        'nik': None,
        'nama': None,
        'tempat_lahir': None,
        'tanggal_lahir': None,
        'jenis_kelamin': None,
        'gol_darah': None,
        'alamat': None,
        'rt_rw': None,
        'kel_desa': None,
        'kecamatan': None,
        'agama': None,
        'status_perkawinan': None,
        'pekerjaan': None,
        'kewarganegaraan': None,
        'berlaku_hingga': None,
        'provinsi': None,
        'kabupaten_kota': None,
        'tanggal_penerbitan': None,
    }

    # Detect image dimensions from bounding boxes
    img_width, img_height = self._detect_image_size(ocr_results)

    # Assign zones to each OCR result
    zone_texts = {}  # zone_name -> list of texts
    for r in ocr_results:
        x_center = r.get('x_center', 0)
        y_center = r.get('y_center', 0)
        zone = self._get_zone(x_center, y_center, img_width, img_height)
        if zone:
            if zone not in zone_texts:
                zone_texts[zone] = []
            zone_texts[zone].append(r['text'])

    # Debug: print zone assignments
    print("\n[DEBUG KTPExtractor] Zone assignments:")
    for zone, texts in zone_texts.items():
        print(f" {zone}: {texts}")

    # Extract fields using zone-based approach
    self._extract_by_zones(zone_texts, result)

    # Join all texts for the fallback pattern matching
    texts = [r['text'].strip() for r in ocr_results]
    all_text = '\n'.join(texts)

    # Extract the NIK (16 digits) - it can be anywhere
    nik_match = re.search(r'\b(\d{16})\b', all_text)
    if nik_match:
        result['nik'] = nik_match.group(1)
        print(f" -> NIK found: {result['nik']}")

    # Fallback: Parse line by line for fields not found by zone
    for i, text in enumerate(texts):
        text_lower = text.lower()

        # Normalize colons
        text_normalized = re.sub(self.COLON_PATTERN, ':', text)
        # NOTE(review): text_norm_lower is not read anywhere in this method.
        text_norm_lower = text_normalized.lower()

        # ===== PROVINCE =====
        if 'provinsi' in text_lower and result['provinsi'] is None:
            val = self._extract_after_label(text_normalized, 'provinsi')
            if val:
                result['provinsi'] = val.upper()
            elif i + 1 < len(texts) and 'provinsi' not in texts[i+1].lower():
                # The value may be on the next line
                result['provinsi'] = texts[i+1].strip().upper()

        # ===== REGENCY/CITY =====
        if ('kabupaten' in text_lower or 'kota' in text_lower or 'jakarta' in text_lower) and result['kabupaten_kota'] is None:
            if 'provinsi' not in text_lower:  # Not part of the province line
                val = self._extract_after_label(text_normalized, 'kabupaten|kota')
                if val:
                    result['kabupaten_kota'] = val.upper()
                else:
                    # No value after the label: keep the whole line
                    result['kabupaten_kota'] = text.strip().upper()

        # ===== NAME =====
        if 'nama' in text_lower and result['nama'] is None:
            val = self._extract_after_label(text_normalized, 'nama')
            if val and len(val) > 2:
                result['nama'] = val.upper()
            elif i + 1 < len(texts):
                # Name on the next line, unless that line is the next label
                next_text = texts[i+1].strip()
                if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']):
                    result['nama'] = next_text.upper()

        # ===== PLACE/DATE OF BIRTH =====
        # Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels
        if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower):
            if result['tempat_lahir'] is None or result['tanggal_lahir'] is None:
                # Extract value after label using full-width or standard colon
                ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
                if ttl:
                    self._parse_ttl(ttl, result)
                # NOTE(review): both operands below are the same ASCII colon,
                # and so are both characters in the split pattern; one of each
                # was presumably a full-width colon - confirm.
                elif ':' in text or ':' in text:
                    # Value is after colon but _extract_after_label didn't catch it
                    parts = re.split(r'[::]', text, 1)
                    if len(parts) > 1 and parts[1].strip():
                        self._parse_ttl(parts[1].strip(), result)
                elif i + 1 < len(texts):
                    # Place/date of birth on the next line
                    next_text = texts[i+1].strip()
                    if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']):
                        self._parse_ttl(next_text, result)

        # ===== GENDER =====
        # NOTE(review): 'male' is a substring of 'female', so a line that
        # contains "female" takes the male branch.
        if any(kw in text_lower for kw in self.MALE_KEYWORDS):
            if result['jenis_kelamin'] is None:
                result['jenis_kelamin'] = 'LAKI-LAKI'
        elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS):
            if result['jenis_kelamin'] is None:
                result['jenis_kelamin'] = 'PEREMPUAN'

        # ===== BLOOD TYPE =====
        if 'darah' in text_lower or 'gol.' in text_lower:
            # Try to find blood type on same line
            gol_match = re.search(r'(?:gol|darah)[.\s::]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
            if gol_match and result['gol_darah'] is None:
                result['gol_darah'] = gol_match.group(1).upper()
            elif result['gol_darah'] is None and i + 1 < len(texts):
                # Blood type might be on next line (real KTP pattern)
                next_text = texts[i+1].strip()
                if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE):
                    result['gol_darah'] = next_text.upper()
        # Standalone blood type (e.g., just "O" or "A+" on its own line)
        if result['gol_darah'] is None:
            if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
                result['gol_darah'] = text.strip().upper()

        # ===== ADDRESS =====
        if 'alamat' in text_lower and result['alamat'] is None:
            val = self._extract_after_label(text_normalized, 'alamat')
            if val:
                result['alamat'] = val.upper()
            elif i + 1 < len(texts):
                result['alamat'] = texts[i+1].strip().upper()

        # ===== RT/RW =====
        # Any "ddd/ddd" pair on any line overwrites the previous value.
        rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
        if rt_rw_match:
            result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"

        # ===== VILLAGE (KELURAHAN/DESA) =====
        # 'kelamin' is excluded because it also contains 'kel'.
        if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower:
            if result['kel_desa'] is None:
                val = self._extract_after_label(text_normalized, 'kel|desa')
                if val:
                    result['kel_desa'] = val.upper()
                elif i + 1 < len(texts):
                    result['kel_desa'] = texts[i+1].strip().upper()

        # ===== DISTRICT (KECAMATAN) =====
        if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower):
            if result['kecamatan'] is None:
                val = self._extract_after_label(text_normalized, 'kecamatan|kec')
                if val:
                    result['kecamatan'] = val.upper()
                elif i + 1 < len(texts):
                    # Value on next line (real KTP pattern)
                    next_text = texts[i+1].strip()
                    if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['agama', 'status', 'pekerjaan']):
                        result['kecamatan'] = next_text.upper()

        # ===== RELIGION =====
        if 'agama' in text_lower:
            val = self._extract_after_label(text_normalized, 'agama')
            if val and result['agama'] is None:
                result['agama'] = val.upper()
            elif result['agama'] is None and i + 1 < len(texts):
                # Value on next line (real KTP pattern)
                next_text = texts[i+1].strip().upper()
                if next_text in ['ISLAM', 'KRISTEN', 'KATOLIK', 'HINDU', 'BUDHA', 'BUDDHA', 'KONGHUCU']:
                    result['agama'] = next_text
        else:
            # Unlabelled line: accept a short line that contains a religion name
            for agama in self.AGAMA_LIST:
                if agama in text_lower and len(text) < 20:
                    if result['agama'] is None:
                        result['agama'] = text.strip().upper()
                    break

        # ===== MARITAL STATUS =====
        if 'kawin' in text_lower:
            if result['status_perkawinan'] is None:
                val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan')
                if val:
                    result['status_perkawinan'] = val.upper()
                elif 'belum' in text_lower:
                    result['status_perkawinan'] = 'BELUM KAWIN'
                elif 'kawin' in text_lower and 'cerai' not in text_lower:
                    result['status_perkawinan'] = 'KAWIN'
                elif 'cerai hidup' in text_lower:
                    result['status_perkawinan'] = 'CERAI HIDUP'
                elif 'cerai mati' in text_lower:
                    result['status_perkawinan'] = 'CERAI MATI'

        # ===== OCCUPATION =====
        if 'pekerjaan' in text_lower:
            val = self._extract_after_label(text_normalized, 'pekerjaan')
            if val and result['pekerjaan'] is None:
                result['pekerjaan'] = val.upper()
            elif result['pekerjaan'] is None and i + 1 < len(texts):
                # Value on next line (real KTP pattern)
                next_text = texts[i+1].strip()
                if len(next_text) > 2 and 'kewarganegaraan' not in next_text.lower():
                    result['pekerjaan'] = next_text.upper()
        else:
            # Unlabelled line: accept a short line that contains an occupation keyword
            for pekerjaan in self.PEKERJAAN_LIST:
                if pekerjaan in text_lower and len(text) < 30:
                    if result['pekerjaan'] is None:
                        result['pekerjaan'] = text.strip().upper()
                    break

        # ===== CITIZENSHIP =====
        # 'wni' / 'wna' overwrite any earlier value; the generic label branch
        # only fills an empty field.
        if 'wni' in text_lower:
            result['kewarganegaraan'] = 'WNI'
        elif 'wna' in text_lower:
            result['kewarganegaraan'] = 'WNA'
        elif 'warga' in text_lower and result['kewarganegaraan'] is None:
            val = self._extract_after_label(text_normalized, 'kewarganegaraan|warga')
            if val:
                result['kewarganegaraan'] = val.upper()

        # ===== VALID UNTIL =====
        if 'berlaku' in text_lower or 'seumur' in text_lower:
            if result['berlaku_hingga'] is None:
                if 'seumur' in text_lower or 'hidup' in text_lower:
                    result['berlaku_hingga'] = 'SEUMUR HIDUP'
                else:
                    val = self._extract_after_label(text_normalized, 'berlaku')
                    if val:
                        result['berlaku_hingga'] = val.upper()

        # ===== ISSUE DATE (usually DD-MM-YYYY near the end) =====
        # Look for date that is NOT tanggal lahir (different date)
        if result['tanggal_penerbitan'] is None:
            # Match date format at end of text or standalone date
            date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip())
            if date_match:
                found_date = date_match.group(1)
                # Make sure it's not the same as tanggal_lahir
                if result['tanggal_lahir'] != found_date:
                    # Accept it once berlaku_hingga is known, or when the
                    # line lies in the last 30% of the text lines
                    if result['berlaku_hingga'] or i > len(texts) * 0.7:
                        result['tanggal_penerbitan'] = found_date

    # Post-processing
    result = self._post_process(result)

    return result
|
||||
|
||||
def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]:
|
||||
"""Ekstrak nilai setelah label (supports various separators)"""
|
||||
patterns = [
|
||||
rf'(?:{label_pattern})\s*:\s*(.+)', # label: value
|
||||
rf'(?:{label_pattern})\s+([A-Z0-9].+)', # label VALUE (uppercase start)
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, text, re.IGNORECASE)
|
||||
if match:
|
||||
value = match.group(1).strip()
|
||||
# Remove trailing colon or label fragment
|
||||
value = re.sub(r'^[:\s]+', '', value)
|
||||
value = re.sub(r'\s*:\s*$', '', value)
|
||||
if value and len(value) > 1:
|
||||
return value
|
||||
|
||||
return None
|
||||
|
||||
def _parse_ttl(self, ttl_text: str, result: Dict):
|
||||
"""Parse tempat/tanggal lahir dari text"""
|
||||
ttl_text = ttl_text.strip()
|
||||
|
||||
# Normalize dates where OCR missed dashes:
|
||||
# "05 08 1978" -> "05-08-1978"
|
||||
# "05 08-1978" -> "05-08-1978"
|
||||
# "05-08 1978" -> "05-08-1978"
|
||||
ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
|
||||
ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
|
||||
ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
|
||||
|
||||
# Handle 8-digit date without separator: "05081978" -> "05-08-1978"
|
||||
date_8digit = re.search(r'(\d{8})', ttl_text)
|
||||
if date_8digit:
|
||||
d = date_8digit.group(1)
|
||||
formatted = f"{d[:2]}-{d[2:4]}-{d[4:]}"
|
||||
ttl_text = ttl_text.replace(d, formatted)
|
||||
|
||||
# Handle merged city+date like "JAKARTA05-08-1978" - add space before digits
|
||||
ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE)
|
||||
|
||||
# Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY"
|
||||
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text)
|
||||
if date_match:
|
||||
result['tanggal_lahir'] = date_match.group(1)
|
||||
# Tempat adalah bagian sebelum tanggal
|
||||
place = ttl_text[:date_match.start()].strip(' ,:-/')
|
||||
# Clean up label remnants
|
||||
place = re.sub(r'^(tempat|tgl|lahir|:|:)[/\s::]*', '', place, flags=re.IGNORECASE).strip()
|
||||
if place and len(place) > 2:
|
||||
result['tempat_lahir'] = place.upper()
|
||||
else:
|
||||
# Coba split by comma
|
||||
parts = ttl_text.split(',')
|
||||
if len(parts) >= 2:
|
||||
result['tempat_lahir'] = parts[0].strip().upper()
|
||||
result['tanggal_lahir'] = parts[1].strip()
|
||||
elif len(parts) == 1 and len(ttl_text) > 2:
|
||||
result['tempat_lahir'] = ttl_text.upper()
|
||||
|
||||
def _post_process(self, result: Dict) -> Dict:
|
||||
"""Post-processing hasil ekstraksi"""
|
||||
# Validasi NIK (harus 16 digit)
|
||||
if result['nik'] and not re.match(r'^\d{16}$', result['nik']):
|
||||
cleaned = re.sub(r'\D', '', result['nik'])
|
||||
if len(cleaned) == 16:
|
||||
result['nik'] = cleaned
|
||||
else:
|
||||
result['nik'] = None
|
||||
|
||||
# Clean all string values - remove leading colons and extra whitespace
|
||||
for field in result:
|
||||
if result[field] and isinstance(result[field], str):
|
||||
val = result[field]
|
||||
# Remove leading colons (standard and full-width)
|
||||
val = re.sub(r'^[\s::]+', '', val)
|
||||
# Remove trailing colons
|
||||
val = re.sub(r'[\s::]+$', '', val)
|
||||
# Remove double spaces
|
||||
val = re.sub(r'\s+', ' ', val)
|
||||
result[field] = val.strip()
|
||||
|
||||
# Bersihkan label dari values
|
||||
for field in ['nama', 'alamat', 'tempat_lahir', 'kel_desa', 'kecamatan', 'agama', 'pekerjaan']:
|
||||
if result[field]:
|
||||
# Remove common labels yang ter-capture
|
||||
result[field] = re.sub(
|
||||
r'^(NAMA|ALAMAT|TEMPAT|LAHIR|TGL|KEL|DESA|KELURAHAN|KECAMATAN|KEC|AGAMA|PEKERJAAN|STATUS)[\s::]*',
|
||||
'', result[field], flags=re.IGNORECASE
|
||||
).strip()
|
||||
|
||||
# Fix status perkawinan yang masih mengandung label
|
||||
if result['status_perkawinan']:
|
||||
sp = result['status_perkawinan']
|
||||
sp = re.sub(r'^(STATUS|PERKAWINAN)[\s::]*', '', sp, flags=re.IGNORECASE).strip()
|
||||
result['status_perkawinan'] = sp
|
||||
|
||||
# Fix berlaku hingga
|
||||
if result['berlaku_hingga']:
|
||||
bh = result['berlaku_hingga']
|
||||
bh = re.sub(r'^(BERLAKU|HINGGA)[\s::]*', '', bh, flags=re.IGNORECASE).strip()
|
||||
if bh.upper() == 'HIDUP' or 'SEUMUR' in bh.upper():
|
||||
result['berlaku_hingga'] = 'SEUMUR HIDUP'
|
||||
else:
|
||||
result['berlaku_hingga'] = bh
|
||||
|
||||
# Fix merged kabupaten/kota names (e.g., JAKARTASELATAN -> JAKARTA SELATAN)
|
||||
if result['kabupaten_kota']:
|
||||
kk = result['kabupaten_kota']
|
||||
# Add space before directional words
|
||||
kk = re.sub(r'(JAKARTA|BANDUNG|SURABAYA|SEMARANG|MEDAN|BEKASI|TANGERANG|DEPOK|BOGOR)(SELATAN|UTARA|BARAT|TIMUR|PUSAT|TENGAH)',
|
||||
r'\1 \2', kk, flags=re.IGNORECASE)
|
||||
# Common merged patterns
|
||||
kk = re.sub(r'(KOTA|KABUPATEN)([A-Z])', r'\1 \2', kk, flags=re.IGNORECASE)
|
||||
result['kabupaten_kota'] = kk.upper()
|
||||
|
||||
# Fix merged provinsi names
|
||||
if result['provinsi']:
|
||||
prov = result['provinsi']
|
||||
prov = re.sub(r'(DKI|DI)(JAKARTA|YOGYAKARTA)', r'\1 \2', prov, flags=re.IGNORECASE)
|
||||
prov = re.sub(r'(JAWA|KALIMANTAN|SULAWESI|SUMATERA|NUSA TENGGARA)(BARAT|TIMUR|TENGAH|SELATAN|UTARA)',
|
||||
r'\1 \2', prov, flags=re.IGNORECASE)
|
||||
result['provinsi'] = prov.upper()
|
||||
|
||||
# Fix merged alamat/address (e.g., JLKECAPIV -> JL KECAPI V)
|
||||
if result['alamat']:
|
||||
alamat = result['alamat']
|
||||
# Add space after common street prefixes
|
||||
alamat = re.sub(r'^(JL|JLN|JALAN|GG|GANG|NO|BLOK)([A-Z])', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||
# Add space before Roman numerals at the end (I, II, III, IV, V, VI, VII, VIII, IX, X)
|
||||
alamat = re.sub(r'([A-Z])([IVX]+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||
# Add space before single digits/numbers at end
|
||||
alamat = re.sub(r'([A-Z])(\d+)$', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||
# Fix common patterns: "NO123" -> "NO 123", "BLOKA" -> "BLOK A"
|
||||
alamat = re.sub(r'\b(NO|BLOK)(\d+|[A-Z])\b', r'\1 \2', alamat, flags=re.IGNORECASE)
|
||||
result['alamat'] = alamat.upper()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run the extractor over a hand-written set of OCR
    # lines shaped like a typical KTP and print every extracted field.
    sample_lines = (
        'PROVINSI JAWA BARAT',
        'KABUPATEN BANDUNG',
        'NIK : 3204012345678901',
        'Nama : JOHN DOE',
        'Tempat/Tgl Lahir : BANDUNG, 01-01-1990',
        'Jenis Kelamin : LAKI-LAKI',
        'Alamat : JL. MERDEKA NO. 123',
        'RT/RW : 001/002',
        'Kel/Desa : SUKAMAJU',
        'Kecamatan : SUKASARI',
        'Agama : ISLAM',
        'Status Perkawinan : BELUM KAWIN',
        'Pekerjaan : KARYAWAN SWASTA',
        'Kewarganegaraan : WNI',
        'Berlaku Hingga : SEUMUR HIDUP',
    )
    # The extractor consumes OCR items as dicts carrying a 'text' key.
    sample_ocr = [{'text': line} for line in sample_lines]

    extractor = KTPExtractor()
    result = extractor.extract(sample_ocr)

    for key, value in result.items():
        print(f"{key}: {value}")
|
||||
153
ocr_engine.py
Normal file
153
ocr_engine.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
OCR Engine menggunakan PaddleOCR 3.x
|
||||
Untuk membaca teks dari gambar dokumen Indonesia (KTP, KK)
|
||||
"""
|
||||
|
||||
from paddleocr import PaddleOCR
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class OCREngine:
    """Wrapper around PaddleOCR 3.x for reading Indonesian documents (KTP, KK)."""

    def __init__(self):
        """Create the PaddleOCR 3.x pipeline configured for document photos."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # detect page rotation (0°/90°/180°/270°)
            use_doc_unwarping=True,  # perspective correction (trapezoid -> rectangle)
            use_textline_orientation=True,  # orientation of each text line
        )

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and enhance it for OCR.

        The image is downscaled so its longest side is at most 2000 px and
        its contrast is boosted with CLAHE on the grayscale version.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The enhanced image as a 3-channel BGR array.

        Raises:
            ValueError: If OpenCV cannot read the file.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Downscale oversized images (longest side capped at 2000 px).
        max_dim = 2000
        height, width = img.shape[:2]
        longest = max(height, width)
        if longest > max_dim:
            scale = max_dim / longest
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # CLAHE works on a single channel, so go through grayscale.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Back to 3-channel BGR before handing the image to PaddleOCR.
        return cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)

    @staticmethod
    def _result_field(res, *names):
        """Return the first non-None field among ``names`` from one OCR result.

        Each name is looked up as an attribute first and then, if the result
        offers ``.get``, as a mapping key. Returns ``[]`` when nothing is found.
        """
        for name in names:
            value = getattr(res, name, None)
            if value is None and hasattr(res, 'get'):
                value = res.get(name)
            # Explicit None check: `if value` is ambiguous for numpy arrays.
            if value is not None:
                return value
        return []

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """Run OCR on an image and return the recognised text lines.

        Args:
            image_path: Path to the image file.
            preprocess: When True, the image is first passed through
                ``preprocess_image`` and the enhanced array is given to
                PaddleOCR instead of the file path.

        Returns:
            List of dicts with keys ``'text'``, ``'confidence'``, ``'bbox'``,
            ``'y_center'`` and ``'x_center'``, sorted top-to-bottom then
            left-to-right. An empty list is returned when nothing is
            recognised or when any error occurs (the error is printed).
        """
        try:
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path

            # PaddleOCR 3.x API.
            result = self.ocr.predict(input=ocr_input)
            if not result:
                return []

            extracted = []
            for res in result:
                # Results are read the same way whether they expose their
                # fields as attributes or as mapping keys, so bounding
                # boxes are kept in both cases.
                texts = self._result_field(res, 'rec_texts', 'texts')
                scores = self._result_field(res, 'rec_scores', 'scores')
                # NOTE(review): PaddleOCR 3.x documents 'rec_polys' as the
                # polygons filtered alongside 'rec_texts'; 'dt_polys' (all
                # detections) is only a fallback - confirm against the
                # installed version.
                polys = self._result_field(res, 'rec_polys', 'dt_polys')

                for i, text in enumerate(texts):
                    confidence = scores[i] if i < len(scores) else 0.0

                    poly = polys[i] if i < len(polys) else None
                    if poly is None:
                        bbox = []
                    elif hasattr(poly, 'tolist'):
                        bbox = poly.tolist()
                    else:
                        bbox = [list(point) for point in poly]

                    # Centre of the box (average of opposite corners) drives the sort.
                    if len(bbox) >= 4:
                        y_center = (bbox[0][1] + bbox[2][1]) / 2
                        x_center = (bbox[0][0] + bbox[2][0]) / 2
                    else:
                        # No geometry available: keep the recognition order.
                        y_center = i * 10
                        x_center = 0

                    extracted.append({
                        'text': text,
                        'confidence': float(confidence),
                        'bbox': bbox,
                        'y_center': y_center,
                        'x_center': x_center,
                    })

            # Top-to-bottom, then left-to-right.
            extracted.sort(key=lambda item: (item['y_center'], item['x_center']))
            return extracted

        except Exception as e:
            # Best-effort by design: callers receive an empty result on failure.
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str) -> str:
        """Return all recognised text of the image as one newline-joined string."""
        results = self.extract_text(image_path)
        return '\n'.join(r['text'] for r in results)
|
||||
|
||||
|
||||
# Module-wide OCREngine instance, created on first use.
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the shared OCREngine, constructing it on the first call."""
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine
    _ocr_engine = OCREngine()
    return _ocr_engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line check: OCR the image given as first argument and print
    # every recognised line prefixed with its confidence.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        engine = get_ocr_engine()
        results = engine.extract_text(cli_args[0])
        for r in results:
            print(f"[{r['confidence']:.2f}] {r['text']}")
|
||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
paddlepaddle
|
||||
paddleocr
|
||||
flask
|
||||
pillow
|
||||
opencv-python
|
||||
538
static/style.css
Normal file
538
static/style.css
Normal file
@@ -0,0 +1,538 @@
|
||||
/* OCR KTP/KK - Modern Dark Theme */
|
||||
|
||||
:root {
|
||||
--bg-primary: #0f0f1a;
|
||||
--bg-secondary: #1a1a2e;
|
||||
--bg-tertiary: #252540;
|
||||
--accent-primary: #6366f1;
|
||||
--accent-secondary: #818cf8;
|
||||
--accent-gradient: linear-gradient(135deg, #6366f1 0%, #a855f7 100%);
|
||||
--text-primary: #f1f5f9;
|
||||
--text-secondary: #94a3b8;
|
||||
--text-muted: #64748b;
|
||||
--success: #22c55e;
|
||||
--error: #ef4444;
|
||||
--warning: #f59e0b;
|
||||
--border: #334155;
|
||||
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.3);
|
||||
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.4);
|
||||
--radius: 12px;
|
||||
--radius-lg: 16px;
|
||||
}
|
||||
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
|
||||
background: var(--bg-primary);
|
||||
color: var(--text-primary);
|
||||
min-height: 100vh;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 2rem 1rem;
|
||||
}
|
||||
|
||||
/* Header */
|
||||
header {
|
||||
text-align: center;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
header h1 {
|
||||
font-size: 2.5rem;
|
||||
font-weight: 700;
|
||||
background: var(--accent-gradient);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.subtitle {
|
||||
color: var(--text-secondary);
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
/* Upload Section */
|
||||
.upload-section {
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 2rem;
|
||||
box-shadow: var(--shadow-lg);
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
/* Document Type Selector */
|
||||
.doc-type-selector {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.doc-btn {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.5rem;
|
||||
padding: 1rem;
|
||||
background: var(--bg-tertiary);
|
||||
border: 2px solid transparent;
|
||||
border-radius: var(--radius);
|
||||
color: var(--text-secondary);
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
.doc-btn:hover {
|
||||
background: var(--bg-primary);
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.doc-btn.active {
|
||||
background: var(--accent-gradient);
|
||||
color: white;
|
||||
border-color: var(--accent-secondary);
|
||||
}
|
||||
|
||||
.doc-btn .icon {
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
/* Dropzone */
|
||||
.dropzone {
|
||||
border: 2px dashed var(--border);
|
||||
border-radius: var(--radius);
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s ease;
|
||||
background: var(--bg-tertiary);
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.dropzone:hover,
|
||||
.dropzone.dragover {
|
||||
border-color: var(--accent-primary);
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
}
|
||||
|
||||
.dropzone-content {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.upload-icon {
|
||||
font-size: 4rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.dropzone p {
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.dropzone .hint {
|
||||
color: var(--text-muted);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.file-btn {
|
||||
display: inline-block;
|
||||
padding: 0.75rem 1.5rem;
|
||||
background: var(--accent-gradient);
|
||||
color: white;
|
||||
border-radius: var(--radius);
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
margin: 0.5rem 0;
|
||||
transition: transform 0.2s ease;
|
||||
}
|
||||
|
||||
.file-btn:hover {
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.file-types {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.preview-image {
|
||||
max-width: 100%;
|
||||
max-height: 400px;
|
||||
border-radius: var(--radius);
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/* Process Button */
|
||||
.process-btn {
|
||||
width: 100%;
|
||||
padding: 1rem;
|
||||
margin-top: 1.5rem;
|
||||
background: var(--accent-gradient);
|
||||
border: none;
|
||||
border-radius: var(--radius);
|
||||
color: white;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s ease;
|
||||
box-shadow: var(--shadow);
|
||||
}
|
||||
|
||||
.process-btn:hover:not(:disabled) {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: var(--shadow-lg);
|
||||
}
|
||||
|
||||
.process-btn:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
/* Results Section */
|
||||
.results-section {
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 2rem;
|
||||
box-shadow: var(--shadow-lg);
|
||||
animation: slideUp 0.3s ease;
|
||||
}
|
||||
|
||||
@keyframes slideUp {
|
||||
from {
|
||||
opacity: 0;
|
||||
transform: translateY(20px);
|
||||
}
|
||||
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
}
|
||||
|
||||
.results-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 1.5rem;
|
||||
flex-wrap: wrap;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.results-header h2 {
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
.results-actions {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.action-btn {
|
||||
padding: 0.5rem 1rem;
|
||||
background: var(--bg-tertiary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
color: var(--text-primary);
|
||||
font-size: 0.875rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.action-btn:hover {
|
||||
background: var(--accent-primary);
|
||||
border-color: var(--accent-primary);
|
||||
}
|
||||
|
||||
.action-btn.secondary {
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
/* Results Table */
|
||||
.results-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
.results-table th,
|
||||
.results-table td {
|
||||
padding: 0.875rem 1rem;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.results-table th {
|
||||
background: var(--bg-tertiary);
|
||||
color: var(--text-secondary);
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
}
|
||||
|
||||
.results-table th:first-child {
|
||||
border-radius: var(--radius) 0 0 0;
|
||||
}
|
||||
|
||||
.results-table th:last-child {
|
||||
border-radius: 0 var(--radius) 0 0;
|
||||
}
|
||||
|
||||
.field-label {
|
||||
color: var(--text-secondary);
|
||||
font-weight: 500;
|
||||
width: 40%;
|
||||
}
|
||||
|
||||
.field-value {
|
||||
color: var(--text-primary);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.results-table tr:hover {
|
||||
background: rgba(99, 102, 241, 0.05);
|
||||
}
|
||||
|
||||
/* Editable Fields */
|
||||
.editable-field {
|
||||
width: 100%;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: var(--bg-tertiary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
color: var(--text-primary);
|
||||
font-size: 0.95rem;
|
||||
font-weight: 600;
|
||||
font-family: inherit;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.editable-field:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent-primary);
|
||||
background: var(--bg-secondary);
|
||||
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
|
||||
}
|
||||
|
||||
.editable-field::placeholder {
|
||||
color: var(--text-muted);
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
/* Region Dropdown Styles */
|
||||
.region-field-wrapper {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.region-field-wrapper input,
|
||||
.region-field-wrapper select {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.region-dropdown {
|
||||
width: 100%;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: var(--bg-tertiary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
color: var(--text-primary);
|
||||
font-size: 0.95rem;
|
||||
font-family: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.region-dropdown:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent-primary);
|
||||
}
|
||||
|
||||
.dropdown-toggle {
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: var(--bg-tertiary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
color: var(--text-secondary);
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.dropdown-toggle:hover {
|
||||
background: var(--accent-primary);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.dropdown-toggle.confirmed {
|
||||
background: var(--success);
|
||||
color: white;
|
||||
border-color: var(--success);
|
||||
}
|
||||
|
||||
/* Validation Indicators */
|
||||
.validation-status {
|
||||
margin-left: 0.5rem;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.validation-status.valid-field {
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
.validation-status.invalid-field {
|
||||
color: var(--warning);
|
||||
}
|
||||
|
||||
.editable-field.valid-field {
|
||||
border-color: var(--success);
|
||||
}
|
||||
|
||||
.editable-field.invalid-field {
|
||||
border-color: var(--warning);
|
||||
}
|
||||
|
||||
.suggestion-text {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-muted);
|
||||
margin-top: 0.25rem;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* Raw Text Section */
|
||||
.raw-text-section {
|
||||
margin-top: 1.5rem;
|
||||
padding-top: 1.5rem;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.raw-text-section h3 {
|
||||
font-size: 1rem;
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.raw-text-section pre {
|
||||
background: var(--bg-primary);
|
||||
padding: 1rem;
|
||||
border-radius: var(--radius);
|
||||
font-family: 'Consolas', monospace;
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
max-height: 300px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
/* Error Section */
|
||||
.error-section {
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.error-content {
|
||||
background: rgba(239, 68, 68, 0.1);
|
||||
border: 1px solid var(--error);
|
||||
border-radius: var(--radius);
|
||||
padding: 1rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.error-icon {
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
.error-content p {
|
||||
color: var(--error);
|
||||
}
|
||||
|
||||
/* Footer */
|
||||
footer {
|
||||
text-align: center;
|
||||
margin-top: 2rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
footer p {
|
||||
color: var(--text-muted);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
footer a {
|
||||
color: var(--accent-secondary);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
footer a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* Responsive */
|
||||
@media (max-width: 600px) {
|
||||
.container {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
header h1 {
|
||||
font-size: 2rem;
|
||||
}
|
||||
|
||||
.upload-section,
|
||||
.results-section {
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
.doc-type-selector {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.results-header {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
}
|
||||
|
||||
.results-actions {
|
||||
width: 100%;
|
||||
justify-content: flex-start;
|
||||
}
|
||||
|
||||
.field-label {
|
||||
width: 45%;
|
||||
}
|
||||
}
|
||||
|
||||
/* Scrollbar */
|
||||
::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-track {
|
||||
background: var(--bg-tertiary);
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb {
|
||||
background: var(--border);
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb:hover {
|
||||
background: var(--text-muted);
|
||||
}
|
||||
570
templates/index.html
Normal file
570
templates/index.html
Normal file
@@ -0,0 +1,570 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="id">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>OCR KTP/KK - Pembaca Dokumen Indonesia</title>
|
||||
<link rel="stylesheet" href="/static/style.css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<h1>📄 OCR KTP/KK</h1>
|
||||
<p class="subtitle">Pembaca Dokumen Indonesia Offline</p>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<!-- Upload Section -->
|
||||
<section class="upload-section">
|
||||
<div class="doc-type-selector">
|
||||
<button class="doc-btn active" data-type="ktp">
|
||||
<span class="icon">🪪</span>
|
||||
KTP
|
||||
</button>
|
||||
<button class="doc-btn" data-type="kk">
|
||||
<span class="icon">👨👩👧👦</span>
|
||||
Kartu Keluarga
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="dropzone" id="dropzone">
|
||||
<div class="dropzone-content">
|
||||
<div class="upload-icon">📷</div>
|
||||
<p>Drag & drop gambar di sini</p>
|
||||
<p class="hint">atau</p>
|
||||
<label class="file-btn">
|
||||
Pilih File
|
||||
<input type="file" id="fileInput" accept="image/*" hidden>
|
||||
</label>
|
||||
<p class="file-types">PNG, JPG, JPEG, BMP, WEBP (max 16MB)</p>
|
||||
</div>
|
||||
<img id="preview" class="preview-image" style="display: none;">
|
||||
</div>
|
||||
|
||||
<button id="processBtn" class="process-btn" disabled>
|
||||
<span class="btn-text">🔍 Proses OCR</span>
|
||||
<span class="btn-loading" style="display: none;">⏳ Memproses...</span>
|
||||
</button>
|
||||
</section>
|
||||
|
||||
<!-- Results Section -->
|
||||
<section class="results-section" id="resultsSection" style="display: none;">
|
||||
<div class="results-header">
|
||||
<h2>📋 Hasil Ekstraksi</h2>
|
||||
<div class="results-actions">
|
||||
<button class="action-btn" id="copyBtn" title="Copy JSON">📋 Copy</button>
|
||||
<button class="action-btn" id="exportBtn" title="Export JSON">💾 Export</button>
|
||||
<button class="action-btn secondary" id="toggleRaw">📝 Raw Text</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="results-content">
|
||||
<table class="results-table" id="resultsTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Field</th>
|
||||
<th>Nilai</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="resultsBody">
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<div class="raw-text-section" id="rawTextSection" style="display: none;">
|
||||
<h3>Raw OCR Text</h3>
|
||||
<pre id="rawText"></pre>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Error Section -->
|
||||
<section class="error-section" id="errorSection" style="display: none;">
|
||||
<div class="error-content">
|
||||
<span class="error-icon">⚠️</span>
|
||||
<p id="errorMessage"></p>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
<p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> • Data
|
||||
diproses secara lokal</p>
|
||||
</footer>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// State
|
||||
let selectedFile = null;
|
||||
let docType = 'ktp';
|
||||
let extractedData = null;
|
||||
|
||||
// Elements
|
||||
const dropzone = document.getElementById('dropzone');
|
||||
const fileInput = document.getElementById('fileInput');
|
||||
const preview = document.getElementById('preview');
|
||||
const processBtn = document.getElementById('processBtn');
|
||||
const resultsSection = document.getElementById('resultsSection');
|
||||
const resultsBody = document.getElementById('resultsBody');
|
||||
const rawText = document.getElementById('rawText');
|
||||
const rawTextSection = document.getElementById('rawTextSection');
|
||||
const errorSection = document.getElementById('errorSection');
|
||||
const errorMessage = document.getElementById('errorMessage');
|
||||
const docBtns = document.querySelectorAll('.doc-btn');
|
||||
|
||||
// Field labels untuk display
|
||||
const fieldLabels = {
|
||||
// KTP
|
||||
'nik': 'NIK',
|
||||
'nama': 'Nama',
|
||||
'tempat_lahir': 'Tempat Lahir',
|
||||
'tanggal_lahir': 'Tanggal Lahir',
|
||||
'jenis_kelamin': 'Jenis Kelamin',
|
||||
'gol_darah': 'Gol. Darah',
|
||||
'alamat': 'Alamat',
|
||||
'rt_rw': 'RT/RW',
|
||||
'kel_desa': 'Kel/Desa',
|
||||
'kecamatan': 'Kecamatan',
|
||||
'agama': 'Agama',
|
||||
'status_perkawinan': 'Status Perkawinan',
|
||||
'pekerjaan': 'Pekerjaan',
|
||||
'kewarganegaraan': 'Kewarganegaraan',
|
||||
'berlaku_hingga': 'Berlaku Hingga',
|
||||
'provinsi': 'Provinsi',
|
||||
'kabupaten_kota': 'Kabupaten/Kota',
|
||||
'tanggal_penerbitan': 'Tanggal Penerbitan',
|
||||
// KK
|
||||
'no_kk': 'No. KK',
|
||||
'nama_kepala_keluarga': 'Kepala Keluarga',
|
||||
'kode_pos': 'Kode Pos',
|
||||
'anggota_keluarga': 'Jumlah Anggota'
|
||||
};
|
||||
|
||||
// Doc type selection
|
||||
docBtns.forEach(btn => {
|
||||
btn.addEventListener('click', () => {
|
||||
docBtns.forEach(b => b.classList.remove('active'));
|
||||
btn.classList.add('active');
|
||||
docType = btn.dataset.type;
|
||||
});
|
||||
});
|
||||
|
||||
// Drag & drop
|
||||
dropzone.addEventListener('dragover', (e) => {
|
||||
e.preventDefault();
|
||||
dropzone.classList.add('dragover');
|
||||
});
|
||||
|
||||
dropzone.addEventListener('dragleave', () => {
|
||||
dropzone.classList.remove('dragover');
|
||||
});
|
||||
|
||||
dropzone.addEventListener('drop', (e) => {
|
||||
e.preventDefault();
|
||||
dropzone.classList.remove('dragover');
|
||||
const files = e.dataTransfer.files;
|
||||
if (files.length > 0) {
|
||||
handleFile(files[0]);
|
||||
}
|
||||
});
|
||||
|
||||
// File input
|
||||
fileInput.addEventListener('change', (e) => {
|
||||
if (e.target.files.length > 0) {
|
||||
handleFile(e.target.files[0]);
|
||||
}
|
||||
});
|
||||
|
||||
// Click on dropzone: open the file picker.
dropzone.addEventListener('click', (e) => {
    // A click on the "Pilih File" label (or on the hidden input the label
    // forwards to) already opens the picker natively. Calling
    // fileInput.click() as well can open the dialog a second time.
    if (e.target === fileInput || e.target.closest('.file-btn')) {
        return;
    }

    if (e.target === dropzone || e.target.closest('.dropzone-content')) {
        fileInput.click();
    }
});
|
||||
|
||||
/**
 * Validate a chosen/dropped file, remember it as the current selection and
 * show its preview in the dropzone.
 *
 * Files that are not images or that exceed 16MB are rejected with an error
 * message and leave the current selection untouched.
 */
function handleFile(file) {
    const maxBytes = 16 * 1024 * 1024;

    const isImage = file.type.startsWith('image/');
    if (!isImage) {
        showError('File harus berupa gambar');
        return;
    }

    const tooLarge = file.size > maxBytes;
    if (tooLarge) {
        showError('Ukuran file maksimal 16MB');
        return;
    }

    selectedFile = file;

    // New selection: enable processing and clear any previous outcome.
    processBtn.disabled = false;
    hideError();
    resultsSection.style.display = 'none';

    // Swap the dropzone prompt for a preview once the file has been read.
    const reader = new FileReader();
    reader.addEventListener('load', () => {
        preview.src = reader.result;
        preview.style.display = 'block';
        dropzone.querySelector('.dropzone-content').style.display = 'none';
    });
    reader.readAsDataURL(file);
}
|
||||
|
||||
// Process button: upload the selected image and render the OCR result.
processBtn.addEventListener('click', async () => {
    if (!selectedFile) return;

    const btnText = processBtn.querySelector('.btn-text');
    const btnLoading = processBtn.querySelector('.btn-loading');

    // Lock the button and show the loading label while the request runs.
    processBtn.disabled = true;
    btnText.style.display = 'none';
    btnLoading.style.display = 'inline';

    try {
        const formData = new FormData();
        formData.append('file', selectedFile);
        formData.append('doc_type', docType);

        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        // Error pages produced by the server itself (e.g. 413 for an
        // oversized upload, or a 500) may not be JSON. Report the HTTP
        // status instead of a cryptic JSON parse error.
        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            throw new Error(`Respons server tidak valid (HTTP ${response.status})`);
        }

        if (result.success) {
            extractedData = result.data;
            // displayResults is async: await it so rendering failures reach
            // the catch below and the button stays locked until it is done.
            await displayResults(result);
            hideError();
        } else {
            showError(result.error || `HTTP ${response.status}`);
            resultsSection.style.display = 'none';
        }
    } catch (error) {
        showError('Terjadi kesalahan: ' + error.message);
    } finally {
        processBtn.disabled = false;
        btnText.style.display = 'inline';
        btnLoading.style.display = 'none';
    }
});
|
||||
|
||||
// Fields rendered with a dropdown, listed from the broadest administrative
// level down to the narrowest.
const regionFields = [
    'provinsi',
    'kabupaten_kota',
    'kecamatan',
    'kel_desa'
];

// Client-side cache of region lists already fetched from the server.
// `provinces` is a flat list; the other buckets are keyed by the code of the
// parent region.
let regionData = {
    provinces: [],
    regencies: {},
    districts: {},
    villages: {}
};

// Latest per-field validation info (set by validateRegionData and by dropdown
// selections); null until a validation result is available.
let validationResult = null;
|
||||
|
||||
// Order in which extracted fields are listed in the results table.
// Keys that do not appear here are placed after all listed ones.
const fieldOrder = [
    // Location hierarchy first
    'provinsi',
    'kabupaten_kota',
    'kecamatan',
    'kel_desa',

    // Identity
    'nik',
    'nama',
    'tempat_lahir',
    'tanggal_lahir',
    'jenis_kelamin',
    'gol_darah',

    // Address
    'alamat',
    'rt_rw',

    // Other info
    'agama',
    'status_perkawinan',
    'pekerjaan',
    'kewarganegaraan',
    'berlaku_hingga',

    // Issue date
    'tanggal_penerbitan',

    // KK specific
    'no_kk',
    'nama_kepala_keluarga',
    'kode_pos',
    'anggota_keluarga'
];
|
||||
|
||||
/**
 * Render an OCR response into the results table.
 *
 * Clears the table, stores `result.data` in `extractedData`, runs region
 * validation, then adds one row per field in `fieldOrder` order (unlisted
 * keys last). 'anggota_keluarga' is shown as a read-only member count, region
 * fields get a dropdown-enabled row, everything else a plain editable row.
 * Finally the raw OCR text is filled in and the results section is revealed
 * and scrolled into view.
 *
 * @param {{data: Object, raw_text: string}} result - Response from /upload.
 */
async function displayResults(result) {
    resultsBody.innerHTML = '';
    const { data } = result;
    extractedData = data;

    // Region validation must finish first: the region rows read its outcome.
    await validateRegionData(data);

    // Unlisted keys share one rank past the end of fieldOrder, so they sort
    // after every listed key and compare equal among themselves.
    const rankOf = (field) => {
        const position = fieldOrder.indexOf(field);
        return position === -1 ? fieldOrder.length : position;
    };
    const orderedKeys = Object.keys(data).sort((left, right) => rankOf(left) - rankOf(right));

    for (const field of orderedKeys) {
        const fieldValue = data[field];

        if (field === 'anggota_keluarga') {
            const memberCount = Array.isArray(fieldValue) ? fieldValue.length : 0;
            addResultRow('Jumlah Anggota', memberCount + ' orang', null, false);
            continue;
        }

        const caption = fieldLabels[field] || field;
        if (regionFields.includes(field)) {
            // Region field with dropdown
            await addRegionRow(caption, fieldValue || '', field);
        } else {
            addResultRow(caption, fieldValue || '', field, true);
        }
    }

    rawText.textContent = result.raw_text;
    resultsSection.style.display = 'block';
    resultsSection.scrollIntoView({ behavior: 'smooth' });
}
|
||||
|
||||
/**
 * Ask the server to validate the region fields of the extracted data and
 * store the outcome in `validationResult`.
 *
 * `validationResult` is reset to null before the request, so a failed or
 * unsuccessful validation never leaves the result of a previously processed
 * document in place. Failures are logged and otherwise ignored (best effort).
 *
 * @param {Object} data - Extracted document fields, sent as the JSON body.
 * @returns {Promise<void>}
 */
async function validateRegionData(data) {
    // Drop stale state from an earlier document before validating this one.
    validationResult = null;

    try {
        const response = await fetch('/api/validate-region', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(data)
        });
        const result = await response.json();
        if (result.success) {
            validationResult = result.validation || null;
        }
    } catch (e) {
        console.error('Validation error:', e);
    }
}
|
||||
|
||||
/**
 * Append a results-table row for one region field: a text input, a hidden
 * dropdown of known regions, and a toggle button to switch between them.
 *
 * The row reflects `validationResult[key]` (status icon, CSS class, optional
 * suggestion). All dynamic text (label, OCR value, suggestion, key) is
 * assigned through DOM APIs rather than interpolated into markup, so
 * characters such as quotes or angle brackets cannot break or inject HTML.
 *
 * @param {string} label - Human-readable field label.
 * @param {string} value - Value extracted by OCR (may be empty).
 * @param {string} key - Field key in `extractedData` / `validationResult`.
 * @returns {Promise<void>}
 */
async function addRegionRow(label, value, key) {
    const row = document.createElement('tr');
    const validation = validationResult?.[key];
    const isValid = validation?.valid;
    const suggestion = validation?.suggestion;

    // Status indicator: check mark when valid, warning when a value exists
    // but was not validated, nothing for an empty field.
    const statusIcon = isValid ? '✓' : (value ? '⚠' : '');
    const statusClass = isValid ? 'valid-field' : (value ? 'invalid-field' : '');

    // Static skeleton only; every dynamic value is filled in below.
    row.innerHTML = `
        <td class="field-label">
            <span class="validation-status"></span>
        </td>
        <td class="field-value">
            <div class="region-field-wrapper">
                <input type="text" class="editable-field" placeholder="Ketik atau pilih...">
                <select class="region-dropdown" style="display: none;">
                    <option value="">-- Pilih --</option>
                </select>
                <button type="button" class="dropdown-toggle" title="Pilih dari daftar">▼</button>
            </div>
        </td>
    `;

    const labelCell = row.querySelector('.field-label');
    const statusEl = row.querySelector('.validation-status');
    const valueCell = row.querySelector('.field-value');
    const input = row.querySelector('input');
    const select = row.querySelector('select');
    const toggleBtn = row.querySelector('.dropdown-toggle');

    labelCell.insertBefore(document.createTextNode(label), labelCell.firstChild);
    statusEl.textContent = statusIcon;
    if (statusClass) {
        // classList.add('') throws, so only add a non-empty class.
        statusEl.classList.add(statusClass);
        input.classList.add(statusClass);
    }
    for (const el of [input, select, toggleBtn]) {
        el.setAttribute('data-key', key);
    }

    // NOTE(review): the input is pre-filled with the suggestion while
    // extractedData[key] keeps the OCR value until the user edits the field;
    // confirm that copy/export should not pick up the suggestion directly.
    input.setAttribute('value', suggestion || value || '');

    if (suggestion && suggestion !== value) {
        const hint = document.createElement('div');
        hint.className = 'suggestion-text';
        hint.textContent = `Saran: ${suggestion}`;
        valueCell.appendChild(hint);
    }

    // Input change: keep the exported data in sync with manual edits.
    input.addEventListener('input', (e) => {
        if (extractedData) {
            extractedData[key] = e.target.value;
        }
    });

    // Toggle between the free-text input and the dropdown; options are
    // (re)loaded each time the dropdown is opened.
    toggleBtn.addEventListener('click', async () => {
        if (select.style.display === 'none') {
            await loadDropdownOptions(key, select);
            select.style.display = 'block';
            input.style.display = 'none';
        } else {
            select.style.display = 'none';
            input.style.display = 'block';
        }
    });

    // Select change: copy the chosen region into the input and the data.
    select.addEventListener('change', (e) => {
        const selectedOption = e.target.options[e.target.selectedIndex];
        const selectedCode = selectedOption.value;
        const selectedName = selectedOption.text !== '-- Pilih --' ? selectedOption.text : '';

        input.value = selectedName;
        if (extractedData) {
            extractedData[key] = selectedName;
        }

        // Record the selected code so child dropdowns can be scoped to it.
        if (!validationResult) validationResult = {};
        validationResult[key] = {
            valid: !!selectedCode,
            code: selectedCode,
            suggestion: selectedName
        };

        select.style.display = 'none';
        input.style.display = 'block';

        // Turn the toggle button into a check mark for a confirmed selection.
        if (selectedCode) {
            toggleBtn.textContent = '✓';
            toggleBtn.classList.add('confirmed');
            input.classList.remove('invalid-field');
            input.classList.add('valid-field');
        } else {
            toggleBtn.textContent = '▼';
            toggleBtn.classList.remove('confirmed');
        }

        // Values below this level no longer belong to the new parent.
        clearDependentFields(key);
    });

    resultsBody.appendChild(row);
}
|
||||
|
||||
/**
 * Fill a region dropdown with the options for the given field.
 *
 * Provinces are fetched once and cached in `regionData.provinces`. The other
 * levels are fetched per parent code (taken from `validationResult`) and
 * cached in the matching `regionData` bucket; without a parent code the
 * dropdown only shows the placeholder option. HTTP errors are treated as
 * failures (and therefore not cached), in which case the dropdown shows an
 * error option and the cause is logged.
 *
 * @param {string} key - Region field key ('provinsi', 'kabupaten_kota',
 *     'kecamatan' or 'kel_desa').
 * @param {HTMLSelectElement} select - Dropdown to populate.
 * @returns {Promise<void>}
 */
async function loadDropdownOptions(key, select) {
    // Per level: cache bucket in regionData, API path, and the parent field
    // whose code scopes the list (null for the top level).
    const levels = {
        provinsi: { cache: 'provinces', endpoint: '/api/provinces', parent: null },
        kabupaten_kota: { cache: 'regencies', endpoint: '/api/regencies', parent: 'provinsi' },
        kecamatan: { cache: 'districts', endpoint: '/api/districts', parent: 'kabupaten_kota' },
        kel_desa: { cache: 'villages', endpoint: '/api/villages', parent: 'kecamatan' }
    };

    // Fetch one region list; a non-2xx response counts as an error so an
    // error body is never mistaken for (and cached as) an empty list.
    const fetchList = async (url) => {
        const res = await fetch(url);
        if (!res.ok) {
            throw new Error(`HTTP ${res.status} for ${url}`);
        }
        const json = await res.json();
        return json.data || [];
    };

    select.innerHTML = '<option value="">Loading...</option>';

    try {
        let data = [];
        const level = Object.prototype.hasOwnProperty.call(levels, key) ? levels[key] : null;

        if (level && level.parent === null) {
            if (!regionData.provinces.length) {
                regionData.provinces = await fetchList(level.endpoint);
            }
            data = regionData.provinces;
        } else if (level) {
            const parentCode = validationResult?.[level.parent]?.code;
            if (parentCode) {
                const bucket = regionData[level.cache];
                if (!bucket[parentCode]) {
                    bucket[parentCode] = await fetchList(
                        `${level.endpoint}/${encodeURIComponent(parentCode)}`
                    );
                }
                data = bucket[parentCode];
            }
        }

        select.innerHTML = '<option value="">-- Pilih --</option>';
        data.forEach(item => {
            const option = document.createElement('option');
            option.value = item.code;
            option.textContent = item.name;
            select.appendChild(option);
        });
    } catch (e) {
        console.error('Region list error:', e);
        select.innerHTML = '<option value="">Error loading data</option>';
    }
}
|
||||
|
||||
/**
 * Blank out every region field below the given one in the hierarchy
 * provinsi > kabupaten_kota > kecamatan > kel_desa.
 *
 * For each lower-level field the visible input (if present) and the entry in
 * `extractedData` are emptied, and an existing `validationResult` entry is
 * replaced by an invalid, code-less one.
 *
 * @param {string} key - Region field whose descendants should be cleared.
 */
function clearDependentFields(key) {
    const hierarchy = ['provinsi', 'kabupaten_kota', 'kecamatan', 'kel_desa'];
    const level = hierarchy.indexOf(key);
    const descendants = level === -1 ? [] : hierarchy.slice(level + 1);

    for (const childKey of descendants) {
        const childInput = document.querySelector(`input[data-key="${childKey}"]`);
        if (childInput) {
            childInput.value = '';
        }

        if (extractedData) {
            extractedData[childKey] = '';
        }

        // Clear validation code for cascading
        const hasValidation = validationResult && validationResult[childKey];
        if (hasValidation) {
            validationResult[childKey] = { valid: false, code: null, suggestion: null };
        }
    }
}
|
||||
|
||||
/**
 * Append a label/value row to the results table.
 *
 * With `editable` true and a `key` given, the value is shown in a text input
 * whose edits are written back to `extractedData[key]`; otherwise it is shown
 * as plain text ('-' when empty). The row is built with DOM APIs, so label
 * and value are always treated as text and cannot inject or break markup.
 *
 * @param {string} label - Text for the label cell.
 * @param {string} value - Value to display.
 * @param {?string} key - Field key in `extractedData`, or null for read-only rows.
 * @param {boolean} [editable=true] - Whether the value can be edited.
 */
function addResultRow(label, value, key, editable = true) {
    const row = document.createElement('tr');

    const labelCell = document.createElement('td');
    labelCell.className = 'field-label';
    labelCell.textContent = label;

    const valueCell = document.createElement('td');
    valueCell.className = 'field-value';

    if (editable && key) {
        const input = document.createElement('input');
        input.type = 'text';
        input.className = 'editable-field';
        input.setAttribute('data-key', key);
        input.setAttribute('value', value || '');
        input.placeholder = 'Klik untuk edit...';
        // Keep the exported data in sync with manual edits.
        input.addEventListener('input', (e) => {
            if (extractedData && key) {
                extractedData[key] = e.target.value;
            }
        });
        valueCell.appendChild(input);
    } else {
        valueCell.textContent = value || '-';
    }

    row.appendChild(labelCell);
    row.appendChild(valueCell);
    resultsBody.appendChild(row);
}
|
||||
|
||||
// Show the raw OCR text section if it is hidden, hide it otherwise.
document.getElementById('toggleRaw').addEventListener('click', function () {
    const currentlyHidden = rawTextSection.style.display === 'none';
    rawTextSection.style.display = currentlyHidden ? 'block' : 'none';
});
|
||||
|
||||
// Copy the extracted data to the clipboard as pretty-printed JSON.
// The async Clipboard API is unavailable outside secure contexts and
// writeText() can reject (e.g. permission denied); both cases are reported
// to the user instead of failing silently.
document.getElementById('copyBtn').addEventListener('click', async () => {
    if (!extractedData) return;

    try {
        if (!navigator.clipboard) {
            throw new Error('Clipboard API tidak tersedia');
        }
        await navigator.clipboard.writeText(JSON.stringify(extractedData, null, 2));
        alert('Data berhasil disalin!');
    } catch (error) {
        showError('Gagal menyalin data: ' + error.message);
    }
});
|
||||
|
||||
// Download the extracted data as a pretty-printed JSON file named
// "<docType>_data.json", using a temporary object URL and a synthetic link click.
document.getElementById('exportBtn').addEventListener('click', function () {
    if (!extractedData) {
        return;
    }

    const payload = JSON.stringify(extractedData, null, 2);
    const objectUrl = URL.createObjectURL(new Blob([payload], { type: 'application/json' }));

    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `${docType}_data.json`;
    link.click();

    URL.revokeObjectURL(objectUrl);
});
|
||||
|
||||
/**
 * Reveal the error section with the given message.
 *
 * @param {string} message - Text to show to the user.
 */
function showError(message) {
    errorSection.style.display = 'block';
    errorMessage.textContent = message;
}
|
||||
|
||||
/** Hide the error section (its message text is left untouched). */
function hideError() {
    const { style } = errorSection;
    style.display = 'none';
}
|
||||
|
||||
// Clicking the preview discards the staged file: the selection and the file
// input are cleared, the process button is disabled, and the preview image is
// swapped back for the dropzone placeholder.
preview.addEventListener('click', function () {
    selectedFile = null;
    fileInput.value = '';
    processBtn.disabled = true;

    preview.style.display = 'none';
    dropzone.querySelector('.dropzone-content').style.display = 'flex';
});
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
Reference in New Issue
Block a user