OCR dengan ZONA

This commit is contained in:
2025-12-28 01:20:37 +08:00
commit 4fe381b3f0
12 changed files with 2356 additions and 0 deletions

253
app.py Normal file
View File

@@ -0,0 +1,253 @@
"""
Flask Web Server untuk OCR KTP/KK
"""
import os
import uuid

from flask import Flask, render_template, request, jsonify
from werkzeug.exceptions import HTTPException
from werkzeug.utils import secure_filename

from ocr_engine import get_ocr_engine
from ktp_extractor import KTPExtractor
from kk_extractor import KKExtractor
# Application object and upload settings
app = Flask(__name__)

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')

app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
)

# Create the upload folder if it does not exist yet
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Shared extractor instances (KTP and KK documents)
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A missing filename (``None`` or ``""``) or a name without a dot is
    rejected instead of raising.
    """
    if not filename:
        return False
    _, dot, extension = filename.rpartition('.')
    # An empty ``dot`` means the name contains no '.' at all.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
@app.route('/')
def index():
    """Serve the main page."""
    page = render_template('index.html')
    return page
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload and run OCR on it.

    Expects a multipart form with a ``file`` part and an optional
    ``doc_type`` field (defaults to ``'ktp'``; any other value selects the
    KK extractor).

    Returns:
        A JSON response. On success it carries the extracted fields, the raw
        OCR text and the number of OCR results. A missing/unsupported file or
        an unreadable image yields HTTP 400; unexpected errors yield HTTP 500.

    Raises:
        werkzeug.exceptions.HTTPException: propagated unchanged so Flask can
            answer with the proper status (e.g. 413 for an oversized upload)
            instead of a generic 500.
    """
    try:
        # Validate the uploaded file
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400
        upload = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')
        # ``not`` also covers a filename of None, not just the empty string.
        if not upload.filename:
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400
        if not allowed_file(upload.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Store the upload under a random name: concurrent requests that send
        # the same filename must not overwrite or delete each other's file.
        # The extension was already checked by allowed_file above.
        extension = upload.filename.rsplit('.', 1)[1].lower()
        filepath = os.path.join(
            app.config['UPLOAD_FOLDER'],
            f"{uuid.uuid4().hex}.{extension}",
        )
        try:
            # Saving happens inside the try so a partial file is cleaned up too.
            upload.save(filepath)

            # Run OCR
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)
            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Extract fields according to the document type
            if doc_type == 'ktp':
                extracted = ktp_extractor.extract(ocr_results)
            else:
                extracted = kk_extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging
            raw_text = '\n'.join(r['text'] for r in ocr_results)

            # The OCR output is personal data, so it is only emitted at DEBUG
            # level rather than printed unconditionally.
            app.logger.debug(
                "Raw OCR results:\n%s",
                '\n'.join(f"[{i}] {r['text']}" for i, r in enumerate(ocr_results)),
            )

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })
        finally:
            # Delete the file after processing (personal data must not linger)
            if os.path.exists(filepath):
                os.remove(filepath)
    except HTTPException:
        # Let Flask turn HTTP errors into their own status codes.
        raise
    except Exception as e:
        app.logger.exception("Upload processing failed")
        return jsonify({'success': False, 'error': str(e)}), 500
# ============================================
# Region Data API (using wilayah.id)
# ============================================
import requests
from functools import lru_cache
# Base URL of the wilayah.id region API; fetch_region_data appends the
# endpoint path (e.g. "provinces.json") to it.
WILAYAH_API_BASE = "https://wilayah.id/api"
@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Fetch and decode one region endpoint, raising on any failure.

    Failures are signalled with exceptions so that ``lru_cache`` only ever
    stores successful responses (a raised call is not memoized).
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise RuntimeError(
            f"unexpected HTTP status {response.status_code} for {endpoint}"
        )
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data with caching.

    Args:
        endpoint: Path below ``WILAYAH_API_BASE``, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status is not 200, or the body cannot be decoded. Failures are not
        cached, so a later call retries the upstream API.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except Exception as e:
        # Best-effort lookup: report the problem and let callers handle None.
        print(f"Error fetching region data: {e}")
        return None


# Keep the cache-management helpers reachable under the public name.
fetch_region_data.cache_clear = _fetch_region_data_cached.cache_clear
fetch_region_data.cache_info = _fetch_region_data_cached.cache_info
def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the text and removes dots and every whitespace character
    (spaces, tabs, newlines), so "Kab. Bogor" and "KAB BOGOR" normalize to the
    same string.

    Args:
        name: The value to normalize. Non-string values are converted with
            ``str()``.

    Returns:
        The normalized string, or ``""`` for ``None`` and other falsy input.
    """
    if not name:
        return ""
    # split() with no argument splits on any whitespace run, so joining the
    # pieces drops all of it, not just the plain space character.
    return "".join(str(name).upper().replace(".", "").split())
def find_best_match(search_name, items, key='name'):
    """Find the item whose name best matches *search_name*.

    Names are compared after ``normalize_name``. An exact match wins;
    otherwise the first item whose normalized name contains, or is contained
    in, the normalized search name is returned.

    Args:
        search_name: The name to look for.
        items: Iterable of dict-like items exposing ``.get``.
        key: Key under which each item stores its name.

    Returns:
        The matching item, or ``None`` when nothing matches or when the
        search name is empty after normalization.
    """
    if not search_name or not items:
        return None
    search_norm = normalize_name(search_name)
    if not search_norm:
        # Input such as "." or " " normalizes to "", which is a substring of
        # every name and would otherwise match the first item.
        return None

    # Normalize each candidate once and reuse it for both passes.
    candidates = [(normalize_name(item.get(key, '')), item) for item in items]

    # Try exact match first
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Try contains match; skip unnamed items, since "" is contained in
    # every search string.
    for item_norm, item in candidates:
        if item_norm and (search_norm in item_norm or item_norm in search_norm):
            return item
    return None
@app.route('/api/provinces')
def get_provinces():
    """Return all provinces.

    Responds with the fetched payload, or ``{'data': []}`` and HTTP 500 when
    no region data could be obtained.
    """
    payload = fetch_region_data("provinces.json")
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Return the cities/regencies of the province with *province_code*.

    Responds with the fetched payload, or ``{'data': []}`` and HTTP 500 when
    no region data could be obtained.
    """
    endpoint = f"regencies/{province_code}.json"
    payload = fetch_region_data(endpoint)
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Return the districts of the regency with *regency_code*.

    Responds with the fetched payload, or ``{'data': []}`` and HTTP 500 when
    no region data could be obtained.
    """
    endpoint = f"districts/{regency_code}.json"
    payload = fetch_region_data(endpoint)
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Return the villages of the district with *district_code*.

    Responds with the fetched payload, or ``{'data': []}`` and HTTP 500 when
    no region data could be obtained.
    """
    endpoint = f"villages/{district_code}.json"
    payload = fetch_region_data(endpoint)
    if not payload:
        return jsonify({'data': []}), 500
    return jsonify(payload)
@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR region data against the region database.

    Expects a JSON object that may contain ``provinsi``, ``kabupaten_kota``,
    ``kecamatan`` and ``kel_desa``. The levels are checked top-down; each
    level is looked up within the parent that matched, so validation stops at
    the first level that cannot be matched or fetched.

    Returns:
        JSON ``{'success': True, 'validation': {...}}`` where every level maps
        to ``{'valid', 'code', 'suggestion'}``. HTTP 400 when the body is not
        a JSON object; HTTP 500 on unexpected errors.
    """
    try:
        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so bad input is answered with 400 rather than 500.
        ocr_data = request.get_json(silent=True)
        if not isinstance(ocr_data, dict):
            return jsonify({'success': False, 'error': 'Data JSON tidak valid'}), 400

        # (field in the OCR data, endpoint template) from broadest to narrowest.
        levels = (
            ('provinsi', 'provinces.json'),
            ('kabupaten_kota', 'regencies/{code}.json'),
            ('kecamatan', 'districts/{code}.json'),
            ('kel_desa', 'villages/{code}.json'),
        )
        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in levels
        }

        parent_code = None
        for field, endpoint_template in levels:
            # The province endpoint has no placeholder, so format() leaves it as is.
            region_data = fetch_region_data(endpoint_template.format(code=parent_code))
            if not region_data or 'data' not in region_data:
                break
            match = find_best_match(ocr_data.get(field), region_data['data'])
            if not match:
                break
            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}
            # Children of this match are looked up on the next iteration.
            parent_code = match['code']

        return jsonify({'success': True, 'validation': result})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/health')
def health():
    """Health check endpoint; always answers with an ``ok`` status."""
    status = {'status': 'ok'}
    return jsonify(status)
if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach it execute code, so
    # debug mode is opt-in (FLASK_DEBUG=1) and, when enabled, the server is
    # bound to the loopback interface only.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    host = '127.0.0.1' if debug else '0.0.0.0'

    print("="*50)
    print("OCR KTP/KK Application")
    print("="*50)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print("="*50)
    app.run(host=host, port=5000, debug=debug)