254 lines
8.5 KiB
Python
254 lines
8.5 KiB
Python
"""
|
|
Flask web server for KTP/KK OCR
|
|
"""
|
|
|
|
import os
|
|
from flask import Flask, render_template, request, jsonify
|
|
from werkzeug.utils import secure_filename
|
|
|
|
from ocr_engine import get_ocr_engine
|
|
from ktp_extractor import KTPExtractor
|
|
from kk_extractor import KKExtractor
|
|
|
|
# Flask application instance shared by every route in this module.
app = Flask(__name__)

# Configuration: uploads are stored in an "uploads" directory next to this file.
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max

app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
)

# Create the upload folder if it does not exist yet.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Initialise the field extractors once, at import time.
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()
|
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
@app.route('/')
def index():
    """Serve the main page."""
    page = render_template('index.html')
    return page
|
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload, run OCR on it and return the extracted fields.

    Expects a multipart form with a ``file`` part and an optional
    ``doc_type`` field. ``doc_type`` defaults to ``'ktp'``; any other value
    is processed with the KK extractor.

    Returns:
        A JSON response. On success it carries ``success``, ``doc_type``,
        ``data``, ``raw_text`` and ``ocr_count``. On failure it carries
        ``success: False`` and an ``error`` message, with status 400 for a
        missing/unsupported file or an unreadable image and 500 for
        unexpected errors.

    Raises:
        werkzeug.exceptions.HTTPException: HTTP errors raised while reading
            the request (for example 413 when the upload exceeds
            ``MAX_CONTENT_LENGTH``) are propagated so Flask can answer with
            the proper status code instead of a generic 500.
    """
    # Function-scope imports keep this block self-contained.
    import uuid
    from werkzeug.exceptions import HTTPException

    try:
        # Validate the uploaded file.
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400

        file = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')

        if file.filename == '':
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Store the upload under a server-generated unique name. Using the
        # client-supplied name let two concurrent uploads with the same
        # filename overwrite (and then delete) each other's file. Only the
        # already-validated extension is kept.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f"{uuid.uuid4().hex}.{extension}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        try:
            # Saving happens inside the try so a partially written file is
            # still cleaned up by the finally clause below.
            file.save(filepath)

            # Run OCR.
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)

            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Extract fields according to the document type.
            extractor = ktp_extractor if doc_type == 'ktp' else kk_extractor
            extracted = extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging.
            raw_text = '\n'.join(r['text'] for r in ocr_results)

            # The OCR text is personal data, so it goes to the application
            # logger at DEBUG level rather than unconditionally to stdout.
            app.logger.debug(
                "Raw OCR Results\n%s",
                '\n'.join(f"[{i}] {r['text']}" for i, r in enumerate(ocr_results)),
            )

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })

        finally:
            # Remove the file after processing (personal-data safety).
            if os.path.exists(filepath):
                os.remove(filepath)

    except HTTPException:
        # Let Flask render HTTP errors (e.g. 413 Request Entity Too Large)
        # with their own status code.
        raise
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
|
|
|
|
|
# ============================================
|
|
# Region Data API (using wilayah.id)
|
|
# ============================================
|
|
import requests
|
|
from functools import lru_cache
|
|
|
|
WILAYAH_API_BASE = "https://wilayah.id/api"


class _RegionDataUnavailable(Exception):
    """Internal signal: the upstream API answered with a non-200 status."""


@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Fetch and decode one endpoint, raising instead of returning a failure.

    lru_cache only stores return values, so raising on failure keeps
    failed lookups out of the cache and lets the next call retry.
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise _RegionDataUnavailable(endpoint)
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data from the wilayah.id API, caching successful results.

    Args:
        endpoint: Path below ``WILAYAH_API_BASE``, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        response status is not 200, or the body cannot be decoded. Failures
        are not cached, so a transient outage does not make an endpoint
        unavailable for the lifetime of the cache entry.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except _RegionDataUnavailable:
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no data".
        print(f"Error fetching region data: {e}")
        return None


# Keep the lru_cache helper API available on the public function for any
# caller that relied on it when the function itself was decorated.
fetch_region_data.cache_clear = _fetch_region_data_cached.cache_clear
fetch_region_data.cache_info = _fetch_region_data_cached.cache_info
|
|
|
|
|
|
def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the text and removes dots and all whitespace (spaces, tabs,
    newlines), so OCR output broken across lines still compares equal to
    the reference spelling.

    Args:
        name: The name to normalize. Falsy values (``None``, ``""``) yield
            an empty string; non-string values are converted with ``str``.

    Returns:
        The normalized string.
    """
    if not name:
        return ""
    # split() with no argument splits on any run of whitespace, so joining
    # the pieces drops leading, trailing and internal whitespace in one go.
    return "".join(str(name).upper().replace(".", "").split())
|
|
|
|
|
|
def find_best_match(search_name, items, key='name'):
    """Find the item whose name best matches *search_name*.

    Names are compared after ``normalize_name``. An exact match wins;
    otherwise the first item whose normalized name contains, or is
    contained in, the normalized search name is returned.

    Args:
        search_name: The name to look for.
        items: Iterable of dicts to search.
        key: Dict key holding each item's name.

    Returns:
        The matching item, or ``None`` when nothing matches, the inputs are
        empty, or the search name normalizes to an empty string.
    """
    if not search_name or not items:
        return None

    search_norm = normalize_name(search_name)
    if not search_norm:
        # An empty string is a substring of every name, so a search such as
        # " " or "." would otherwise "match" the first item.
        return None

    # Normalize each candidate once and drop blank names, which for the
    # same reason would match every search in the containment pass.
    candidates = []
    for item in items:
        item_norm = normalize_name(item.get(key, ''))
        if item_norm:
            candidates.append((item_norm, item))

    # Try an exact match first.
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Fall back to a containment match in either direction.
    for item_norm, item in candidates:
        if search_norm in item_norm or item_norm in search_norm:
            return item

    return None
|
|
|
|
|
|
@app.route('/api/provinces')
def get_provinces():
    """Return all provinces as JSON, or an empty list with status 500."""
    provinces = fetch_region_data("provinces.json")
    if not provinces:
        return jsonify({'data': []}), 500
    return jsonify(provinces)
|
|
|
|
|
|
@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Return the cities/regencies of a province, or an empty list with status 500."""
    regencies = fetch_region_data(f"regencies/{province_code}.json")
    if not regencies:
        return jsonify({'data': []}), 500
    return jsonify(regencies)
|
|
|
|
|
|
@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Return the districts of a regency, or an empty list with status 500."""
    districts = fetch_region_data(f"districts/{regency_code}.json")
    if not districts:
        return jsonify({'data': []}), 500
    return jsonify(districts)
|
|
|
|
|
|
@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Return the villages of a district, or an empty list with status 500."""
    villages = fetch_region_data(f"villages/{district_code}.json")
    if not villages:
        return jsonify({'data': []}), 500
    return jsonify(villages)
|
|
|
|
|
|
@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR region names against the region data API.

    Expects a JSON object that may contain ``provinsi``,
    ``kabupaten_kota``, ``kecamatan`` and ``kel_desa``. The levels are
    checked top-down: each level is looked up among the children of the
    match found at the level above, so validation stops at the first level
    that cannot be fetched or matched and the remaining levels stay invalid.

    Returns:
        ``{'success': True, 'validation': {...}}`` where each level maps to
        ``{'valid', 'code', 'suggestion'}``; status 400 when the body is not
        a JSON object; status 500 with the error text on unexpected errors.
    """
    # silent=True returns None for a missing/malformed JSON body instead of
    # raising an HTTP error that the handler below would report as a 500.
    ocr_data = request.get_json(silent=True)
    if not isinstance(ocr_data, dict):
        return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

    try:
        # (field in the OCR payload, endpoint template taking the parent code)
        levels = (
            ('provinsi', 'provinces.json'),
            ('kabupaten_kota', 'regencies/{}.json'),
            ('kecamatan', 'districts/{}.json'),
            ('kel_desa', 'villages/{}.json'),
        )
        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in levels
        }

        parent_code = None
        for field, endpoint_template in levels:
            # The province template has no placeholder, so format() leaves
            # it unchanged; deeper levels receive the parent's code.
            payload = fetch_region_data(endpoint_template.format(parent_code))
            if not payload or 'data' not in payload:
                break

            match = find_best_match(ocr_data.get(field), payload['data'])
            if not match:
                break

            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}
            parent_code = match['code']

        return jsonify({'success': True, 'validation': result})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
|
|
|
|
|
@app.route('/health')
def health():
    """Health check endpoint: always reports status "ok"."""
    payload = {'status': 'ok'}
    return jsonify(payload)
|
|
|
|
|
|
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python code, so debug mode is opt-in (FLASK_DEBUG=1) and,
    # when enabled, the server only listens on the loopback interface.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    host = '127.0.0.1' if debug_enabled else '0.0.0.0'

    banner = "=" * 50
    print(banner)
    print("OCR KTP/KK Application")
    print(banner)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print(banner)

    app.run(host=host, port=5000, debug=debug_enabled)
|