OCR dengan ZONA
This commit is contained in:
253
app.py
Normal file
253
app.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
Flask web server for OCR of KTP/KK documents
"""
|
||||
|
||||
import os
|
||||
from flask import Flask, render_template, request, jsonify
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from ocr_engine import get_ocr_engine
|
||||
from ktp_extractor import KTPExtractor
|
||||
from kk_extractor import KKExtractor
|
||||
|
||||
# Flask application instance shared by every route in this module.
app = Flask(__name__)

# Configuration
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16 MB max

app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
)

# Create the upload folder if it does not exist yet.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Initialise the extractors once at import time.
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()
|
||||
|
||||
|
||||
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared
    case-insensitively. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Main page: render the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||||
|
||||
|
||||
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload and run OCR on it.

    Expects a multipart form with a ``file`` part and an optional
    ``doc_type`` field. ``doc_type`` defaults to ``'ktp'``; any other value
    is processed by the KK extractor.

    Returns:
        A JSON response. On success it carries ``success``, ``doc_type``,
        ``data``, ``raw_text`` and ``ocr_count``. On failure it carries
        ``success: False`` and an ``error`` message, with status 400 for bad
        input or unreadable images, the HTTP error's own status (e.g. 413
        for an oversized upload) for request-level errors, and 500 for
        anything unexpected.
    """
    # Function-scope imports so this handler brings its own dependencies.
    import uuid

    from werkzeug.exceptions import HTTPException

    try:
        # Check that a file part was sent at all.
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400

        file = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')

        if file.filename == '':
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Store the upload under a server-generated unique name. Using the
        # client's name would let two concurrent uploads with the same name
        # overwrite each other (and delete each other's file in `finally`).
        # The extension was just validated by allowed_file(), so it is safe
        # to reuse in the path.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f"{uuid.uuid4().hex}.{extension}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        try:
            # Saving happens inside the cleanup scope so a partially written
            # file is removed as well.
            file.save(filepath)

            # Run OCR
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)

            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Extract fields according to the document type.
            if doc_type == 'ktp':
                extracted = ktp_extractor.extract(ocr_results)
            else:
                extracted = kk_extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging.
            raw_text = '\n'.join(r['text'] for r in ocr_results)

            # The raw OCR lines contain personal data, so they are only
            # emitted through the app logger at DEBUG level instead of being
            # printed unconditionally.
            app.logger.debug(
                "Raw OCR results:\n%s",
                '\n'.join(f"[{i}] {r['text']}" for i, r in enumerate(ocr_results)),
            )

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })

        finally:
            # Delete the file after processing (personal-data safety).
            if os.path.exists(filepath):
                os.remove(filepath)

    except HTTPException as e:
        # Request-level errors (e.g. 413 when MAX_CONTENT_LENGTH is exceeded)
        # keep their real status code instead of being reported as 500.
        return jsonify({'success': False, 'error': e.description}), e.code or 500

    except Exception as e:
        app.logger.exception("Unexpected error while processing upload")
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
# ============================================
|
||||
# Region Data API (using wilayah.id)
|
||||
# ============================================
|
||||
import requests
|
||||
from functools import lru_cache
|
||||
|
||||
WILAYAH_API_BASE = "https://wilayah.id/api"


@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Fetch and decode one endpoint; raise on any failure.

    Failures are raised rather than returned because ``lru_cache`` only
    memoizes return values: an exception leaves the cache untouched, so the
    next call retries the request.
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code} for {endpoint}")
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data with caching.

    Args:
        endpoint: Path below ``WILAYAH_API_BASE``, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status is not 200, or the body is not valid JSON. Only successful
        responses are cached; a transient failure is retried on the next
        call instead of being remembered.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except Exception as e:
        print(f"Error fetching region data: {e}")
        return None


# Keep the cache-management helpers reachable under the public name.
fetch_region_data.cache_clear = _fetch_region_data_cached.cache_clear
fetch_region_data.cache_info = _fetch_region_data_cached.cache_info
|
||||
|
||||
|
||||
def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the text and removes dots and all whitespace, so that
    variants such as ``"D.K.I. Jakarta"`` and ``"DKI  JAKARTA"`` compare
    equal.

    Args:
        name: The value to normalize. Falsy values (``None``, ``""``, ``0``)
            yield ``""``; other non-string values are converted with
            ``str()`` instead of raising.

    Returns:
        The normalized string.
    """
    if not name:
        return ""
    if not isinstance(name, str):
        # Values decoded from JSON may be numbers; compare their text form.
        name = str(name)
    # split() with no argument splits on every kind of whitespace, so tabs
    # and newlines inside OCR text are removed just like plain spaces.
    return "".join(name.upper().replace(".", "").split())
|
||||
|
||||
|
||||
def find_best_match(search_name, items, key='name'):
    """Find the item whose name best matches *search_name*.

    Matching is done on normalized names (see ``normalize_name``): an exact
    match wins; otherwise the first item whose normalized name contains, or
    is contained in, the normalized search name is returned.

    Args:
        search_name: The name to look for.
        items: Iterable of dict-like items.
        key: The item key holding the name (default ``'name'``).

    Returns:
        The matching item, or ``None`` when nothing matches, when the inputs
        are empty, or when the search name is blank after normalization.
    """
    if not search_name or not items:
        return None

    search_norm = normalize_name(search_name)
    if not search_norm:
        # An empty string is a substring of everything; without this guard a
        # punctuation-only search would "match" the first item.
        return None

    # Normalize each candidate once and drop blank names, which would
    # otherwise satisfy the substring test against any search term.
    candidates = []
    for item in items:
        item_norm = normalize_name(item.get(key, ''))
        if item_norm:
            candidates.append((item_norm, item))

    # Try exact match first
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Try contains match
    for item_norm, item in candidates:
        if search_norm in item_norm or item_norm in search_norm:
            return item

    return None
|
||||
|
||||
|
||||
@app.route('/api/provinces')
def get_provinces():
    """Return every province as JSON, or an empty list with 500 on failure."""
    provinces = fetch_region_data("provinces.json")
    if not provinces:
        return jsonify({'data': []}), 500
    return jsonify(provinces)
|
||||
|
||||
|
||||
@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Return the cities/regencies of a province as JSON.

    Responds with an empty list and status 500 when no data is available.
    """
    endpoint = f"regencies/{province_code}.json"
    regencies = fetch_region_data(endpoint)
    if not regencies:
        return jsonify({'data': []}), 500
    return jsonify(regencies)
|
||||
|
||||
|
||||
@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Return the districts of a regency as JSON.

    Responds with an empty list and status 500 when no data is available.
    """
    endpoint = f"districts/{regency_code}.json"
    districts = fetch_region_data(endpoint)
    if not districts:
        return jsonify({'data': []}), 500
    return jsonify(districts)
|
||||
|
||||
|
||||
@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Return the villages of a district as JSON.

    Responds with an empty list and status 500 when no data is available.
    """
    endpoint = f"villages/{district_code}.json"
    villages = fetch_region_data(endpoint)
    if not villages:
        return jsonify({'data': []}), 500
    return jsonify(villages)
|
||||
|
||||
|
||||
@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR region data against the region database.

    Expects a JSON object that may contain ``provinsi``, ``kabupaten_kota``,
    ``kecamatan`` and ``kel_desa``. The levels are checked top-down; each
    level is looked up only inside the match found for its parent, and the
    walk stops at the first level that cannot be fetched or matched.

    Returns:
        ``{'success': True, 'validation': {...}}`` where each level maps to
        ``{'valid', 'code', 'suggestion'}``; status 400 when the body is not
        a JSON object; status 500 with an ``error`` message on unexpected
        failures.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so bad input is reported as 400 rather than 500.
    ocr_data = request.get_json(silent=True)
    if not isinstance(ocr_data, dict):
        return jsonify({'success': False, 'error': 'Body request harus berupa objek JSON'}), 400

    # (request field, API collection holding that level's children)
    levels = (
        ('provinsi', 'regencies'),
        ('kabupaten_kota', 'districts'),
        ('kecamatan', 'villages'),
        ('kel_desa', None),
    )

    try:
        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in levels
        }

        endpoint = "provinces.json"
        for field, child_collection in levels:
            region_data = fetch_region_data(endpoint)
            if not region_data or 'data' not in region_data:
                break

            match = find_best_match(ocr_data.get(field), region_data['data'])
            if not match:
                break

            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}

            if child_collection is None:
                break
            # The next level is searched only within the matched parent.
            endpoint = f"{child_collection}/{match['code']}.json"

        return jsonify({'success': True, 'validation': result})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/health')
def health():
    """Health check: always answers with a JSON ``status`` of ``ok``."""
    payload = {'status': 'ok'}
    return jsonify(payload)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    banner = "=" * 50
    print(banner)
    print("OCR KTP/KK Application")
    print(banner)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print(banner)

    # The server listens on all interfaces, and the Werkzeug debugger lets
    # anyone who can reach it execute arbitrary code. Debug mode is therefore
    # opt-in through the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}

    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|
||||
Reference in New Issue
Block a user