"""Flask web server for KTP/KK OCR.

Exposes an upload endpoint that runs OCR on an identity-document image and
a small set of region endpoints that proxy and validate against the
wilayah.id administrative-region API.
"""

import contextlib
import os
import uuid
from functools import lru_cache
from urllib.parse import quote

import requests
from flask import Flask, render_template, request, jsonify
from werkzeug.exceptions import RequestEntityTooLarge
from werkzeug.utils import secure_filename

from ocr_engine import get_ocr_engine
from ktp_extractor import KTPExtractor
from kk_extractor import KKExtractor

app = Flask(__name__)

# Configuration
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH

# Create the upload folder if it does not exist yet
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Initialise the field extractors once at import time
ktp_extractor = KTPExtractor()
kk_extractor = KKExtractor()


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/')
def index():
    """Render the main page."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload and run OCR on it.

    Expects a multipart form with a ``file`` part and an optional
    ``doc_type`` field (defaults to ``'ktp'``; any other value selects the
    KK extractor).

    Returns:
        A JSON response. On success it carries ``success``, ``doc_type``,
        ``data``, ``raw_text`` and ``ocr_count``. On failure it carries
        ``success: False`` and ``error`` with status 400 (bad input or no
        text found), 413 (upload too large) or 500 (unexpected error).
    """
    try:
        # Validate the uploaded file
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400

        file = request.files['file']
        doc_type = request.form.get('doc_type', 'ktp')

        # ``not`` also covers a filename of None, not just the empty string.
        if not file.filename:
            return jsonify({'success': False, 'error': 'Nama file kosong'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400

        # Store under a unique name so concurrent uploads that share a
        # filename cannot overwrite or delete each other's file. The
        # extension comes from the already-validated original name because
        # secure_filename() can drop it for non-ASCII names.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(f"{uuid.uuid4().hex}.{extension}")
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        try:
            # Saving happens inside the try so a partially written file is
            # also removed by the cleanup below.
            file.save(filepath)

            # Run OCR
            ocr_engine = get_ocr_engine()
            ocr_results = ocr_engine.extract_text(filepath)

            if not ocr_results:
                return jsonify({
                    'success': False,
                    'error': 'Tidak dapat membaca teks dari gambar. Pastikan gambar jelas dan tidak blur.'
                }), 400

            # Extract fields according to the document type
            if doc_type == 'ktp':
                extracted = ktp_extractor.extract(ocr_results)
            else:
                extracted = kk_extractor.extract(ocr_results)

            # Raw text, returned to the client for debugging
            raw_text = '\n'.join(r['text'] for r in ocr_results)

            # The OCR text is personal data, so it is only logged when the
            # app runs in debug mode instead of always going to stdout.
            if app.debug:
                app.logger.debug(
                    "Raw OCR results:\n%s",
                    '\n'.join(f"[{i}] {r['text']}" for i, r in enumerate(ocr_results)),
                )

            return jsonify({
                'success': True,
                'doc_type': doc_type,
                'data': extracted,
                'raw_text': raw_text,
                'ocr_count': len(ocr_results)
            })

        finally:
            # Delete the file after processing (personal-data safety)
            with contextlib.suppress(FileNotFoundError):
                os.remove(filepath)

    except RequestEntityTooLarge:
        # Raised while reading the request when it exceeds
        # MAX_CONTENT_LENGTH; report it as a client error, not a 500.
        return jsonify({'success': False, 'error': 'Ukuran file melebihi batas 16MB'}), 413
    except Exception as e:
        app.logger.exception("Unexpected error while processing upload")
        return jsonify({'success': False, 'error': str(e)}), 500


# ============================================
# Region Data API (using wilayah.id)
# ============================================

WILAYAH_API_BASE = "https://wilayah.id/api"


@lru_cache(maxsize=100)
def _fetch_region_data_cached(endpoint):
    """Fetch and decode one region endpoint, raising on any failure.

    Raising (rather than returning None) keeps failures out of the LRU
    cache, so a transient upstream error is retried on the next call.
    """
    response = requests.get(f"{WILAYAH_API_BASE}/{endpoint}", timeout=10)
    if response.status_code != 200:
        raise requests.HTTPError(
            f"unexpected status {response.status_code} for {endpoint}",
            response=response,
        )
    return response.json()


def fetch_region_data(endpoint):
    """Fetch region data with caching.

    Args:
        endpoint: Path below WILAYAH_API_BASE, e.g. ``"provinces.json"``.

    Returns:
        The decoded JSON payload, or None if the request failed, returned a
        non-200 status, or the body was not valid JSON. Only successful
        responses are cached.
    """
    try:
        return _fetch_region_data_cached(endpoint)
    except (requests.RequestException, ValueError) as e:
        app.logger.warning("Error fetching region data: %s", e)
        return None


def normalize_name(name):
    """Normalize a name for comparison.

    Upper-cases the value and removes dots and spaces. Falsy input yields
    an empty string; non-string input is converted with ``str()`` first.
    """
    if not name:
        return ""
    return str(name).upper().strip().replace(".", "").replace(" ", "")


def find_best_match(search_name, items, key='name'):
    """Find the best matching item by name.

    An exact match on the normalized name wins; otherwise the first item
    whose normalized name contains, or is contained in, the normalized
    search name is returned.

    Args:
        search_name: Name to look for.
        items: Iterable of dicts to search.
        key: Dict key holding each item's name.

    Returns:
        The matching item, or None if nothing matches.
    """
    if not search_name or not items:
        return None

    search_norm = normalize_name(search_name)
    # Input such as "." normalizes to "", which is a substring of every
    # string and would otherwise "match" the first item.
    if not search_norm:
        return None

    candidates = [(normalize_name(item.get(key, '')), item) for item in items]

    # Try exact match first
    for item_norm, item in candidates:
        if item_norm == search_norm:
            return item

    # Then a containment match; items with an empty name are skipped
    # because "" is contained in every search string.
    for item_norm, item in candidates:
        if item_norm and (search_norm in item_norm or item_norm in search_norm):
            return item

    return None


def _region_endpoint(resource, code):
    """Build the endpoint path for *resource* and *code*.

    The code is percent-encoded so a client-supplied value cannot add a
    query string or extra path segments to the upstream URL.
    """
    return f"{resource}/{quote(str(code), safe='')}.json"


def _region_list_response(endpoint):
    """Return the payload of *endpoint* as JSON, or an empty list with 500."""
    data = fetch_region_data(endpoint)
    if data:
        return jsonify(data)
    return jsonify({'data': []}), 500


@app.route('/api/provinces')
def get_provinces():
    """Get all provinces."""
    return _region_list_response("provinces.json")


@app.route('/api/regencies/<province_code>')
def get_regencies(province_code):
    """Get cities/regencies by province code."""
    return _region_list_response(_region_endpoint("regencies", province_code))


@app.route('/api/districts/<regency_code>')
def get_districts(regency_code):
    """Get districts by regency code."""
    return _region_list_response(_region_endpoint("districts", regency_code))


@app.route('/api/villages/<district_code>')
def get_villages(district_code):
    """Get villages by district code."""
    return _region_list_response(_region_endpoint("villages", district_code))


# Region levels from broadest to narrowest: the request/response field name
# paired with the API resource that lists the children of a match at that
# level (None for the last level).
_REGION_LEVELS = (
    ('provinsi', 'regencies'),
    ('kabupaten_kota', 'districts'),
    ('kecamatan', 'villages'),
    ('kel_desa', None),
)


@app.route('/api/validate-region', methods=['POST'])
def validate_region():
    """Validate OCR region data against the region API.

    Expects a JSON object with the optional keys ``provinsi``,
    ``kabupaten_kota``, ``kecamatan`` and ``kel_desa``. Levels are checked
    from province down to village, and checking stops at the first level
    that cannot be fetched or matched; the remaining levels stay invalid.

    Returns:
        JSON with ``success`` and a ``validation`` dict holding ``valid``,
        ``code`` and ``suggestion`` per level. Status 400 if the body is
        not a JSON object, 500 on unexpected errors.
    """
    try:
        ocr_data = request.get_json(silent=True)
        if not isinstance(ocr_data, dict):
            return jsonify({'success': False, 'error': 'Data JSON tidak valid'}), 400

        result = {
            field: {'valid': False, 'code': None, 'suggestion': None}
            for field, _ in _REGION_LEVELS
        }

        endpoint = "provinces.json"
        for field, child_resource in _REGION_LEVELS:
            payload = fetch_region_data(endpoint)
            if not payload or 'data' not in payload:
                break

            match = find_best_match(ocr_data.get(field), payload['data'])
            if not match:
                break

            result[field] = {'valid': True, 'code': match['code'], 'suggestion': match['name']}

            if child_resource is not None:
                # The next level is looked up under the code just matched.
                endpoint = _region_endpoint(child_resource, match['code'])

        return jsonify({'success': True, 'validation': result})

    except Exception as e:
        app.logger.exception("Unexpected error while validating region data")
        return jsonify({'success': False, 'error': str(e)}), 500


@app.route('/health')
def health():
    """Health check endpoint."""
    return jsonify({'status': 'ok'})


if __name__ == '__main__':
    print("="*50)
    print("OCR KTP/KK Application")
    print("="*50)
    print("Membuka: http://localhost:5000")
    print("Tekan Ctrl+C untuk berhenti")
    print("="*50)
    # The server listens on all interfaces, so the interactive debugger
    # (which can execute arbitrary code) is opt-in: set FLASK_DEBUG=1.
    debug_mode = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=5000, debug=debug_mode)