diff --git a/.gemini/settings.json b/.gemini/settings.json new file mode 100644 index 0000000..74553cd --- /dev/null +++ b/.gemini/settings.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "context7": { + "command": "npx", + "args": ["-y", "@upstash/context7-mcp@latest"] + } + } +} diff --git a/KK/5103040808220001.jpg b/KK/5103040808220001.jpg new file mode 100644 index 0000000..94c9f89 Binary files /dev/null and b/KK/5103040808220001.jpg differ diff --git a/KTP/3303080307040003.jpg b/KTP/3303080307040003.jpg new file mode 100644 index 0000000..10e7884 Binary files /dev/null and b/KTP/3303080307040003.jpg differ diff --git a/KTP/3529245512000002.jpg b/KTP/3529245512000002.jpg new file mode 100644 index 0000000..302eaab Binary files /dev/null and b/KTP/3529245512000002.jpg differ diff --git a/KTP/3671092111950003.jpg b/KTP/3671092111950003.jpg new file mode 100644 index 0000000..2487edd Binary files /dev/null and b/KTP/3671092111950003.jpg differ diff --git a/KTP/5102045811690001.jpg b/KTP/5102045811690001.jpg new file mode 100644 index 0000000..f355b82 Binary files /dev/null and b/KTP/5102045811690001.jpg differ diff --git a/KTP/5103022906800001.jpg b/KTP/5103022906800001.jpg new file mode 100644 index 0000000..3231655 Binary files /dev/null and b/KTP/5103022906800001.jpg differ diff --git a/KTP/5171042004950004.jpg b/KTP/5171042004950004.jpg new file mode 100644 index 0000000..d935a9e Binary files /dev/null and b/KTP/5171042004950004.jpg differ diff --git a/KTP/7306046502850001.jpg b/KTP/7306046502850001.jpg new file mode 100644 index 0000000..07979a4 Binary files /dev/null and b/KTP/7306046502850001.jpg differ diff --git a/__pycache__/app.cpython-313.pyc b/__pycache__/app.cpython-313.pyc new file mode 100644 index 0000000..8170d49 Binary files /dev/null and b/__pycache__/app.cpython-313.pyc differ diff --git a/__pycache__/database.cpython-313.pyc b/__pycache__/database.cpython-313.pyc new file mode 100644 index 0000000..1c64d80 Binary files /dev/null and b/__pycache__/database.cpython-313.pyc differ diff --git a/__pycache__/image_processor.cpython-313.pyc b/__pycache__/image_processor.cpython-313.pyc new file mode 100644 index 0000000..dc58158 Binary files /dev/null and b/__pycache__/image_processor.cpython-313.pyc differ diff --git a/__pycache__/ktp_extractor.cpython-313.pyc b/__pycache__/ktp_extractor.cpython-313.pyc index f44e43c..dd1d062 100644 Binary files a/__pycache__/ktp_extractor.cpython-313.pyc and b/__pycache__/ktp_extractor.cpython-313.pyc differ diff --git a/__pycache__/models.cpython-313.pyc b/__pycache__/models.cpython-313.pyc new file mode 100644 index 0000000..9d3c441 Binary files /dev/null and b/__pycache__/models.cpython-313.pyc differ diff --git a/__pycache__/ocr_engine.cpython-313.pyc b/__pycache__/ocr_engine.cpython-313.pyc index c01f14b..950a5f0 100644 Binary files a/__pycache__/ocr_engine.cpython-313.pyc and b/__pycache__/ocr_engine.cpython-313.pyc differ diff --git a/app.py b/app.py index dd652fe..1ebc461 100644 --- a/app.py +++ b/app.py @@ -3,30 +3,249 @@ Flask Web Server untuk OCR KTP/KK """ import os -from flask import Flask, render_template, request, jsonify +import logging +import uuid +import requests +import difflib +from functools import lru_cache +from flask import Flask, render_template, request, jsonify, send_from_directory, session, send_file from werkzeug.utils import secure_filename +from PIL import Image +import numpy as np +import numpy as np +import math +import io +from openpyxl import Workbook +from openpyxl.styles import Font, Alignment from ocr_engine import get_ocr_engine from ktp_extractor import KTPExtractor from kk_extractor import KKExtractor +from database import db, init_db +from models import KTPRecord, KKRecord app = Flask(__name__) # Konfigurasi UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads') +KTP_FOLDER = os.path.join(os.path.dirname(__file__), 'KTP') # Defines KTP_FOLDER from previous steps (if not already there check context) +# Wait, let's make sure KTP_FOLDER is consistently defined. +# It was added in Step 94. +# Current view shows it might be there. I will ensure it's correct. + ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'bmp', 'webp'} MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 16MB max +ALLOWED_DOC_TYPES = {'ktp', 'kk'} app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.config['KTP_FOLDER'] = KTP_FOLDER +app.config['KK_FOLDER'] = os.path.join(os.path.dirname(__file__), 'KK') # New KK Folder app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH +app.secret_key = 'secure-key-ocr-ktp-app' # Required for session -# Buat folder upload jika belum ada +# Simple Security +ADMIN_USERNAME = 'admin' +ADMIN_PASSWORD = '123' + + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Production mode flag +PRODUCTION_MODE = os.environ.get('FLASK_ENV', 'development').lower() == 'production' + +# Buat folder jika belum ada os.makedirs(UPLOAD_FOLDER, exist_ok=True) +# Buat folder jika belum ada +os.makedirs(UPLOAD_FOLDER, exist_ok=True) +os.makedirs(KTP_FOLDER, exist_ok=True) +os.makedirs(app.config['KK_FOLDER'], exist_ok=True) + +# Helper untuk Perspective Transform menggunakan Numpy (Inverse Matrix) +def find_coeffs(pa, pb): + matrix = [] + for p1, p2 in zip(pa, pb): + matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]]) + matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]]) + + A = np.matrix(matrix, dtype=float) + B = np.array(pb).reshape(8) + + res = np.dot(np.linalg.inv(A.T * A) * A.T, B) + return np.array(res).reshape(8) + +@app.route('/api/transform-perspective', methods=['POST']) +def transform_perspective(): + try: + if 'image' not in request.files: + return jsonify({'success': False, 'error': 'No image uploaded'}), 400 + + file = request.files['image'] + points_json = request.form.get('points', '[]') + import json + points = json.loads(points_json) # Expecting [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] (TL, TR, BR, BL) + + if len(points) != 4: + return jsonify({'success': False, 'error': 'Invalid points'}), 400 + + # Load Image + img = Image.open(file.stream) + + # Determine format + fmt = img.format if img.format else 'JPEG' + if fmt not in ['JPEG', 'PNG', 'WEBP']: + fmt = 'JPEG' + + # Target Dimensions (KTP Aspect Ratio) + # We can estimate width based on distance between top points or default to something high res + width = max( + math.hypot(points[1][0] - points[0][0], points[1][1] - points[0][1]), + math.hypot(points[2][0] - points[3][0], points[2][1] - points[3][1]) + ) + height = max( + math.hypot(points[3][0] - points[0][0], points[3][1] - points[0][1]), + math.hypot(points[2][0] - points[1][0], points[2][1] - points[1][1]) + ) + + # Force Aspect Ratio based on Doc Type + doc_type = request.form.get('doc_type', 'ktp') + + if doc_type == 'kk': + target_ratio = 297.0 / 210.0 # A4 Landscape + else: + target_ratio = 85.6 / 53.98 # KTP ID-1 + + # Use the calculated width/height as baseline, but adjust to match ratio + # Ideally, take the larger dimension and derive the other + if width / height > target_ratio: + target_width = int(width) + target_height = int(width / target_ratio) + else: + target_height = int(height) + target_width = int(height * target_ratio) + + # Destination Points (Rectangular) + # [0,0], [w,0], [w,h], [0,h] + dst_points = [(0, 0), (target_width, 0), (target_width, target_height), (0, target_height)] + + # Calculate coeffs + # Note: PIL transform perspective method uses INVERSE logic? + # "transform(size, PERSPECTIVE, data, method, fill, resample)" + # "data is an 8-tuple (a, b, c, d, e, f, g, h) which contains the first 8 coefficients of the perspective transform matrix." + # "For each pixel (x, y) in the output image, the new value is taken from a position (P a x + b y + c) / (g x + h y + 1), (d x + e y + f) / (g x + h y + 1) in the input image" + # So we map DST -> SRC + + coeffs = find_coeffs(dst_points, points) # Dst -> Src mapping + + # Perform transform + new_img = img.transform((target_width, target_height), Image.PERSPECTIVE, coeffs, Image.BICUBIC) + + # Save to temporary buffer/file to return URL + filename = f"temp_transformed_{secure_filename(file.filename)}" + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + new_img.save(filepath, format=fmt) + + return jsonify({ + 'success': True, + 'image_url': f"/uploads/{filename}", + 'filename': filename + }) + + except Exception as e: + logger.error(f"Perspective transform error: {e}", exc_info=True) + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/uploads/') +def serve_uploaded_file(filename): + return send_from_directory(app.config['UPLOAD_FOLDER'], filename) + +# ============================================ +# Helper Functions +# ============================================ + +def sanitize_error_message(error, default_message="Terjadi kesalahan pada server"): + """Sanitize error messages untuk production - jangan expose detail""" + if PRODUCTION_MODE: + return default_message + return str(error) + + +def validate_pagination(page, per_page, max_per_page=100): + """Validate and sanitize pagination parameters""" + try: + page = int(page) if page else 1 + per_page = int(per_page) if per_page else 10 + + # Ensure positive values + page = max(1, page) + per_page = max(1, min(per_page, max_per_page)) # Cap at max_per_page + + return page, per_page + except (ValueError, TypeError): + return 1, 10 # Default values + + +def validate_nik(nik): + """Validate NIK format (16 digits)""" + if not nik: + return False + # NIK should be 16 digits + return nik.isdigit() and len(nik) == 16 + + +def validate_no_kk(no_kk): + """Validate No KK format (16 digits)""" + if not no_kk: + return False + # No KK should be 16 digits + return no_kk.isdigit() and len(no_kk) == 16 + +# ============================================ +# Error Handlers +# ============================================ + +@app.errorhandler(404) +def not_found(error): + """Handle 404 errors""" + return jsonify({ + 'success': False, + 'error': 'Resource not found' + }), 404 + + +@app.errorhandler(500) +def internal_error(error): + """Handle 500 errors""" + logger.error(f"Internal server error: {error}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(error, 'Terjadi kesalahan pada server') + }), 500 + + +@app.errorhandler(413) +def request_too_large(error): + """Handle file too large errors""" + return jsonify({ + 'success': False, + 'error': 'File terlalu besar. Maksimal 16MB' + }), 413 # Inisialisasi extractors ktp_extractor = KTPExtractor() kk_extractor = KKExtractor() +# Inisialisasi database +try: + init_db(app) +except Exception as e: + logger.warning(f"Database connection failed: {e}") + logger.warning("The app will work but data won't be saved to MySQL.") + def allowed_file(filename): return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS @@ -47,7 +266,14 @@ def upload_file(): return jsonify({'success': False, 'error': 'Tidak ada file yang diupload'}), 400 file = request.files['file'] - doc_type = request.form.get('doc_type', 'ktp') + doc_type = request.form.get('doc_type', 'ktp').lower() + + # Validasi doc_type + if doc_type not in ALLOWED_DOC_TYPES: + return jsonify({ + 'success': False, + 'error': f'Jenis dokumen tidak valid. Gunakan: {", ".join(ALLOWED_DOC_TYPES)}' + }), 400 if file.filename == '': return jsonify({'success': False, 'error': 'Nama file kosong'}), 400 @@ -55,9 +281,11 @@ def upload_file(): if not allowed_file(file.filename): return jsonify({'success': False, 'error': 'Format file tidak didukung. Gunakan PNG, JPG, JPEG, BMP, atau WEBP'}), 400 - # Simpan file - filename = secure_filename(file.filename) - filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + # Simpan file dengan unique filename untuk menghindari race condition + original_filename = secure_filename(file.filename) + file_ext = os.path.splitext(original_filename)[1] + unique_filename = f"{uuid.uuid4().hex}{file_ext}" + filepath = os.path.join(app.config['UPLOAD_FOLDER'], unique_filename) file.save(filepath) try: @@ -72,48 +300,104 @@ def upload_file(): }), 400 # Ekstrak field berdasarkan jenis dokumen + validation_meta = None + if doc_type == 'ktp': extracted = ktp_extractor.extract(ocr_results) + # Auto-correct and validate regions + extracted, validation_meta = validate_and_correct_regions(extracted) else: extracted = kk_extractor.extract(ocr_results) # Raw text untuk debugging raw_text = '\n'.join([r['text'] for r in ocr_results]) - # DEBUG: Print raw OCR results - print("\n" + "="*50) - print("DEBUG: Raw OCR Results") - print("="*50) + # Log raw OCR results untuk debugging + logger.debug("Raw OCR Results:") for i, r in enumerate(ocr_results): - print(f"[{i}] {r['text']}") - print("="*50 + "\n") + logger.debug(f"[{i}] {r['text']}") + + # Simpan ke database (optional - jika save_to_db=true) + record_id = None + save_to_db = request.form.get('save_to_db', 'true').lower() == 'true' + + if save_to_db: + try: + if doc_type == 'ktp': + # Cek apakah NIK sudah ada + existing = KTPRecord.query.filter_by(nik=extracted.get('nik')).first() + if existing: + # Update existing record + for key, value in extracted.items(): + if hasattr(existing, key) and value: + setattr(existing, key, value) + existing.raw_text = raw_text + db.session.commit() + record_id = existing.id + logger.info(f"Updated KTP record: NIK {extracted.get('nik')}") + else: + # Create new record + record = KTPRecord.from_ocr_data(extracted, raw_text) + db.session.add(record) + db.session.commit() + record_id = record.id + logger.info(f"Created new KTP record: NIK {extracted.get('nik')}") + else: + # KK + existing = KKRecord.query.filter_by(no_kk=extracted.get('no_kk')).first() + if existing: + for key, value in extracted.items(): + if hasattr(existing, key) and value: + setattr(existing, key, value) + existing.raw_text = raw_text + db.session.commit() + record_id = existing.id + logger.info(f"Updated KK record: No KK {extracted.get('no_kk')}") + else: + record = KKRecord.from_ocr_data(extracted, raw_text) + db.session.add(record) + db.session.commit() + record_id = record.id + logger.info(f"Created new KK record: No KK {extracted.get('no_kk')}") + except Exception as db_error: + db.session.rollback() + logger.error(f"Database save error: {db_error}", exc_info=True) + # Continue without saving - don't fail the OCR request return jsonify({ 'success': True, 'doc_type': doc_type, 'data': extracted, + 'validation': validation_meta, 'raw_text': raw_text, - 'ocr_count': len(ocr_results) + 'ocr_count': len(ocr_results), + 'record_id': record_id, + 'saved_to_db': record_id is not None }) finally: - # Hapus file setelah proses (untuk keamanan data pribadi) + # Hapus file upload setelah proses (untuk keamanan data pribadi) if os.path.exists(filepath): - os.remove(filepath) + try: + os.remove(filepath) + except Exception as cleanup_error: + logger.warning(f"Failed to cleanup file {filepath}: {cleanup_error}") except Exception as e: - return jsonify({'success': False, 'error': str(e)}), 500 + logger.error(f"Error in upload_file: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal memproses file. Pastikan file valid dan coba lagi.') + }), 500 # ============================================ # Region Data API (using wilayah.id) # ============================================ -import requests -from functools import lru_cache WILAYAH_API_BASE = "https://wilayah.id/api" -@lru_cache(maxsize=100) +@lru_cache(maxsize=500) # Optimized: increased from 100 def fetch_region_data(endpoint): """Fetch region data with caching""" try: @@ -122,7 +406,7 @@ def fetch_region_data(endpoint): return response.json() return None except Exception as e: - print(f"Error fetching region data: {e}") + logger.error(f"Error fetching region data from {endpoint}: {e}") return None @@ -133,107 +417,193 @@ def normalize_name(name): return name.upper().strip().replace(".", "").replace(" ", "") -def find_best_match(search_name, items, key='name'): - """Find best matching item by name (fuzzy matching)""" +def find_best_match(search_name, items, key='name', cutoff=0.6): + """Find best matching item using difflib for better fuzzy matching""" if not search_name or not items: return None - search_norm = normalize_name(search_name) + # Optimized: Create map of uppercase names to items directly + # This avoids the inefficient iteration at the end + names_upper = [] + name_map_upper = {} - # Try exact match first for item in items: - if normalize_name(item.get(key, '')) == search_norm: - return item - - # Try contains match - for item in items: - item_norm = normalize_name(item.get(key, '')) - if search_norm in item_norm or item_norm in search_norm: - return item + name = item.get(key, '') + name_upper = name.upper() + names_upper.append(name_upper) + # Store both original name and item for quick lookup + if name_upper not in name_map_upper: + name_map_upper[name_upper] = item + + # Python's difflib is good for typos + search_upper = search_name.upper() + matches = difflib.get_close_matches(search_upper, names_upper, n=1, cutoff=cutoff) + if matches: + # Direct lookup - much more efficient + matched_name_upper = matches[0] + return name_map_upper.get(matched_name_upper) + return None +def validate_and_correct_regions(ocr_data): + """ + Validate and correct region data cascadingly: + Prov -> Kab/Kota -> Kec -> Kel/Desa + + ENHANCED: Jika Provinsi/Kabupaten kosong tapi Kecamatan/Desa terdeteksi, + lakukan reverse lookup menggunakan: + 1. Kode NIK (2 digit pertama = provinsi, 4 digit = kabupaten) + 2. Search di database wilayah.id + + Returns tuple (updated_data, validation_codes) + """ + result = ocr_data.copy() + validation_codes = { + 'provinsi': {'valid': False, 'code': None, 'suggestion': None}, + 'kabupaten_kota': {'valid': False, 'code': None, 'suggestion': None}, + 'kecamatan': {'valid': False, 'code': None, 'suggestion': None}, + 'kel_desa': {'valid': False, 'code': None, 'suggestion': None} + } + + # ============================================ + # STRATEGY 1: Extract from NIK if available + # ============================================ + # NIK format: PPKKDD-DDMMYY-XXXX + # PP = Provinsi, PPKK = Kabupaten, PPKKDD = Kecamatan + nik = result.get('nik', '') + prov_code_from_nik = None + kab_code_from_nik = None + + if nik and len(nik) >= 6: + prov_code_from_nik = nik[:2] # 2 digit provinsi + kab_code_from_nik = nik[:4] # 4 digit kabupaten + logger.debug(f"NIK LOOKUP: Provinsi code: {prov_code_from_nik}, Kabupaten code: {kab_code_from_nik}") + + provinces_data = fetch_region_data("provinces.json") + + # ============================================ + # STRATEGY 2: Forward validation (jika data ada) + # ============================================ + if provinces_data and 'data' in provinces_data: + prov_match = None + + # Coba match dari nama provinsi terlebih dahulu + if result.get('provinsi'): + prov_match = find_best_match(result.get('provinsi'), provinces_data['data']) + + # Jika tidak ada nama provinsi, coba dari NIK + if not prov_match and prov_code_from_nik: + for prov in provinces_data['data']: + if prov['code'] == prov_code_from_nik: + prov_match = prov + logger.debug(f"NIK LOOKUP: Found province from NIK: {prov['name']}") + break + + if prov_match: + result['provinsi'] = prov_match['name'] + validation_codes['provinsi'] = {'valid': True, 'code': prov_match['code'], 'suggestion': prov_match['name']} + + # 2. Validate Regency (using Prov Code) + regencies_data = fetch_region_data(f"regencies/{prov_match['code']}.json") + if regencies_data and 'data' in regencies_data: + reg_match = None + + # Coba match dari nama kabupaten + if result.get('kabupaten_kota'): + reg_match = find_best_match(result.get('kabupaten_kota'), regencies_data['data']) + + # Jika tidak ada, coba dari NIK + if not reg_match and kab_code_from_nik: + for reg in regencies_data['data']: + if reg['code'] == kab_code_from_nik: + reg_match = reg + logger.debug(f"NIK LOOKUP: Found regency from NIK: {reg['name']}") + break + + # REVERSE LOOKUP: Jika masih tidak ada, cari dari kecamatan (dengan limit untuk performa) + if not reg_match and result.get('kecamatan'): + logger.debug(f"REVERSE LOOKUP: Searching regency by kecamatan: {result.get('kecamatan')}") + max_reverse_lookup = 50 # Limit untuk performa + for i, reg in enumerate(regencies_data['data']): + if i >= max_reverse_lookup: + logger.warning(f"Reverse lookup stopped at limit {max_reverse_lookup}") + break + districts_data = fetch_region_data(f"districts/{reg['code']}.json") + if districts_data and 'data' in districts_data: + dist_match = find_best_match(result.get('kecamatan'), districts_data['data'], cutoff=0.7) + if dist_match: + reg_match = reg + logger.debug(f"REVERSE LOOKUP: Found regency: {reg['name']} (via kecamatan {dist_match['name']})") + break + + if reg_match: + result['kabupaten_kota'] = reg_match['name'] + validation_codes['kabupaten_kota'] = {'valid': True, 'code': reg_match['code'], 'suggestion': reg_match['name']} + + # 3. Validate District (using Kab Code) + districts_data = fetch_region_data(f"districts/{reg_match['code']}.json") + if districts_data and 'data' in districts_data: + dist_match = find_best_match(result.get('kecamatan'), districts_data['data']) + + # REVERSE LOOKUP: Jika kecamatan kosong tapi kel_desa ada, cari dari desa (dengan limit) + if not dist_match and result.get('kel_desa'): + logger.debug(f"REVERSE LOOKUP: Searching kecamatan by kel_desa: {result.get('kel_desa')}") + max_reverse_lookup = 30 # Limit untuk performa + for i, dist in enumerate(districts_data['data']): + if i >= max_reverse_lookup: + logger.warning(f"Reverse lookup stopped at limit {max_reverse_lookup}") + break + villages_data = fetch_region_data(f"villages/{dist['code']}.json") + if villages_data and 'data' in villages_data: + vil_match = find_best_match(result.get('kel_desa'), villages_data['data'], cutoff=0.7) + if vil_match: + dist_match = dist + logger.debug(f"REVERSE LOOKUP: Found kecamatan: {dist['name']} (via desa {vil_match['name']})") + # Juga update result untuk kecamatan + result['kecamatan'] = dist['name'] + break + + if dist_match: + result['kecamatan'] = dist_match['name'] + validation_codes['kecamatan'] = {'valid': True, 'code': dist_match['code'], 'suggestion': dist_match['name']} + + # 4. Validate Village (using Kec Code) + villages_data = fetch_region_data(f"villages/{dist_match['code']}.json") + if villages_data and 'data' in villages_data: + vil_match = find_best_match(result.get('kel_desa'), villages_data['data']) + if vil_match: + result['kel_desa'] = vil_match['name'] + validation_codes['kel_desa'] = {'valid': True, 'code': vil_match['code'], 'suggestion': vil_match['name']} -@app.route('/api/provinces') -def get_provinces(): - """Get all provinces""" - data = fetch_region_data("provinces.json") - if data: - return jsonify(data) - return jsonify({'data': []}), 500 - - -@app.route('/api/regencies/') -def get_regencies(province_code): - """Get cities/regencies by province code""" - data = fetch_region_data(f"regencies/{province_code}.json") - if data: - return jsonify(data) - return jsonify({'data': []}), 500 - - -@app.route('/api/districts/') -def get_districts(regency_code): - """Get districts by regency code""" - data = fetch_region_data(f"districts/{regency_code}.json") - if data: - return jsonify(data) - return jsonify({'data': []}), 500 - - -@app.route('/api/villages/') -def get_villages(district_code): - """Get villages by district code""" - data = fetch_region_data(f"villages/{district_code}.json") - if data: - return jsonify(data) - return jsonify({'data': []}), 500 - + return result, validation_codes @app.route('/api/validate-region', methods=['POST']) def validate_region(): """Validate OCR region data against official database""" try: + if not request.is_json: + return jsonify({ + 'success': False, + 'error': 'Request harus berupa JSON' + }), 400 + ocr_data = request.json - result = { - 'provinsi': {'valid': False, 'code': None, 'suggestion': None}, - 'kabupaten_kota': {'valid': False, 'code': None, 'suggestion': None}, - 'kecamatan': {'valid': False, 'code': None, 'suggestion': None}, - 'kel_desa': {'valid': False, 'code': None, 'suggestion': None} - } + if not ocr_data: + return jsonify({ + 'success': False, + 'error': 'Data tidak boleh kosong' + }), 400 - # Validate province - provinces_data = fetch_region_data("provinces.json") - if provinces_data and 'data' in provinces_data: - match = find_best_match(ocr_data.get('provinsi'), provinces_data['data']) - if match: - result['provinsi'] = {'valid': True, 'code': match['code'], 'suggestion': match['name']} - - # Validate regency - regencies_data = fetch_region_data(f"regencies/{match['code']}.json") - if regencies_data and 'data' in regencies_data: - reg_match = find_best_match(ocr_data.get('kabupaten_kota'), regencies_data['data']) - if reg_match: - result['kabupaten_kota'] = {'valid': True, 'code': reg_match['code'], 'suggestion': reg_match['name']} - - # Validate district - districts_data = fetch_region_data(f"districts/{reg_match['code']}.json") - if districts_data and 'data' in districts_data: - dist_match = find_best_match(ocr_data.get('kecamatan'), districts_data['data']) - if dist_match: - result['kecamatan'] = {'valid': True, 'code': dist_match['code'], 'suggestion': dist_match['name']} - - # Validate village - villages_data = fetch_region_data(f"villages/{dist_match['code']}.json") - if villages_data and 'data' in villages_data: - vil_match = find_best_match(ocr_data.get('kel_desa'), villages_data['data']) - if vil_match: - result['kel_desa'] = {'valid': True, 'code': vil_match['code'], 'suggestion': vil_match['name']} - - return jsonify({'success': True, 'validation': result}) + _, validation_result = validate_and_correct_regions(ocr_data) + return jsonify({'success': True, 'validation': validation_result}) except Exception as e: - return jsonify({'success': False, 'error': str(e)}), 500 + logger.error(f"Error in validate_region: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal memvalidasi data wilayah') + }), 500 @app.route('/health') @@ -241,13 +611,433 @@ def health(): """Health check endpoint""" return jsonify({'status': 'ok'}) +@app.route('/api/login', methods=['POST']) +def login(): + """Simple login""" + if not request.is_json: + return jsonify({'success': False}), 400 + data = request.json + if data.get('username') == ADMIN_USERNAME and data.get('password') == ADMIN_PASSWORD: + session['logged_in'] = True + return jsonify({'success': True}) + return jsonify({'success': False, 'error': 'Username atau Password salah'}), 401 + +@app.route('/api/check-auth') +def check_auth(): + """Check session""" + if session.get('logged_in'): + return jsonify({'authenticated': True}) + return jsonify({'authenticated': False}), 401 + + + +# ============================================ +# KTP Archive Endpoints +# ============================================ + +# ============================================ +# KTP Archive Endpoints +# ============================================ + +@app.route('/api/save-ktp', methods=['POST']) +def save_ktp(): + """Save edited KTP data with cropped image""" + return save_document('ktp') + +@app.route('/api/save-kk', methods=['POST']) +def save_kk(): + """Save edited KK data with cropped image""" + return save_document('kk') + +def save_document(doc_type): + """Generic save function""" + try: + if 'image' not in request.files: + return jsonify({'success': False, 'error': 'Tidak ada gambar'}), 400 + + image_file = request.files['image'] + data_json = request.form.get('data', '{}') + + import json + try: + data = json.loads(data_json) + except: + return jsonify({'success': False, 'error': 'Data tidak valid'}), 400 + + # Determine config based on type + if doc_type == 'ktp': + identifier = data.get('nik', '') + folder = app.config['KTP_FOLDER'] + Model = KTPRecord + id_field = 'nik' + else: + identifier = data.get('no_kk', '') + folder = app.config['KK_FOLDER'] + Model = KKRecord + id_field = 'no_kk' + + if not identifier: + return jsonify({'success': False, 'error': f'{id_field.upper()} tidak boleh kosong'}), 400 + + # Save image + file_ext = os.path.splitext(image_file.filename)[1] or '.jpg' + image_filename = f"{identifier}{file_ext}" + image_path = os.path.join(folder, image_filename) + image_file.save(image_path) + + # DB Update + existing = Model.query.filter_by(**{id_field: identifier}).first() + if existing: + for k, v in data.items(): + if hasattr(existing, k) and v: + setattr(existing, k, v) + existing.image_path = image_filename + db.session.commit() + record_id = existing.id + else: + # New Record + # Only pass fields that exist in model + # Filter data to match model columns (naive approach or using from_ocr_data if appropriate) + # Actually, direct generic init is risky if fields mismatch. + # Best to use specific logic or robust setattr? + # Let's use simple generic approach: Create empty, then update. + record = Model() + for k, v in data.items(): + if hasattr(record, k): + setattr(record, k, v) + setattr(record, id_field, identifier) + record.image_path = image_filename + db.session.add(record) + db.session.commit() + record_id = record.id + + return jsonify({'success': True, 'record_id': record_id, 'image_path': image_filename}) + + except Exception as e: + db.session.rollback() + logger.error(f"Save error: {e}") + return jsonify({'success': False, 'error': 'Gagal menyimpan'}), 500 + + +@app.route('/ktp-images/') +def serve_ktp_image(filename): + """Serve KTP images - Protected""" + if not session.get('logged_in'): + return "Unauthorized", 401 + return send_from_directory(app.config['KTP_FOLDER'], filename) + +@app.route('/kk-images/') +def serve_kk_image(filename): + """Serve KK images - Protected""" + if not session.get('logged_in'): + return "Unauthorized", 401 + return send_from_directory(app.config['KK_FOLDER'], filename) + + + +@app.route('/api/ktp-archive', methods=['GET']) +def list_ktp_archive(): + """List all KTP records with images - Protected""" + if not session.get('logged_in'): + return jsonify({'success': False, 'error': 'Unauthorized'}), 401 + try: + page, per_page = validate_pagination( + request.args.get('page', 1), + request.args.get('per_page', 20) + ) + + # Only get records that have images saved + pagination = KTPRecord.query.filter( + KTPRecord.image_path.isnot(None) + ).order_by(KTPRecord.created_at.desc()).paginate( + page=page, per_page=per_page, error_out=False + ) + + return jsonify({ + 'success': True, + 'data': [r.to_dict() for r in pagination.items], + 'total': pagination.total, + 'pages': pagination.pages, + 'current_page': page + }) + except Exception as e: + logger.error(f"Error listing KTP archive: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal mengambil arsip KTP') + }), 500 + + +@app.route('/api/kk-archive', methods=['GET']) +def list_kk_archive(): + """List all KK records with images - Protected""" + if not session.get('logged_in'): + return jsonify({'success': False, 'error': 'Unauthorized'}), 401 + try: + page, per_page = validate_pagination( + request.args.get('page', 1), + request.args.get('per_page', 20) + ) + + # Only get records that have images saved + pagination = KKRecord.query.filter( + KKRecord.image_path.isnot(None) + ).order_by(KKRecord.created_at.desc()).paginate( + page=page, per_page=per_page, error_out=False + ) + + return jsonify({ + 'success': True, + 'data': [r.to_dict() for r in pagination.items], + 'total': pagination.total, + 'pages': pagination.pages, + 'current_page': page + }) + except Exception as e: + logger.error(f"Error listing KK archive: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal mengambil arsip KK') + }), 500 + +@app.route('/api/export-excel', methods=['POST']) +def export_excel(): + """Export data to Excel (XLSX)""" + try: + if not request.is_json: + return jsonify({'error': 'Format data tidak valid'}), 400 + + data = request.json + if not data: + return jsonify({'error': 'Data kosong'}), 400 + + wb = Workbook() + ws = wb.active + ws.title = "Data KTP" + + # Headers + headers = ["ATRIBUT", "NILAI"] + ws.append(headers) + + # Style Headers (Bold, Center, Gray Bg optional) + for cell in ws[1]: + cell.font = Font(bold=True) + cell.alignment = Alignment(horizontal='center') + + # Data + exclude = ['raw_text', 'image_path', 'updated_at', 'created_at', 'id'] + + # Specific Order if possible + order = ['nik', 'nama', 'tempat_lahir', 'tanggal_lahir', 'jenis_kelamin', 'gol_darah', + 'alamat', 'rt_rw', 'kel_desa', 'kecamatan', 'kabupaten_kota', 'provinsi', + 'agama', 'status_perkawinan', 'pekerjaan', 'kewarganegaraan', 'berlaku_hingga'] + + start_row = 2 + + # Main Fields in Order + for key in order: + val = data.get(key) + if val: + ws.append([key.replace('_', ' ').upper(), str(val)]) + + # Other fields not in order + for k, v in data.items(): + if k not in exclude and k not in order: + ws.append([k.replace('_', ' ').upper(), str(v)]) + + # Auto-adjust columns + for col in ws.columns: + max_length = 0 + column = col[0].column_letter # Get the column name + for cell in col: + try: + if len(str(cell.value)) > max_length: + max_length = len(str(cell.value)) + except: + pass + adjusted_width = (max_length + 2) * 1.2 + ws.column_dimensions[column].width = adjusted_width + + # Save to buffer + output = io.BytesIO() + wb.save(output) + output.seek(0) + + filename = f"Data_KTP_{data.get('nik', 'Export')}.xlsx" + + return send_file( + output, + as_attachment=True, + download_name=filename, + mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + ) + + except Exception as e: + logger.error(f"Export Excel Error: {e}") + return jsonify({'error': 'Gagal export excel'}), 500 + + + +# ============================================ +# Database CRUD API Endpoints +# ============================================ + +@app.route('/api/ktp', methods=['GET']) +def list_ktp_records(): + """List all KTP records with pagination""" + try: + page, per_page = validate_pagination( + request.args.get('page', 1), + request.args.get('per_page', 10) + ) + + pagination = KTPRecord.query.order_by(KTPRecord.created_at.desc()).paginate( + page=page, per_page=per_page, error_out=False + ) + + return jsonify({ + 'success': True, + 'data': [r.to_dict() for r in pagination.items], + 'total': pagination.total, + 'pages': pagination.pages, + 'current_page': page, + 'per_page': per_page + }) + except Exception as e: + logger.error(f"Error in list_ktp_records: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal mengambil data KTP') + }), 500 + + +@app.route('/api/ktp/', methods=['GET']) +def get_ktp_record(id): + """Get KTP record by ID""" + try: + record = KTPRecord.query.get_or_404(id) + return jsonify({'success': True, 'data': record.to_dict()}) + except Exception as e: + logger.error(f"Error in get_ktp_record: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Data KTP tidak ditemukan') + }), 404 + + +@app.route('/api/ktp/nik/', methods=['GET']) +def get_ktp_by_nik(nik): + """Get KTP record by NIK""" + try: + # Validasi format NIK + if not validate_nik(nik): + return jsonify({ + 'success': False, + 'error': 'Format NIK tidak valid. NIK harus 16 digit angka' + }), 400 + + record = KTPRecord.query.filter_by(nik=nik).first() + if record: + return jsonify({'success': True, 'data': record.to_dict()}) + return jsonify({'success': False, 'error': 'NIK tidak ditemukan'}), 404 + except Exception as e: + logger.error(f"Error in get_ktp_by_nik: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal mencari data KTP') + }), 500 + + +@app.route('/api/ktp/', methods=['DELETE']) +def delete_ktp_record(id): + """Delete KTP record by ID""" + try: + record = KTPRecord.query.get_or_404(id) + db.session.delete(record) + db.session.commit() + logger.info(f"Deleted KTP record: ID {id}") + return jsonify({'success': True, 'message': f'Record {id} berhasil dihapus'}) + except Exception as e: + db.session.rollback() + logger.error(f"Error in delete_ktp_record: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal menghapus data KTP') + }), 500 + + +@app.route('/api/kk', methods=['GET']) +def list_kk_records(): + """List all KK records with pagination""" + try: + page, per_page = validate_pagination( + request.args.get('page', 1), + request.args.get('per_page', 10) + ) + + pagination = KKRecord.query.order_by(KKRecord.created_at.desc()).paginate( + page=page, per_page=per_page, error_out=False + ) + + return jsonify({ + 'success': True, + 'data': [r.to_dict() for r in pagination.items], + 'total': pagination.total, + 'pages': pagination.pages, + 'current_page': page, + 'per_page': per_page + }) + except Exception as e: + logger.error(f"Error in list_kk_records: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal mengambil data KK') + }), 500 + + +@app.route('/api/kk/', methods=['GET']) +def get_kk_record(id): + """Get KK record by ID""" + try: + record = KKRecord.query.get_or_404(id) + return jsonify({'success': True, 'data': record.to_dict()}) + except Exception as e: + logger.error(f"Error in get_kk_record: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Data KK tidak ditemukan') + }), 404 + + +@app.route('/api/kk/', methods=['DELETE']) +def delete_kk_record(id): + """Delete KK record by ID""" + try: + record = KKRecord.query.get_or_404(id) + db.session.delete(record) + db.session.commit() + logger.info(f"Deleted KK record: ID {id}") + return jsonify({'success': True, 'message': f'Record {id} berhasil dihapus'}) + except Exception as e: + db.session.rollback() + logger.error(f"Error in delete_kk_record: {e}", exc_info=True) + return jsonify({ + 'success': False, + 'error': sanitize_error_message(e, 'Gagal menghapus data KK') + }), 500 + if __name__ == '__main__': - print("="*50) - print("OCR KTP/KK Application") - print("="*50) - print("Membuka: http://localhost:5000") - print("Tekan Ctrl+C untuk berhenti") - print("="*50) + # Konfigurasi dari environment variables + host = os.environ.get('FLASK_HOST', '0.0.0.0') + port = int(os.environ.get('FLASK_PORT', 5000)) + debug = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true' - app.run(host='0.0.0.0', port=5000, debug=True) + logger.info("="*50) + logger.info("OCR KTP/KK Application") + logger.info("="*50) + logger.info(f"Membuka: http://{host}:{port}") + logger.info("Tekan Ctrl+C untuk berhenti") + logger.info("="*50) + + app.run(host=host, port=port, debug=debug) diff --git a/database.py b/database.py new file mode 100644 index 0000000..cef2d25 --- /dev/null +++ b/database.py @@ -0,0 +1,39 @@ +""" +Database Configuration for OCR Application +Using Flask-SQLAlchemy with MySQL (PyMySQL driver) +""" + +import os +from flask_sqlalchemy import SQLAlchemy + +db = SQLAlchemy() + +# Database configuration +DB_CONFIG = { + 'host': os.environ.get('DB_HOST', 'localhost'), + 'port': os.environ.get('DB_PORT', '3306'), + 'database': os.environ.get('DB_NAME', 'ocr_db'), + 'user': os.environ.get('DB_USER', 'ocr_user'), + 'password': os.environ.get('DB_PASSWORD', 'ocr_password123') +} + +def get_database_uri(): + """Generate SQLAlchemy database URI""" + return f"mysql+pymysql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}?charset=utf8mb4" + +def init_db(app): + """Initialize database with Flask app""" + app.config['SQLALCHEMY_DATABASE_URI'] = get_database_uri() + app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + app.config['SQLALCHEMY_ENGINE_OPTIONS'] = { + 'pool_recycle': 3600, + 'pool_pre_ping': True + } + + db.init_app(app) + + with app.app_context(): + db.create_all() + print(f"✓ Database connected: {DB_CONFIG['database']}@{DB_CONFIG['host']}") + + return db diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..19f66f2 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,21 @@ +version: '3.8' + +services: + mysql: + image: mysql:8.0 + container_name: mysql-server + restart: unless-stopped + environment: + MYSQL_ROOT_PASSWORD: root123 + MYSQL_DATABASE: ocr_db + MYSQL_USER: ocr_user + MYSQL_PASSWORD: ocr_password123 + ports: + - "3306:3306" + volumes: + - mysql_data:/var/lib/mysql + command: --default-authentication-plugin=mysql_native_password + +volumes: + mysql_data: + driver: local diff --git a/image_processor.py b/image_processor.py new file mode 100644 index 0000000..d9a13c8 --- /dev/null +++ b/image_processor.py @@ -0,0 +1,174 @@ +""" +KTP Image Processor - Enhanced Version +Crop, resize, dan enhanced preprocessing untuk OCR yang lebih akurat + +Standar e-KTP: 85.6mm x 53.98mm = 1011x638 px @300dpi + +Improvements based on Context7 documentation: +- Pillow ImageEnhance for contrast/sharpness +- OpenCV CLAHE for adaptive histogram equalization +- Denoising for cleaner text detection +""" + +import cv2 +import numpy as np +import os +from PIL import Image, ImageEnhance, ImageFilter + +KTP_WIDTH = 1011 +KTP_HEIGHT = 638 + + +def enhance_image_pil(image_path: str, output_path: str = None) -> str: + """ + Enhance image using Pillow (from Context7 docs) + - Contrast enhancement + - Sharpness enhancement + - Detail filter for text clarity + + Args: + image_path: Path to input image + output_path: Optional path to save enhanced image + + Returns: + Path to enhanced image + """ + try: + img = Image.open(image_path) + + # Contrast enhancement (factor 1.3 from Context7) + contrast = ImageEnhance.Contrast(img) + img = contrast.enhance(1.3) + + # Sharpness enhancement + sharpness = ImageEnhance.Sharpness(img) + img = sharpness.enhance(1.2) + + # Apply detail filter for text clarity + img = img.filter(ImageFilter.DETAIL) + + # Save + if output_path is None: + base, ext = os.path.splitext(image_path) + output_path = f"{base}_enhanced.jpg" + + img.save(output_path, quality=95) + print(f" [ENHANCE] Pillow enhanced: {output_path}") + + return output_path + + except Exception as e: + print(f" [ENHANCE] Pillow error: {e}") + return image_path # Return original if enhancement fails + + +def enhance_image_cv(image: np.ndarray) -> np.ndarray: + """ + Enhance image using OpenCV (from Context7 docs) + - CLAHE for adaptive histogram equalization + - Denoising + - Sharpening using Laplacian kernel + + Args: + image: OpenCV image (BGR) + + Returns: + Enhanced image (BGR) + """ + try: + # Convert to grayscale for processing + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + # Denoise (from Context7) + denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21) + + # Enhanced CLAHE for documents + clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) + enhanced = clahe.apply(denoised) + + # Sharpen using kernel (from Context7) + kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32) + sharpened = cv2.filter2D(enhanced, -1, kernel) + + # Convert back to BGR + return cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR) + + except Exception as e: + print(f" [ENHANCE] OpenCV error: {e}") + return image # Return original if enhancement fails + + +def crop_by_ocr_bounds(image, ocr_results, padding=0.03): + """Crop image based on OCR bounding boxes""" + if not ocr_results: + return image + + h, w = image.shape[:2] + all_x = [] + all_y = [] + + for r in ocr_results: + box = r.get('box', []) + if len(box) >= 4: + try: + for point in box: + if isinstance(point, (list, tuple)) and len(point) >= 2: + all_x.append(float(point[0])) + all_y.append(float(point[1])) + except: + continue + + if not all_x or not all_y: + return image + + x1 = int(max(0, min(all_x) - w * padding)) + y1 = int(max(0, min(all_y) - h * padding)) + x2 = int(min(w, max(all_x) + w * padding)) + y2 = int(min(h, max(all_y) + h * padding)) + + return image[y1:y2, x1:x2] + + +def normalize_ktp_image(image_path, output_path=None, ocr_results=None): + """ + Normalisasi gambar KTP: + 1. Crop berdasarkan OCR bounds + 2. Ensure landscape + 3. Resize ke ukuran standar + """ + try: + image = cv2.imread(image_path) + if image is None: + return None, False, "Gagal membaca gambar" + + h, w = image.shape[:2] + print(f" [IMAGE] Original: {w}x{h}") + + # Crop + if ocr_results: + image = crop_by_ocr_bounds(image, ocr_results) + h, w = image.shape[:2] + print(f" [IMAGE] Cropped: {w}x{h}") + + # Landscape + if h > w: + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) + + # Resize + resized = cv2.resize(image, (KTP_WIDTH, KTP_HEIGHT), + interpolation=cv2.INTER_LANCZOS4) + + # Save + if output_path is None: + base, ext = os.path.splitext(image_path) + output_path = f"{base}_normalized.jpg" + + cv2.imwrite(output_path, resized, [cv2.IMWRITE_JPEG_QUALITY, 95]) + print(f" [IMAGE] Saved: {output_path}") + + return output_path, True, f"Normalized to {KTP_WIDTH}x{KTP_HEIGHT}" + + except Exception as e: + import traceback + traceback.print_exc() + return None, False, f"Error: {str(e)}" diff --git a/ktp_extractor.py b/ktp_extractor.py index b94e146..d330829 100644 --- a/ktp_extractor.py +++ b/ktp_extractor.py @@ -2,17 +2,42 @@ KTP Field Extractor Ekstraksi data terstruktur dari hasil OCR KTP Indonesia Mendukung berbagai format output OCR (full-width colon, standard colon, tanpa colon) + +OPTIMIZED: Pre-compiled regex patterns for better performance """ import re from typing import Dict, Optional, List +import difflib +# Debug mode - set to False for production +DEBUG_MODE = False class KTPExtractor: """Ekstrak field dari hasil OCR KTP""" - # Pattern colon yang berbeda-beda (standard, full-width, dll) - COLON_PATTERN = r'[:\:]' + # Pre-compiled regex patterns (optimization) + COLON_PATTERN = re.compile(r'[::]') + NIK_PATTERN = re.compile(r'\b(\d{16})\b') + DATE_PATTERN = re.compile(r'(\d{2}[-/]\d{2}[-/]\d{4})') + RT_RW_PATTERN = re.compile(r'(\d{3})\s*/\s*(\d{3})') + GOL_DARAH_PATTERN = re.compile(r'([ABO]{1,2}[+\-]?)', re.IGNORECASE) + PROVINSI_SPLIT_PATTERN = re.compile(r'(?i)provinsi\s*') + KABUPATEN_SPLIT_PATTERN = re.compile(r'(?i)\s*(kabupaten|kota)\s*') + TTL_PATTERN = re.compile(r'(?i)tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir') + + # Pattern colon string (for backward compatibility) + COLON_PATTERN_STR = r'[::]' + + # Daftar Provinsi Indonesia (38 Provinsi) + PROVINSI_LIST = [ + "ACEH", "SUMATERA UTARA", "SUMATERA BARAT", "RIAU", "JAMBI", "SUMATERA SELATAN", "BENGKULU", "LAMPUNG", + "KEPULAUAN BANGKA BELITUNG", "KEPULAUAN RIAU", "DKI JAKARTA", "JAWA BARAT", "JAWA TENGAH", "DI YOGYAKARTA", + "JAWA TIMUR", "BANTEN", "BALI", "NUSA TENGGARA BARAT", "NUSA TENGGARA TIMUR", "KALIMANTAN BARAT", + "KALIMANTAN TENGAH", "KALIMANTAN SELATAN", "KALIMANTAN TIMUR", "KALIMANTAN UTARA", "SULAWESI UTARA", + "SULAWESI TENGAH", "SULAWESI SELATAN", "SULAWESI TENGGARA", "GORONTALO", "SULAWESI BARAT", "MALUKU", + "MALUKU UTARA", "PAPUA BARAT", "PAPUA", "PAPUA SELATAN", "PAPUA TENGAH", "PAPUA PEGUNUNGAN", "PAPUA BARAT DAYA" + ] # Keywords untuk jenis kelamin MALE_KEYWORDS = ['laki', 'pria', 'male'] @@ -26,6 +51,99 @@ class KTPExtractor: 'buruh', 'petani', 'nelayan', 'karyawan', 'ibu rumah tangga', 'tidak bekerja', 'lainnya', 'mengurus rumah tangga'] + # Status Perkawinan yang valid + STATUS_PERKAWINAN_LIST = ['BELUM KAWIN', 'KAWIN', 'CERAI HIDUP', 'CERAI MATI'] + + # Field Labels untuk fuzzy matching (mengatasi typo OCR seperti "Aamat" -> "ALAMAT") + FIELD_LABELS = { + 'nama': ['NAMA'], + 'alamat': ['ALAMAT'], + 'agama': ['AGAMA'], + 'pekerjaan': ['PEKERJAAN'], + 'kewarganegaraan': ['KEWARGANEGARAAN', 'WARGANEGARA'], + 'tempat_lahir': ['TEMPAT', 'LAHIR', 'TEMPAT/TGL LAHIR'], + 'jenis_kelamin': ['JENIS KELAMIN', 'JENIS', 'KELAMIN'], + 'gol_darah': ['GOL. DARAH', 'GOL DARAH', 'GOLONGAN DARAH'], + 'kel_desa': ['KEL/DESA', 'KELURAHAN', 'DESA'], + 'kecamatan': ['KECAMATAN', 'KEC'], + 'status_perkawinan': ['STATUS PERKAWINAN', 'PERKAWINAN'], + 'berlaku_hingga': ['BERLAKU HINGGA', 'BERLAKU'], + 'rt_rw': ['RT/RW', 'RT', 'RW'], + } + + # ============================================ + # Sistem Penamaan Hindu Bali + # ============================================ + # Struktur: [Prefix Gender] + [Gelar Kasta] + [Penanda Gender] + [Urutan Lahir] + [Nama Pribadi] + + # Prefix penanda gender (harus di awal nama) + BALI_GENDER_PREFIX = { + 'NI': 'PEREMPUAN', # Prefix untuk perempuan + 'I': 'LAKI-LAKI', # Prefix untuk laki-laki + } + + # Gelar Kasta (setelah prefix gender) + BALI_KASTA = { + 'IDA': 'BRAHMANA', + 'GUSTI': 'KSATRIA', + 'ANAK AGUNG': 'KSATRIA', + 'COKORDA': 'KSATRIA', + 'DEWA': 'KSATRIA', + 'DESAK': 'KSATRIA', + 'AGUNG': 'KSATRIA', + 'NGAKAN': 'WAISYA', + 'SANG': 'WAISYA', + 'SI': 'WAISYA', + } + + # Penanda gender tambahan (setelah kasta) + BALI_GENDER_MARKER = { + 'AYU': 'PEREMPUAN', + 'ISTRI': 'PEREMPUAN', + 'LUH': 'PEREMPUAN', + 'BAGUS': 'LAKI-LAKI', + 'GEDE': 'LAKI-LAKI', + 'AGUS': 'LAKI-LAKI', + 'ALIT': 'LAKI-LAKI', # Kecil/muda (untuk laki-laki) + } + + # Urutan kelahiran (bersiklus setiap 4 anak) + BALI_BIRTH_ORDER = { + 'PUTU': 1, 'WAYAN': 1, 'GEDE': 1, 'ILUH': 1, + 'MADE': 2, 'KADEK': 2, 'NENGAH': 2, + 'NYOMAN': 3, 'KOMANG': 3, + 'KETUT': 4, + 'BALIK': 5, # Untuk anak ke-5+ (siklus ulang) + } + + # Soroh/Klan Bali (identifikasi garis keturunan) + BALI_SOROH = { + 'PASEK': 'SOROH', # Klan mayoritas (~60% Hindu Bali) + 'PANDE': 'SOROH', # Klan pandai besi/metalurgi + 'ARYA': 'SOROH', # Klan Arya + 'BENDESA': 'SOROH', # Pemimpin adat + 'TANGKAS': 'SOROH', # Klan Tangkas + 'CELAGI': 'SOROH', # Klan Celagi + 'SENGGUHU': 'SOROH', # Klan Sengguhu + 'KUBAYAN': 'SOROH', # Klan Kubayan + 'BANDESA': 'SOROH', # Varian Bendesa + } + + # Gabungkan semua komponen untuk deteksi (urut dari panjang ke pendek) + BALI_NAME_COMPONENTS = [ + # Prefix gender + 'NI', 'I', + # Kasta (prioritas: yang lebih panjang dulu) + 'ANAK AGUNG', 'COKORDA', 'NGAKAN', + 'IDA', 'GUSTI', 'DEWA', 'DESAK', 'AGUNG', 'SANG', 'SI', + # Soroh/Klan + 'PASEK', 'PANDE', 'ARYA', 'BENDESA', 'BANDESA', 'TANGKAS', 'CELAGI', 'SENGGUHU', 'KUBAYAN', + # Gender marker + 'AYU', 'ISTRI', 'LUH', 'BAGUS', 'GEDE', 'AGUS', 'ALIT', + # Urutan lahir + 'WAYAN', 'PUTU', 'ILUH', 'MADE', 'KADEK', 'NENGAH', 'NYOMAN', 'KOMANG', 'KETUT', 'BALIK', + ] + # KTP Zone Template (normalized coordinates: x_min, y_min, x_max, y_max) # Based on standard KTP layout ZONES = { @@ -74,6 +192,211 @@ class KTPExtractor: if len(parts) > 1: return parts[1].strip() return text.strip() + + def _find_best_match(self, text: str, candidates: List[str], cutoff: float = 0.6) -> Optional[str]: + """Find best fuzzy match from candidates""" + matches = difflib.get_close_matches(text, candidates, n=1, cutoff=cutoff) + return matches[0] if matches else None + + def _is_label_match(self, text: str, field_name: str, cutoff: float = 0.7) -> bool: + """ + Fuzzy match untuk label field - mengatasi typo OCR seperti "Aamat" -> "ALAMAT" + Returns True jika text cocok dengan salah satu label untuk field tersebut + """ + if not text or not text.strip(): + return False + + if field_name not in self.FIELD_LABELS: + return field_name.lower() in text.lower() + + text_upper = text.upper().strip() + + # Coba exact match dulu (lebih cepat) + for label in self.FIELD_LABELS[field_name]: + if label in text_upper: + return True + + # Fuzzy match jika tidak ada exact match + # Ekstrak kata pertama dari text (biasanya label ada di awal) + parts = text_upper.split(':')[0].split() + if not parts: + return False + first_word = parts[0] + + for label in self.FIELD_LABELS[field_name]: + label_parts = label.split() + if not label_parts: + continue + # Bandingkan dengan kata pertama + ratio = difflib.SequenceMatcher(None, first_word, label_parts[0]).ratio() + if ratio >= cutoff: + print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})") + return True + + return False + + def _parse_balinese_name(self, name: str) -> str: + """ + Parse nama Bali yang digabung OCR dan tambahkan spasi yang tepat. + Contoh: "NIGUSTIAYUNYOMANSUWETRI" -> "NI GUSTI AYU NYOMAN SUWETRI" + + Struktur nama Bali: + [Prefix Gender] + [Gelar Kasta] + [Penanda Gender] + [Urutan Lahir] + [Nama Pribadi] + + PENTING: Hanya proses jika nama benar-benar mengandung komponen Bali! + """ + if not name: + return name + + name_upper = name.upper().strip() + + # Jika sudah ada spasi dengan jumlah wajar, kembalikan apa adanya + if name_upper.count(' ') >= 2: + return name_upper + + # Cek apakah nama mengandung komponen Bali + # Nama harus dimulai dengan NI, I GUSTI, IDA, atau komponen urutan lahir Bali + name_clean = name_upper.replace(' ', '') + + is_balinese_name = False + # Cek prefix khas Bali + if name_clean.startswith('NI') and len(name_clean) > 3: + # NI harus diikuti komponen Bali lain (GUSTI, LUH, WAYAN, dll) + after_ni = name_clean[2:] + for comp in ['GUSTI', 'LUH', 'WAYAN', 'MADE', 'NYOMAN', 'KETUT', 'PUTU', 'KADEK', 'KOMANG', 'PASEK', 'PANDE']: + if after_ni.startswith(comp): + is_balinese_name = True + break + elif name_clean.startswith('IGUSTI') or name_clean.startswith('IDABAGUS') or name_clean.startswith('IDAAYU'): + is_balinese_name = True + elif any(name_clean.startswith(p) for p in ['GUSTI', 'WAYAN', 'PUTU', 'MADE', 'KADEK', 'NYOMAN', 'KOMANG', 'KETUT']): + is_balinese_name = True + + if not is_balinese_name: + # Bukan nama Bali, kembalikan dengan pemisahan spasi standar + # Jika ada 1 spasi, kembalikan apa adanya + if ' ' in name_upper: + return name_upper + # Jika tidak ada spasi sama sekali, kembalikan apa adanya (mungkin memang 1 kata) + return name_upper + + # Urutan komponen yang akan dicari (dari yang terpanjang ke terpendek untuk akurasi) + components_ordered = sorted(self.BALI_NAME_COMPONENTS, key=len, reverse=True) + + result_parts = [] + remaining = name_clean + + # Parse prefix gender (NI atau I di awal) + if remaining.startswith('NI'): + result_parts.append('NI') + remaining = remaining[2:] + elif remaining.startswith('I') and len(remaining) > 1: + # Pastikan bukan bagian dari kata lain + next_char = remaining[1] if len(remaining) > 1 else '' + # Cek apakah karakter setelah I adalah konsonan (bukan vokal) + if next_char not in 'AIUEO': + result_parts.append('I') + remaining = remaining[1:] + + # Parse komponen-komponen lainnya + found = True + max_iterations = 10 # Prevent infinite loop + iteration = 0 + + while remaining and found and iteration < max_iterations: + found = False + iteration += 1 + + for component in components_ordered: + if remaining.startswith(component): + # Skip jika komponen sudah ada di result (kecuali nama pribadi) + if component not in result_parts or component not in self.BALI_NAME_COMPONENTS: + result_parts.append(component) + remaining = remaining[len(component):] + found = True + break + + # Sisa adalah nama pribadi + if remaining: + result_parts.append(remaining) + + parsed_name = ' '.join(result_parts) + + # Log jika ada perubahan + if parsed_name != name_upper: + print(f" [BALI NAME] '{name_upper}' -> '{parsed_name}'") + + return parsed_name + + def _search_best_match_in_text(self, text: str, candidates: List[str], prefix: str = "") -> tuple: + """ + Search if any candidate is present in text using multiple strategies: + 1. Exact substring + 2. Prefix + Candidate (Fuzzy) - e.g. "PROVINSI BALI" + 3. Candidate Only (Fuzzy) - e.g. "BALI" (if prefix is missing/damaged) + Returns (best_candidate, confidence_score) + """ + text_upper = text.upper() + best_match = None + best_ratio = 0.0 + + # Strategy 1: Exact substring match (fastest & most reliable) + for candidate in candidates: + if candidate in text_upper: + if len(candidate) > len(best_match or ""): + best_match = candidate + best_ratio = 1.0 + + if best_ratio == 1.0: + return best_match, best_ratio + + # Strategy 2: Prefix Construction & Fuzzy Match + prefix_upper = prefix.upper() if prefix else "" + + # DEBUG: Print checking (controlled by DEBUG_MODE) + if DEBUG_MODE: + print(f"DEBUG Check Text: '{text_upper}' with Prefix: '{prefix_upper}'") + + for candidate in candidates: + # 2a. Compare with Prefix + Space (e.g. "PROVINSI BALI") + if prefix: + target_spaced = f"{prefix_upper} {candidate}" + s_spaced = difflib.SequenceMatcher(None, target_spaced, text_upper) + ratio_spaced = s_spaced.ratio() + + # print(f" -> Compare '{target_spaced}' vs '{text_upper}' = {ratio_spaced:.2f}") + + if ratio_spaced > best_ratio and ratio_spaced > 0.5: + best_ratio = ratio_spaced + best_match = candidate + + # 2b. Compare with Prefix NO SPACE (e.g. "PROVINSIBALI") + # This handles "PROVNSIBALI" perfectly + target_merged = f"{prefix_upper}{candidate}" + s_merged = difflib.SequenceMatcher(None, target_merged, text_upper) + ratio_merged = s_merged.ratio() + + if DEBUG_MODE: + print(f" -> Compare Merged '{target_merged}' vs '{text_upper}' = {ratio_merged:.2f}") + + if ratio_merged > best_ratio and ratio_merged > 0.5: + best_ratio = ratio_merged + best_match = candidate + + # 2c. Compare Candidate ONLY (e.g. "BALI") + if len(candidate) > 3: + s_raw = difflib.SequenceMatcher(None, candidate, text_upper) + ratio_raw = s_raw.ratio() + + # print(f" -> Compare Raw '{candidate}' vs '{text_upper}' = {ratio_raw:.2f}") + + if ratio_raw > best_ratio and ratio_raw > 0.6: + best_ratio = ratio_raw + best_match = candidate + + if DEBUG_MODE: + print(f"DEBUG Best Match: {best_match} ({best_ratio:.2f})") + return best_match, best_ratio def _detect_image_size(self, ocr_results: List[Dict]) -> tuple: """Detect image dimensions from bounding boxes""" @@ -93,10 +416,62 @@ class KTPExtractor: # PROVINSI from header if 'header_provinsi' in zone_texts: + print(f"DEBUG Zone Provinsi Content: {zone_texts['header_provinsi']}") for text in zone_texts['header_provinsi']: - if 'provinsi' in text.lower(): - val = re.sub(r'(?i)provinsi\s*', '', text).strip() - if val: + text_clean = text.strip() + # Use prefix strategy: "PROVINSI " + result vs text + match, score = self._search_best_match_in_text(text_clean, self.PROVINSI_LIST, prefix="PROVINSI") + + # LOWER THRESHOLD to 0.5 because "PROVINSI BALI" vs "PROVNSIBALI" is roughly 0.5-0.6 range + if match and score > 0.5: + result['provinsi'] = match + + # Remove the found province (and label) from text to see what's left + # If we matched "PROVINSI JAWA TIMUR", the text might be "PROVNSIJAWATMRKABUPATENSUMENEP" + # It's hard to cleanly remove "PROVISI JAWA TIMUR" if it was fuzzy matched. + + # BUT, we can try to find "KABUPATEN" or "KOTA" in the original text + # independent of the province match + if 'kabupaten' in text_clean.lower() or 'kota' in text_clean.lower(): + parts = re.split(r'(?i)\s*(kabupaten|kota)', text_clean) + if len(parts) > 1: + kab_part = "".join(parts[1:]).strip() + kab_val = re.sub(r'^(?i)(kabupaten|kota)\s*', '', kab_part).strip() + if kab_val and result['kabupaten_kota'] is None: + prefix = "KABUPATEN" if "kabupaten" in text_clean.lower() else "KOTA" + result['kabupaten_kota'] = f"{prefix} {kab_val.upper()}" + break + + # Fallback to keyword splitting (Legacy/Blurry fallback) + text_lower = text.lower() + val = text + + # If keyword exists, strip it + if 'provinsi' in text_lower: + split_prov = re.split(r'(?i)provinsi\s*', text, 1) + if len(split_prov) > 1: + val = split_prov[1].strip() + else: + val = "" + + # Check for merged text + if 'kabupaten' in text_lower or 'kota' in text_lower: + parts = re.split(r'(?i)\s*(kabupaten|kota)', val) + val = parts[0].strip() + + if len(parts) > 1: + kab_part = "".join(parts[1:]).strip() + kab_val = re.sub(r'^(?i)(kabupaten|kota)\s*', '', kab_part).strip() + if kab_val and result['kabupaten_kota'] is None: + prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA" + result['kabupaten_kota'] = f"{prefix} {kab_val.upper()}" + + if val and len(val) > 2: + # Try fuzzy match again on the cleaned value + best_match = self._find_best_match(val.upper(), self.PROVINSI_LIST, cutoff=0.6) + if best_match: + result['provinsi'] = best_match + else: result['provinsi'] = val.upper() break @@ -104,13 +479,32 @@ class KTPExtractor: if 'header_kabupaten' in zone_texts: for text in zone_texts['header_kabupaten']: text_lower = text.lower() + val = text + + # Check keyword if 'kabupaten' in text_lower or 'kota' in text_lower: - val = re.sub(r'(?i)(kabupaten|kota)\s*', '', text).strip() - if val: - result['kabupaten_kota'] = val.upper() + split_kab = re.split(r'(?i)\s*(kabupaten|kota)\s*', text, 1) + if len(split_kab) > 1: + val = split_kab[-1].strip() else: - result['kabupaten_kota'] = text.upper() - break + val = "" + + # If no keyword, but it's in the kabupaten zone, assume it's data + if val: + # Re-add prefix standard if we separated it or if it was missing + # Heuristic: if validation suggests it's a known regency, we are good. + # For now, standardize format. + if result['kabupaten_kota'] is None: + prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA" + # If no keyword found, default to KABUPATEN? Or better check Wilayah? + # Let's default to detected keyword or KABUPATEN + if "kota" in text_lower: + prefix = "KOTA" + else: + prefix = "KABUPATEN" + + result['kabupaten_kota'] = f"{prefix} {val.upper()}" + break # NAMA from nama zone (skip label line) if 'nama' in zone_texts: @@ -161,6 +555,89 @@ class KTPExtractor: result['alamat'] = val.upper() break + # RT/RW + if 'rt_rw' in zone_texts: + for text in zone_texts['rt_rw']: + rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text) + if rt_rw_match: + result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}" + break + + # KEL/DESA + if 'kel_desa' in zone_texts: + for text in zone_texts['kel_desa']: + if 'kel' in text.lower() or 'desa' in text.lower(): + val = self._extract_value_from_text(text) + if val and 'kel' not in val.lower(): + result['kel_desa'] = val.upper() + break + elif result['kel_desa'] is None: + # Fallback context: simple text + result['kel_desa'] = text.upper() + + # KECAMATAN + if 'kecamatan' in zone_texts: + for text in zone_texts['kecamatan']: + if 'kec' in text.lower(): + val = self._extract_value_from_text(text) + if val and 'kec' not in val.lower(): + result['kecamatan'] = val.upper() + break + elif result['kecamatan'] is None: + result['kecamatan'] = text.upper() + + # AGAMA + if 'agama' in zone_texts: + for text in zone_texts['agama']: + val = text.upper() + if 'agama' in text.lower(): + val = self._extract_value_from_text(text).upper() + + # Verify against valid list + for agama in self.AGAMA_LIST: + if agama.upper() in val: + result['agama'] = agama.upper() + break + if result['agama']: break + + # STATUS PERKAWINAN + if 'status' in zone_texts: + for text in zone_texts['status']: + val = text.upper() + # Normalize common OCR errors (e.g. BELUMKAWIN) + val = val.replace("BELUMKAWIN", "BELUM KAWIN") + + # Check against official list + found_status = False + for status in self.STATUS_PERKAWINAN_LIST: + if status in val: + result['status_perkawinan'] = status + found_status = True + break + if found_status: break + + # PEKERJAAN + if 'pekerjaan' in zone_texts: + for text in zone_texts['pekerjaan']: + val = text.upper() + if 'pekerjaan' in text.lower(): + val = self._extract_value_from_text(text).upper() + + # Check against list or take value + if len(val) > 3 and 'pekerjaan' not in val.lower(): + result['pekerjaan'] = val + break + + # WNI + if 'wni' in zone_texts: + for text in zone_texts['wni']: + if 'wni' in text.lower(): + result['kewarganegaraan'] = 'WNI' + break + elif 'wna' in text.lower(): + result['kewarganegaraan'] = 'WNA' + break + # PENERBITAN area (tempat & tanggal dalam satu zona) if 'penerbitan' in zone_texts: for text in zone_texts['penerbitan']: @@ -194,7 +671,7 @@ class KTPExtractor: 'status_perkawinan': None, 'pekerjaan': None, 'kewarganegaraan': None, - 'berlaku_hingga': None, + 'berlaku_hingga': 'SEUMUR HIDUP', # Default sesuai peraturan pemerintah e-KTP 'provinsi': None, 'kabupaten_kota': None, 'tanggal_penerbitan': None, @@ -234,6 +711,14 @@ class KTPExtractor: # Fallback: Parse line by line for fields not found by zone for i, text in enumerate(texts): + # Skip baris yang hanya berisi punctuation atau kosong + text_stripped = text.strip() + if not text_stripped or text_stripped in [':', ':', '.', '-', '/', '|']: + continue + # Skip baris yang terlalu pendek (hanya 1-2 karakter non-alfanumerik) + if len(text_stripped) <= 2 and not any(c.isalnum() for c in text_stripped): + continue + text_lower = text.lower() # Normalize colons @@ -242,19 +727,49 @@ class KTPExtractor: # ===== PROVINSI ===== if 'provinsi' in text_lower and result['provinsi'] is None: - val = self._extract_after_label(text_normalized, 'provinsi') - if val: - result['provinsi'] = val.upper() - elif i + 1 < len(texts) and 'provinsi' not in texts[i+1].lower(): - # Mungkin value di line berikutnya - result['provinsi'] = texts[i+1].strip().upper() + # Split by PROVINSI and take remainder + split_prov = re.split(r'(?i)provinsi\s*', text, 1) + if len(split_prov) > 1: + val = split_prov[1].strip() + # Check if it contains kabupaten/kota (merged line case) + if 'kabupaten' in val.lower() or 'kota' in val.lower(): + parts = re.split(r'(?i)\s*(kabupaten|kota)', val) + val = parts[0].strip() + + if val: + # Fuzzy match against valid provinces + best_match = self._find_best_match(val.upper(), self.PROVINSI_LIST, cutoff=0.6) + if best_match: + result['provinsi'] = best_match + else: + result['provinsi'] = val.upper() + + # Check for next line if current line only had 'PROVINSI' + if result['provinsi'] is None and i + 1 < len(texts): + next_text = texts[i+1].strip() + next_lower = next_text.lower() + # Only take next line if it doesn't look like another field + if not any(kw in next_lower for kw in ['provinsi', 'kabupaten', 'kota', 'nik']): + # Fuzzy match next line + val = next_text.upper() + best_match = self._find_best_match(val, self.PROVINSI_LIST, cutoff=0.6) + if best_match: + result['provinsi'] = best_match + else: + result['provinsi'] = val # ===== KABUPATEN/KOTA ===== if ('kabupaten' in text_lower or 'kota' in text_lower or 'jakarta' in text_lower) and result['kabupaten_kota'] is None: if 'provinsi' not in text_lower: # Bukan bagian dari provinsi - val = self._extract_after_label(text_normalized, 'kabupaten|kota') - if val: - result['kabupaten_kota'] = val.upper() + # Split by KABUPATEN or KOTA and take remainder + split_kab = re.split(r'(?i)\s*(kabupaten|kota)\s*', text, 1) + if len(split_kab) > 1: + prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA" + val = split_kab[-1].strip() + if val: + result['kabupaten_kota'] = f"{prefix} {val.upper()}" + else: + result['kabupaten_kota'] = text.strip().upper() else: result['kabupaten_kota'] = text.strip().upper() @@ -312,13 +827,17 @@ class KTPExtractor: if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3: result['gol_darah'] = text.strip().upper() - # ===== ALAMAT ===== - if 'alamat' in text_lower and result['alamat'] is None: - val = self._extract_after_label(text_normalized, 'alamat') + # ===== ALAMAT ===== (dengan fuzzy label matching) + if result['alamat'] is None and self._is_label_match(text, 'alamat'): + val = self._extract_after_label(text_normalized, r'a{1,2}l{0,2}a?m{0,2}a?t') if val: result['alamat'] = val.upper() elif i + 1 < len(texts): - result['alamat'] = texts[i+1].strip().upper() + # Ambil nilai dari baris berikutnya + next_text = texts[i+1].strip() + # Pastikan bukan label field lain + if len(next_text) > 2 and not self._is_label_match(next_text, 'rt_rw'): + result['alamat'] = next_text.upper() # ===== RT/RW ===== rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text) @@ -346,9 +865,9 @@ class KTPExtractor: if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['agama', 'status', 'pekerjaan']): result['kecamatan'] = next_text.upper() - # ===== AGAMA ===== - if 'agama' in text_lower: - val = self._extract_after_label(text_normalized, 'agama') + # ===== AGAMA ===== (dengan fuzzy label matching) + if self._is_label_match(text, 'agama'): + val = self._extract_after_label(text_normalized, r'a?g{0,2}a?m{0,2}a') if val and result['agama'] is None: result['agama'] = val.upper() elif result['agama'] is None and i + 1 < len(texts): @@ -367,17 +886,18 @@ class KTPExtractor: # ===== STATUS PERKAWINAN ===== if 'kawin' in text_lower: if result['status_perkawinan'] is None: - val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan') - if val: - result['status_perkawinan'] = val.upper() - elif 'belum' in text_lower: - result['status_perkawinan'] = 'BELUM KAWIN' - elif 'kawin' in text_lower and 'cerai' not in text_lower: - result['status_perkawinan'] = 'KAWIN' - elif 'cerai hidup' in text_lower: - result['status_perkawinan'] = 'CERAI HIDUP' - elif 'cerai mati' in text_lower: - result['status_perkawinan'] = 'CERAI MATI' + # Check against official list first + text_upper = text.upper().replace("BELUMKAWIN", "BELUM KAWIN") + for status in self.STATUS_PERKAWINAN_LIST: + if status in text_upper: + result['status_perkawinan'] = status + break + + # Fallback to extraction if not found in list + if result['status_perkawinan'] is None: + val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan') + if val: + result['status_perkawinan'] = val.upper() # ===== PEKERJAAN ===== if 'pekerjaan' in text_lower: @@ -430,6 +950,88 @@ class KTPExtractor: if result['berlaku_hingga'] or i > len(texts) * 0.7: result['tanggal_penerbitan'] = found_date + # ============================================ + # AGGRESSIVE SCAN: Cari agama dari semua teks OCR + # ============================================ + # Indonesia hanya punya 6 agama resmi, mudah dideteksi + if result['agama'] is None: + # Daftar agama dengan variasi penulisan + agama_patterns = { + 'ISLAM': ['ISLAM', 'ISLM', 'ISIAM', 'ISLAMI'], + 'KRISTEN': ['KRISTEN', 'KRISTEN PROTESTAN', 'PROTESTAN', 'KRISTN'], + 'KATOLIK': ['KATOLIK', 'KATHOLIK', 'KATHOLK', 'KATOLIK ROMA', 'KATOLIK.'], + 'HINDU': ['HINDU', 'HNDU', 'HINDU DHARMA', 'HINDHU'], + 'BUDDHA': ['BUDDHA', 'BUDHA', 'BUDDA', 'BUDDHIS'], + 'KONGHUCU': ['KONGHUCU', 'KHONGHUCU', 'KONGHUCHU', 'CONFUCIUS'], + } + + for text in texts: + text_upper = text.upper().strip() + # Skip jika teks terlalu pendek atau terlalu panjang + if len(text_upper) < 4 or len(text_upper) > 30: + continue + + for agama_std, variants in agama_patterns.items(): + for variant in variants: + if variant in text_upper: + result['agama'] = agama_std + print(f" [AGAMA SCAN] Found '{variant}' in '{text_upper}' -> {agama_std}") + break + if result['agama']: + break + if result['agama']: + break + + # ============================================ + # AGGRESSIVE SCAN: Cari golongan darah dari semua teks OCR + # ============================================ + # Golongan darah hanya 4: A, B, AB, O (dengan/tanpa rhesus +/-) + if result['gol_darah'] is None: + gol_darah_patterns = ['AB+', 'AB-', 'A+', 'A-', 'B+', 'B-', 'O+', 'O-', 'AB', 'A', 'B', 'O'] + + for text in texts: + text_upper = text.upper().strip() + # Hapus punctuation umum + text_clean = re.sub(r'[:\.\,\s]+', '', text_upper) + # Konversi 0 (nol) menjadi O (huruf) - OCR sering salah baca + text_clean = text_clean.replace('0', 'O') + + # Skip jika teks terlalu panjang (bukan gol darah) + if len(text_clean) > 10: + continue + + # Cari match untuk gol darah (dari panjang ke pendek untuk prioritas AB sebelum A/B) + for gol in gol_darah_patterns: + # Exact match setelah dibersihkan + if text_clean == gol: + result['gol_darah'] = gol + print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}") + break + # Match dengan prefix GOL + if text_clean == f"GOL{gol}" or text_clean == f"GOLDARAH{gol}": + result['gol_darah'] = gol + print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}") + break + # Match sebagai single character di akhir teks pendek + if len(text_clean) <= 3 and text_clean.endswith(gol): + result['gol_darah'] = gol + print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}") + break + + if result['gol_darah']: + break + + # ============================================ + # AGGRESSIVE SCAN: Cari berlaku hingga dari semua teks OCR + # ============================================ + if result['berlaku_hingga'] is None: + for text in texts: + text_upper = text.upper().strip() + if 'SEUMUR' in text_upper or 'HIDUP' in text_upper: + result['berlaku_hingga'] = 'SEUMUR HIDUP' + print(f" [BERLAKU SCAN] Found '{text_upper}' -> SEUMUR HIDUP") + break + # Post-processing result = self._post_process(result) @@ -505,6 +1107,21 @@ class KTPExtractor: else: result['nik'] = None + # Fix format tanggal lahir yang salah + # Pattern: DDMM-YYYY (contoh: 1608-1976) -> DD-MM-YYYY (16-08-1976) + if result['tanggal_lahir']: + tl = result['tanggal_lahir'] + # Match DDMM-YYYY format (salah) + wrong_format = re.match(r'^(\d{2})(\d{2})-(\d{4})$', tl) + if wrong_format: + result['tanggal_lahir'] = f"{wrong_format.group(1)}-{wrong_format.group(2)}-{wrong_format.group(3)}" + print(f" [DATE FIX] '{tl}' -> '{result['tanggal_lahir']}'") + # Match DDMMYYYY format (tanpa separator) + no_sep_format = re.match(r'^(\d{2})(\d{2})(\d{4})$', tl) + if no_sep_format: + result['tanggal_lahir'] = f"{no_sep_format.group(1)}-{no_sep_format.group(2)}-{no_sep_format.group(3)}" + print(f" [DATE FIX] '{tl}' -> '{result['tanggal_lahir']}'") + # Clean all string values - remove leading colons and extra whitespace for field in result: if result[field] and isinstance(result[field], str): @@ -540,6 +1157,54 @@ class KTPExtractor: result['berlaku_hingga'] = 'SEUMUR HIDUP' else: result['berlaku_hingga'] = bh + else: + # Fallback: Sesuai peraturan pemerintah, e-KTP berlaku seumur hidup + # Berlaku untuk e-KTP yang diterbitkan sejak 2011 + result['berlaku_hingga'] = 'SEUMUR HIDUP' + print(" [FALLBACK] berlaku_hingga = SEUMUR HIDUP (peraturan pemerintah)") + + # ============================================ + # Parse nama Bali jika terdeteksi + # ============================================ + # Deteksi apakah ini KTP Bali berdasarkan: + # 1. Provinsi = BALI + # 2. NIK dimulai dengan 51 (kode Bali) + # 3. Nama mengandung komponen khas Bali (NI, I GUSTI, dll) + is_bali = False + if result.get('provinsi') and 'BALI' in result['provinsi'].upper(): + is_bali = True + elif result.get('nik') and result['nik'].startswith('51'): + is_bali = True + elif result.get('nama'): + nama_upper = result['nama'].upper() + # Cek apakah nama dimulai dengan prefix Bali + if nama_upper.startswith('NI') or nama_upper.startswith('IGUSTI') or \ + nama_upper.startswith('IDABAGUS') or nama_upper.startswith('IDAAYU') or \ + any(nama_upper.startswith(p) for p in ['GUSTI', 'WAYAN', 'MADE', 'NYOMAN', 'KETUT', 'PUTU', 'KADEK', 'KOMANG']): + is_bali = True + + if is_bali and result.get('nama'): + result['nama'] = self._parse_balinese_name(result['nama']) + + # ============================================ + # Validasi dan koreksi Agama + # ============================================ + if result.get('agama'): + agama = result['agama'].upper().strip() + # Fuzzy match terhadap daftar agama valid + agama_match = None + best_ratio = 0 + for valid_agama in self.AGAMA_LIST: + ratio = difflib.SequenceMatcher(None, agama, valid_agama.upper()).ratio() + if ratio > best_ratio and ratio > 0.6: + best_ratio = ratio + agama_match = valid_agama.upper() + + if agama_match: + if agama_match != agama: + print(f" [AGAMA VALIDATE] '{agama}' -> '{agama_match}' (ratio={best_ratio:.2f})") + result['agama'] = agama_match + # Tidak ada fallback otomatis untuk agama - harus dari OCR # Fix merged kabupaten/kota names (e.g., JAKARTASELATAN -> JAKARTA SELATAN) if result['kabupaten_kota']: @@ -572,6 +1237,29 @@ class KTPExtractor: alamat = re.sub(r'\b(NO|BLOK)(\d+|[A-Z])\b', r'\1 \2', alamat, flags=re.IGNORECASE) result['alamat'] = alamat.upper() + # ============================================ + # Cross-validation: Tempat Lahir vs Kel/Desa + # ============================================ + # Pada KTP, tempat lahir sering sama dengan desa/kelurahan + # Jika tempat_lahir mirip dengan kel_desa, gunakan yang tervalidasi + if result.get('tempat_lahir') and result.get('kel_desa'): + tl = result['tempat_lahir'].upper() + kd = result['kel_desa'].upper() + + # Hitung similarity + ratio = difflib.SequenceMatcher(None, tl, kd).ratio() + + if ratio > 0.7: + # Tempat lahir mirip dengan kel/desa, gunakan kel/desa yang sudah divalidasi + print(f" [CROSS-VALIDATE] Tempat Lahir '{tl}' mirip dengan Kel/Desa '{kd}' (ratio={ratio:.2f})") + result['tempat_lahir'] = kd + elif ratio > 0.5: + # Cukup mirip, log untuk debugging + print(f" [CROSS-VALIDATE] Tempat Lahir '{tl}' mungkin sama dengan Kel/Desa '{kd}' (ratio={ratio:.2f})") + + # Jika tempat_lahir kosong tapi kel_desa ada, mungkin sama + # (tidak otomatis mengisi karena bisa beda) + return result diff --git a/migrate_db.py b/migrate_db.py new file mode 100644 index 0000000..278a9ca --- /dev/null +++ b/migrate_db.py @@ -0,0 +1,48 @@ +import os +import pymysql +from database import DB_CONFIG + +def migrate_db(): + conn = pymysql.connect( + host=DB_CONFIG['host'], + port=int(DB_CONFIG['port']), + user=DB_CONFIG['user'], + password=DB_CONFIG['password'], + database=DB_CONFIG['database'] + ) + + try: + with conn.cursor() as cursor: + # Check if column exists + print("Checking schema...") + cursor.execute("SHOW COLUMNS FROM ktp_records LIKE 'image_path'") + result = cursor.fetchone() + + if not result: + print("Adding image_path column to ktp_records...") + cursor.execute("ALTER TABLE ktp_records ADD COLUMN image_path VARCHAR(255) NULL AFTER berlaku_hingga") + conn.commit() + print("Migration successful: Added image_path column.") + else: + print("Column image_path already exists in KTP. No migration needed.") + + # Check KK + print("Checking KK schema...") + cursor.execute("SHOW COLUMNS FROM kk_records LIKE 'image_path'") + result_kk = cursor.fetchone() + + if not result_kk: + print("Adding image_path column to kk_records...") + cursor.execute("ALTER TABLE kk_records ADD COLUMN image_path VARCHAR(255) NULL AFTER kode_pos") + conn.commit() + print("Migration successful: Added image_path column to KK.") + else: + print("Column image_path already exists in KK.") + + except Exception as e: + print(f"Migration error: {e}") + finally: + conn.close() + +if __name__ == "__main__": + migrate_db() diff --git a/models.py b/models.py new file mode 100644 index 0000000..5601e49 --- /dev/null +++ b/models.py @@ -0,0 +1,138 @@ +""" +Database Models for OCR Application +""" + +from datetime import datetime +from database import db + + +class KTPRecord(db.Model): + """Model untuk menyimpan data KTP hasil OCR""" + __tablename__ = 'ktp_records' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + nik = db.Column(db.String(16), unique=True, nullable=True, index=True) + nama = db.Column(db.String(100), nullable=True) + tempat_lahir = db.Column(db.String(50), nullable=True) + tanggal_lahir = db.Column(db.String(20), nullable=True) + jenis_kelamin = db.Column(db.String(20), nullable=True) + gol_darah = db.Column(db.String(5), nullable=True) + alamat = db.Column(db.Text, nullable=True) + rt_rw = db.Column(db.String(10), nullable=True) + kel_desa = db.Column(db.String(50), nullable=True) + kecamatan = db.Column(db.String(50), nullable=True) + kabupaten_kota = db.Column(db.String(50), nullable=True) + provinsi = db.Column(db.String(50), nullable=True) + agama = db.Column(db.String(20), nullable=True) + status_perkawinan = db.Column(db.String(30), nullable=True) + pekerjaan = db.Column(db.String(50), nullable=True) + kewarganegaraan = db.Column(db.String(10), nullable=True) + berlaku_hingga = db.Column(db.String(20), nullable=True) + image_path = db.Column(db.String(255), nullable=True) # Path to saved KTP image + raw_text = db.Column(db.Text, nullable=True) + created_at = db.Column(db.DateTime, default=datetime.utcnow) + updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + def to_dict(self): + """Convert model to dictionary""" + return { + 'id': self.id, + 'nik': self.nik, + 'nama': self.nama, + 'tempat_lahir': self.tempat_lahir, + 'tanggal_lahir': self.tanggal_lahir, + 'jenis_kelamin': self.jenis_kelamin, + 'gol_darah': self.gol_darah, + 'alamat': self.alamat, + 'rt_rw': self.rt_rw, + 'kel_desa': self.kel_desa, + 'kecamatan': self.kecamatan, + 'kabupaten_kota': self.kabupaten_kota, + 'provinsi': self.provinsi, + 'agama': self.agama, + 'status_perkawinan': self.status_perkawinan, + 'pekerjaan': self.pekerjaan, + 'kewarganegaraan': self.kewarganegaraan, + 'berlaku_hingga': self.berlaku_hingga, + 'image_path': self.image_path, + 'created_at': self.created_at.isoformat() if self.created_at else None, + 'updated_at': self.updated_at.isoformat() if self.updated_at else None + } + + @classmethod + def from_ocr_data(cls, ocr_data, raw_text=None): + """Create KTPRecord from OCR extracted data""" + return cls( + nik=ocr_data.get('nik'), + nama=ocr_data.get('nama'), + tempat_lahir=ocr_data.get('tempat_lahir'), + tanggal_lahir=ocr_data.get('tanggal_lahir'), + jenis_kelamin=ocr_data.get('jenis_kelamin'), + gol_darah=ocr_data.get('gol_darah'), + alamat=ocr_data.get('alamat'), + rt_rw=ocr_data.get('rt_rw'), + kel_desa=ocr_data.get('kel_desa'), + kecamatan=ocr_data.get('kecamatan'), + kabupaten_kota=ocr_data.get('kabupaten_kota'), + provinsi=ocr_data.get('provinsi'), + agama=ocr_data.get('agama'), + status_perkawinan=ocr_data.get('status_perkawinan'), + pekerjaan=ocr_data.get('pekerjaan'), + kewarganegaraan=ocr_data.get('kewarganegaraan'), + berlaku_hingga=ocr_data.get('berlaku_hingga'), + raw_text=raw_text + ) + + +class KKRecord(db.Model): + """Model untuk menyimpan data Kartu Keluarga hasil OCR""" + __tablename__ = 'kk_records' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + no_kk = db.Column(db.String(16), unique=True, nullable=True, index=True) + kepala_keluarga = db.Column(db.String(100), nullable=True) + alamat = db.Column(db.Text, nullable=True) + rt_rw = db.Column(db.String(10), nullable=True) + kel_desa = db.Column(db.String(50), nullable=True) + kecamatan = db.Column(db.String(50), nullable=True) + kabupaten_kota = db.Column(db.String(50), nullable=True) + provinsi = db.Column(db.String(50), nullable=True) + kode_pos = db.Column(db.String(10), nullable=True) + image_path = db.Column(db.String(255), nullable=True) # Path to saved KK image + raw_text = db.Column(db.Text, nullable=True) + created_at = db.Column(db.DateTime, default=datetime.utcnow) + updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + def to_dict(self): + """Convert model to dictionary""" + return { + 'id': self.id, + 'no_kk': self.no_kk, + 'kepala_keluarga': self.kepala_keluarga, + 'alamat': self.alamat, + 'rt_rw': self.rt_rw, + 'kel_desa': self.kel_desa, + 'kecamatan': self.kecamatan, + 'kabupaten_kota': self.kabupaten_kota, + 'provinsi': self.provinsi, + 'kode_pos': self.kode_pos, + 'image_path': self.image_path, + 'created_at': self.created_at.isoformat() if self.created_at else None, + 'updated_at': self.updated_at.isoformat() if self.updated_at else None + } + + @classmethod + def from_ocr_data(cls, ocr_data, raw_text=None): + """Create KKRecord from OCR extracted data""" + return cls( + no_kk=ocr_data.get('no_kk'), + kepala_keluarga=ocr_data.get('kepala_keluarga'), + alamat=ocr_data.get('alamat'), + rt_rw=ocr_data.get('rt_rw'), + kel_desa=ocr_data.get('kel_desa'), + kecamatan=ocr_data.get('kecamatan'), + kabupaten_kota=ocr_data.get('kabupaten_kota'), + provinsi=ocr_data.get('provinsi'), + kode_pos=ocr_data.get('kode_pos'), + raw_text=raw_text + ) diff --git a/ocr_engine.py b/ocr_engine.py index ba998c3..07c37dc 100644 --- a/ocr_engine.py +++ b/ocr_engine.py @@ -20,16 +20,19 @@ class OCREngine: def preprocess_image(self, image_path: str) -> np.ndarray: """ - Preprocessing gambar untuk hasil OCR lebih baik + Enhanced preprocessing untuk hasil OCR lebih baik + Based on Context7 OpenCV documentation: - Resize jika terlalu besar - - Enhance contrast + - Denoising untuk mengurangi noise + - CLAHE untuk adaptive histogram equalization + - Sharpening untuk teks lebih jelas """ img = cv2.imread(image_path) if img is None: raise ValueError(f"Tidak dapat membaca gambar: {image_path}") - # Resize jika terlalu besar (max 2000px) - max_dim = 2000 + # Resize jika terlalu besar (max 1500px - optimized for speed) + max_dim = 1500 height, width = img.shape[:2] if max(height, width) > max_dim: scale = max_dim / max(height, width) @@ -38,12 +41,20 @@ class OCREngine: # Convert ke grayscale untuk preprocessing gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # Enhance contrast menggunakan CLAHE - clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) - enhanced = clahe.apply(gray) + # Denoise (from Context7) - mengurangi noise tanpa blur teks + denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21) + + # Enhanced CLAHE untuk dokumen (from Context7) + # clipLimit lebih tinggi untuk kontras lebih baik + clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) + enhanced = clahe.apply(denoised) + + # Sharpen using kernel (from Context7) + kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32) + sharpened = cv2.filter2D(enhanced, -1, kernel) # Convert kembali ke BGR untuk PaddleOCR - enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR) + enhanced_bgr = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR) return enhanced_bgr diff --git a/requirements.txt b/requirements.txt index deb2a32..77cda35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,6 @@ paddleocr flask pillow opencv-python +pymysql +flask-sqlalchemy +requests diff --git a/static/style.css b/static/style.css index 6e7d67e..3390921 100644 --- a/static/style.css +++ b/static/style.css @@ -175,6 +175,121 @@ header h1 { max-height: 400px; border-radius: var(--radius); cursor: pointer; + display: block; + margin: 0 auto; +} + +/* Crop Container */ +.crop-container { + position: relative; + max-width: 100%; + margin-bottom: 1rem; + overflow: hidden; + border-radius: var(--radius); + background: #000; +} + +.crop-area { + position: absolute; + top: 0; + left: 0; + pointer-events: none; + /* Let clicks pass through, handles catch them */ +} + +/* Perspective Crop Handles */ +.crop-handle { + position: absolute; + width: 20px; + height: 20px; + background: var(--accent-primary); + border: 2px solid #fff; + border-radius: 50%; + transform: translate(-50%, -50%); + cursor: move; + pointer-events: auto; + z-index: 10; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5); + transition: transform 0.1s ease; +} + +.crop-handle:hover, +.crop-handle.active { + background: #fff; + border-color: var(--accent-primary); + transform: translate(-50%, -50%) scale(1.2); +} + +/* Crop Actions & Controls */ +.crop-actions-container { + display: flex; + flex-direction: column; + gap: 1rem; + margin-top: 1rem; + padding: 1rem; + background: var(--bg-secondary); + border-radius: var(--radius); + border: 1px solid var(--border); +} + +.rotation-control { + display: flex; + align-items: center; + gap: 1rem; + color: var(--text-secondary); +} + +.rotation-control label { + font-weight: 500; + min-width: 80px; +} + +.rotation-control input[type="range"] { + flex: 1; + cursor: pointer; + accent-color: var(--accent-primary); +} + +.crop-buttons { + display: flex; + gap: 1rem; + justify-content: center; +} + +.crop-action-btn { + padding: 0.5rem 1rem; + border: none; + border-radius: var(--radius); + cursor: pointer; + font-weight: 500; + transition: all 0.2s ease; + flex: 1; +} + +.crop-action-btn.primary { + background: var(--accent-gradient); + color: white; +} + +.crop-action-btn.primary:hover:not(:disabled) { + transform: translateY(-2px); + box-shadow: var(--shadow); +} + +.crop-action-btn.primary:disabled { + opacity: 0.6; + cursor: not-allowed; +} + +.crop-action-btn.secondary { + background: var(--bg-tertiary); + color: var(--text-secondary); + border: 1px solid var(--border); +} + +.crop-action-btn.secondary:hover { + background: var(--bg-primary); + color: var(--text-primary); } /* Process Button */ @@ -533,6 +648,290 @@ footer a:hover { border-radius: 4px; } -::-webkit-scrollbar-thumb:hover { - background: var(--text-muted); +/* Archive Header Button */ +.header-actions { + display: flex; + gap: 1rem; + justify-content: center; + margin-top: 1rem; +} + +.archive-header-btn { + padding: 0.5rem 1.25rem; + background: transparent; + border: 1px solid var(--accent-secondary); + color: var(--accent-secondary); + border-radius: var(--radius); + cursor: pointer; + font-weight: 600; + transition: all 0.2s ease; +} + +.archive-header-btn:hover { + background: var(--accent-primary); + color: white; + border-color: var(--accent-primary); +} + +/* Modal Styles */ +.modal { + position: fixed; + z-index: 1000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.7); + backdrop-filter: blur(4px); + overflow: auto; + animation: fadeIn 0.3s; +} + +@keyframes fadeIn { + from { + opacity: 0; + } + + to { + opacity: 1; + } +} + +.modal-content { + background-color: var(--bg-secondary); + margin: 5% auto; + padding: 2rem; + border: 1px solid var(--border); + border-radius: var(--radius-lg); + width: 90%; + max-width: 1000px; + box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5); + position: relative; + animation: slideDown 0.3s; +} + +@keyframes slideDown { + from { + transform: translateY(-50px); + opacity: 0; + } + + to { + transform: translateY(0); + opacity: 1; + } +} + +.modal-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 2rem; + padding-bottom: 1rem; + border-bottom: 1px solid var(--border); +} + +.modal-header h2 { + color: var(--text-primary); + font-size: 1.5rem; +} + +.close-btn { + background: transparent; + border: none; + color: var(--text-muted); + font-size: 2rem; + cursor: pointer; + line-height: 1; + transition: color 0.2s; +} + +.close-btn:hover { + color: var(--text-primary); +} + +/* Archive List Grid */ +.archive-list { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); + gap: 1.5rem; + max-height: 70vh; + overflow-y: auto; + padding-right: 0.5rem; +} + +.archive-card { + background: var(--bg-tertiary); + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; + transition: transform 0.2s, box-shadow 0.2s; + display: flex; + flex-direction: column; +} + +.archive-card:hover { + transform: translateY(-4px); + box-shadow: var(--shadow-lg); + border-color: var(--accent-primary); +} + +.archive-card-img { + width: 100%; + height: 160px; + background: #000; + display: flex; + align-items: center; + justify-content: center; + overflow: hidden; +} + +.archive-card-img img { + width: 100%; + height: 100%; + object-fit: cover; + transition: transform 0.3s; +} + +.archive-card:hover .archive-card-img img { + transform: scale(1.05); +} + +.archive-card-content { + padding: 1rem; + flex: 1; + display: flex; + flex-direction: column; +} + +.archive-card-content h3 { + font-size: 1rem; + margin-bottom: 0.5rem; + color: var(--text-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.archive-card-meta { + margin-bottom: 1rem; + font-size: 0.8rem; + color: var(--text-muted); + display: flex; + flex-direction: column; + gap: 0.25rem; +} + +.archive-card-actions { + margin-top: auto; +} + +.view-btn { + width: 100%; + padding: 0.5rem; + background: var(--bg-primary); + border: 1px solid var(--border); + color: var(--text-secondary); + border-radius: 6px; + cursor: pointer; + font-size: 0.875rem; + transition: all 0.2s; +} + +.view-btn:hover { + background: var(--accent-primary); + color: white; + border-color: var(--accent-primary); +} + +.archive-loading, +.archive-empty { + text-align: center; + padding: 3rem; + color: var(--text-muted); + font-size: 1.1rem; +} + +/* Print Styles */ +@media print { + + /* Reset Page */ + @page { + margin: 0; + size: auto; + } + + body { + margin: 0; + padding: 0; + background: white !important; + /* Ensure no scroll or extra pages from hidden content */ + height: 100vh !important; + overflow: hidden !important; + } + + /* Hide EVERYTHING initially with high specificity */ + body * { + visibility: hidden !important; + display: none !important; + /* Force display none to remove layout space */ + } + + /* Show ONLY Print Area and its children */ + #printArea, + #printArea * { + visibility: visible !important; + display: flex !important; + /* Restore display for parent */ + } + + /* Reset display for children of printArea specifically */ + #printArea * { + display: block !important; + /* Default to block or whatever needed */ + } + + /* Specific fix for image inside */ + #printArea img { + display: inline-block !important; + } + + #printArea { + position: fixed !important; + /* Fixed helps detach from flow */ + left: 0 !important; + top: 0 !important; + width: 100% !important; + height: 100% !important; + overflow: visible !important; + z-index: 99999 !important; + background: white !important; + + display: flex !important; + justify-content: center; + align-items: flex-start; + padding-top: 5cm; + /* Adjust padding as needed */ + } + + .ktp-print-size { + /* Standar ISO/IEC 7810 ID-1: 85.60 × 53.98 mm */ + width: 85.60mm !important; + height: 53.98mm !important; + max-width: none !important; + max-height: none !important; + border: 1px dashed #ccc; + box-shadow: none !important; + /* Remove any shadow */ + } + + .a4-print-size { + /* A4 Landscape: 297mm x 210mm */ + /* Use slightly less to account for margins if necessary, but standard is distinct */ + width: 297mm !important; + height: 210mm !important; + max-width: none !important; + max-height: none !important; + border: none; + } } \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 7f70e90..e303d3c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -13,6 +13,12 @@

📄 OCR KTP/KK

Pembaca Dokumen Indonesia Offline

+
+ + + +
@@ -40,7 +46,42 @@

PNG, JPG, JPEG, BMP, WEBP (max 16MB)

- + + + + + + + - + + + + + @@ -88,6 +132,52 @@
+ + + + + +