KK KTP arsip

This commit is contained in:
2025-12-31 01:38:01 +08:00
parent 4fe381b3f0
commit 1de94bfeb4
28 changed files with 3296 additions and 191 deletions

8
.gemini/settings.json Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"context7": {
"command": "npx",
"args": ["-y", "@upstash/context7-mcp@latest"]
}
}
}

BIN
KK/5103040808220001.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

BIN
KTP/3303080307040003.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

BIN
KTP/3529245512000002.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

BIN
KTP/3671092111950003.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 255 KiB

BIN
KTP/5102045811690001.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

BIN
KTP/5103022906800001.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

BIN
KTP/5171042004950004.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 237 KiB

BIN
KTP/7306046502850001.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

1010
app.py

File diff suppressed because it is too large Load Diff

39
database.py Normal file
View File

@@ -0,0 +1,39 @@
"""
Database Configuration for OCR Application
Using Flask-SQLAlchemy with MySQL (PyMySQL driver)
"""
import os
from flask_sqlalchemy import SQLAlchemy
# Shared SQLAlchemy handle for the application; init_db() binds it to the
# Flask app through db.init_app().
db = SQLAlchemy()
# Database configuration.
# Every value can be overridden through the environment. The literals are
# development defaults (the same credentials docker-compose.yml provisions)
# and should be replaced via DB_* variables outside local development.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'localhost'),
    'port': os.environ.get('DB_PORT', '3306'),
    'database': os.environ.get('DB_NAME', 'ocr_db'),
    'user': os.environ.get('DB_USER', 'ocr_user'),
    'password': os.environ.get('DB_PASSWORD', 'ocr_password123'),
}


def get_database_uri():
    """Build the SQLAlchemy connection URI for MySQL via the PyMySQL driver.

    The user name and password are percent-encoded before being placed in
    the URI. Without that, a credential containing ``@``, ``/`` or ``:``
    produces a URL that is malformed or that points at the wrong host.

    Returns:
        The ``mysql+pymysql://`` URI built from ``DB_CONFIG``, using the
        ``utf8mb4`` charset.
    """
    # Local import keeps this block self-contained (no new file-level import).
    from urllib.parse import quote_plus

    user = quote_plus(str(DB_CONFIG['user']))
    password = quote_plus(str(DB_CONFIG['password']))
    return (
        f"mysql+pymysql://{user}:{password}"
        f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}"
        f"/{DB_CONFIG['database']}?charset=utf8mb4"
    )
def init_db(app):
    """Configure SQLAlchemy on *app*, bind the shared ``db`` handle and create tables.

    Args:
        app: The Flask application to configure.

    Returns:
        The module-level ``db`` object, now bound to *app*.
    """
    engine_options = {
        'pool_recycle': 3600,
        'pool_pre_ping': True,
    }
    app.config.update(
        SQLALCHEMY_DATABASE_URI=get_database_uri(),
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
        SQLALCHEMY_ENGINE_OPTIONS=engine_options,
    )

    db.init_app(app)

    # create_all() needs an active application context to find the engine.
    with app.app_context():
        db.create_all()
        print(f"✓ Database connected: {DB_CONFIG['database']}@{DB_CONFIG['host']}")

    return db

21
docker-compose.yml Normal file
View File

@@ -0,0 +1,21 @@
# Local development MySQL server for the OCR application.
# Credentials fall back to the defaults database.py uses; override them
# through the environment (or a .env file) rather than editing this file.
version: '3.8'

services:
  mysql:
    image: mysql:8.0
    container_name: mysql-server
    restart: unless-stopped
    environment:
      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD:-root123}
      MYSQL_DATABASE: ${DB_NAME:-ocr_db}
      MYSQL_USER: ${DB_USER:-ocr_user}
      MYSQL_PASSWORD: ${DB_PASSWORD:-ocr_password123}
    ports:
      - "3306:3306"
    volumes:
      - mysql_data:/var/lib/mysql
    # NOTE(review): this option is reported as deprecated in recent MySQL 8.0
    # releases; confirm it is still accepted before bumping the image tag.
    command: --default-authentication-plugin=mysql_native_password

volumes:
  mysql_data:
    driver: local

174
image_processor.py Normal file
View File

@@ -0,0 +1,174 @@
"""
KTP Image Processor - Enhanced Version
Crop, resize, dan enhanced preprocessing untuk OCR yang lebih akurat
Standar e-KTP: 85.6mm x 53.98mm = 1011x638 px @300dpi
Improvements based on Context7 documentation:
- Pillow ImageEnhance for contrast/sharpness
- OpenCV CLAHE for adaptive histogram equalization
- Denoising for cleaner text detection
"""
import cv2
import numpy as np
import os
from PIL import Image, ImageEnhance, ImageFilter
# Target output size in pixels: the e-KTP card (85.6 mm x 53.98 mm) at 300 dpi,
# as stated in the module docstring.
KTP_WIDTH = 1011
KTP_HEIGHT = 638
def enhance_image_pil(image_path: str, output_path: str = None) -> str:
    """Enhance an image with Pillow to make its text easier to OCR.

    Applies, in order: contrast x1.3, sharpness x1.2 and the DETAIL filter.

    Args:
        image_path: Path to the input image.
        output_path: Where to save the result. Defaults to
            ``<image_path without extension>_enhanced.jpg``.

    Returns:
        The path of the enhanced image, or ``image_path`` unchanged when
        anything fails (best-effort: the caller can always keep going with
        the original file).
    """
    try:
        # The context manager releases the file handle even when a later
        # step raises; every step below returns a new in-memory image.
        with Image.open(image_path) as source:
            # Pillow refuses to filter palette ("P") images, so those are
            # expanded to RGB first instead of silently skipping enhancement.
            img = source.convert("RGB") if source.mode == "P" else source

            # Contrast enhancement (factor 1.3)
            img = ImageEnhance.Contrast(img).enhance(1.3)
            # Sharpness enhancement
            img = ImageEnhance.Sharpness(img).enhance(1.2)
            # Detail filter for text clarity
            img = img.filter(ImageFilter.DETAIL)

        if output_path is None:
            base, _ = os.path.splitext(image_path)
            output_path = f"{base}_enhanced.jpg"

        # JPEG cannot store an alpha channel; without this conversion an
        # RGBA/LA input (typical for PNG uploads) fails on save and the
        # function falls back to the unenhanced original.
        is_jpeg = os.path.splitext(output_path)[1].lower() in (".jpg", ".jpeg")
        if is_jpeg and img.mode not in ("RGB", "L", "CMYK"):
            img = img.convert("RGB")

        img.save(output_path, quality=95)
        print(f" [ENHANCE] Pillow enhanced: {output_path}")
        return output_path
    except Exception as e:
        print(f" [ENHANCE] Pillow error: {e}")
        return image_path  # Return original if enhancement fails
def enhance_image_cv(image: np.ndarray) -> np.ndarray:
    """Enhance an OpenCV image for OCR: denoise, equalize, then sharpen.

    The work is done on a grayscale copy, which is converted back to three
    channels before returning.

    Args:
        image: OpenCV image (BGR).

    Returns:
        The enhanced image (BGR), or ``image`` itself when any step fails.
    """
    try:
        # Non-local-means denoising on the grayscale version of the input.
        smoothed = cv2.fastNlMeansDenoising(
            cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), None, 10, 7, 21
        )

        # CLAHE: contrast-limited adaptive histogram equalization.
        equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        equalized = equalizer.apply(smoothed)

        # 3x3 sharpening kernel: centre weight 5, the four neighbours -1.
        sharpen_kernel = np.array(
            [[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32
        )
        crisp = cv2.filter2D(equalized, -1, sharpen_kernel)

        return cv2.cvtColor(crisp, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        print(f" [ENHANCE] OpenCV error: {e}")
        return image  # Return original if enhancement fails
def crop_by_ocr_bounds(image, ocr_results, padding=0.03):
    """Crop *image* to the region covered by the OCR bounding boxes.

    Args:
        image: Array-like image exposing ``shape`` and 2-D slicing.
        ocr_results: Iterable of dicts; each may carry a ``'box'`` entry
            holding at least four ``(x, y)`` points. Points may be lists,
            tuples or numpy rows.
        padding: Extra margin on every side, as a fraction of the image
            width/height.

    Returns:
        The cropped view, or *image* unchanged when there are no usable
        points or the resulting region would be empty.
    """
    if not ocr_results:
        return image

    h, w = image.shape[:2]
    all_x = []
    all_y = []

    for r in ocr_results:
        box = r.get('box')
        # A missing or None box is skipped instead of crashing on len().
        if box is None or len(box) < 4:
            continue
        for point in box:
            # numpy rows are accepted too: OCR engines commonly return
            # boxes as arrays, which the list/tuple check used to drop.
            if not isinstance(point, (list, tuple, np.ndarray)) or len(point) < 2:
                continue
            try:
                x, y = float(point[0]), float(point[1])
            except (TypeError, ValueError):
                # Only the malformed point is dropped, not the whole box.
                continue
            all_x.append(x)
            all_y.append(y)

    if not all_x:
        return image

    pad_x = w * padding
    pad_y = h * padding
    x1 = int(max(0, min(all_x) - pad_x))
    y1 = int(max(0, min(all_y) - pad_y))
    x2 = int(min(w, max(all_x) + pad_x))
    y2 = int(min(h, max(all_y) + pad_y))

    # Boxes lying entirely outside the image would yield an empty crop,
    # which downstream resizing cannot handle.
    if x2 <= x1 or y2 <= y1:
        return image

    return image[y1:y2, x1:x2]
def normalize_ktp_image(image_path, output_path=None, ocr_results=None):
    """Normalize a KTP photo to the standard card size.

    Steps:
        1. Crop to the OCR bounding boxes (when ``ocr_results`` is given).
        2. Rotate portrait images 90 degrees clockwise so the result is landscape.
        3. Resize to ``KTP_WIDTH`` x ``KTP_HEIGHT``.
        4. Save as JPEG (quality 95).

    Args:
        image_path: Path of the image to read.
        output_path: Destination path. Defaults to
            ``<image_path without extension>_normalized.jpg``.
        ocr_results: Optional OCR results passed to ``crop_by_ocr_bounds``.

    Returns:
        ``(output_path, True, message)`` on success, otherwise
        ``(None, False, message)``. Exceptions are caught and reported
        through the message rather than raised.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            return None, False, "Gagal membaca gambar"

        h, w = image.shape[:2]
        print(f" [IMAGE] Original: {w}x{h}")

        # Crop
        if ocr_results:
            image = crop_by_ocr_bounds(image, ocr_results)
            h, w = image.shape[:2]
            print(f" [IMAGE] Cropped: {w}x{h}")

        # Landscape
        if h > w:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        # Resize
        resized = cv2.resize(image, (KTP_WIDTH, KTP_HEIGHT),
                             interpolation=cv2.INTER_LANCZOS4)

        # Save
        if output_path is None:
            base, _ = os.path.splitext(image_path)
            output_path = f"{base}_normalized.jpg"

        # imwrite signals failure through its return value, not an
        # exception; ignoring it reported success for files never written.
        if not cv2.imwrite(output_path, resized, [cv2.IMWRITE_JPEG_QUALITY, 95]):
            return None, False, f"Gagal menyimpan gambar: {output_path}"
        print(f" [IMAGE] Saved: {output_path}")

        return output_path, True, f"Normalized to {KTP_WIDTH}x{KTP_HEIGHT}"
    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, False, f"Error: {str(e)}"

View File

@@ -2,17 +2,42 @@
KTP Field Extractor
Ekstraksi data terstruktur dari hasil OCR KTP Indonesia
Mendukung berbagai format output OCR (full-width colon, standard colon, tanpa colon)
OPTIMIZED: Pre-compiled regex patterns for better performance
"""
import re
from typing import Dict, Optional, List
import difflib
# Debug mode - set to False for production
DEBUG_MODE = False
class KTPExtractor:
"""Ekstrak field dari hasil OCR KTP"""
# Pattern colon yang berbeda-beda (standard, full-width, dll)
COLON_PATTERN = r'[:\]'
# Pre-compiled regex patterns (optimization)
COLON_PATTERN = re.compile(r'[:]')
NIK_PATTERN = re.compile(r'\b(\d{16})\b')
DATE_PATTERN = re.compile(r'(\d{2}[-/]\d{2}[-/]\d{4})')
RT_RW_PATTERN = re.compile(r'(\d{3})\s*/\s*(\d{3})')
GOL_DARAH_PATTERN = re.compile(r'([ABO]{1,2}[+\-]?)', re.IGNORECASE)
PROVINSI_SPLIT_PATTERN = re.compile(r'(?i)provinsi\s*')
KABUPATEN_SPLIT_PATTERN = re.compile(r'(?i)\s*(kabupaten|kota)\s*')
TTL_PATTERN = re.compile(r'(?i)tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
# Pattern colon string (for backward compatibility)
COLON_PATTERN_STR = r'[:]'
# Daftar Provinsi Indonesia (38 Provinsi)
PROVINSI_LIST = [
"ACEH", "SUMATERA UTARA", "SUMATERA BARAT", "RIAU", "JAMBI", "SUMATERA SELATAN", "BENGKULU", "LAMPUNG",
"KEPULAUAN BANGKA BELITUNG", "KEPULAUAN RIAU", "DKI JAKARTA", "JAWA BARAT", "JAWA TENGAH", "DI YOGYAKARTA",
"JAWA TIMUR", "BANTEN", "BALI", "NUSA TENGGARA BARAT", "NUSA TENGGARA TIMUR", "KALIMANTAN BARAT",
"KALIMANTAN TENGAH", "KALIMANTAN SELATAN", "KALIMANTAN TIMUR", "KALIMANTAN UTARA", "SULAWESI UTARA",
"SULAWESI TENGAH", "SULAWESI SELATAN", "SULAWESI TENGGARA", "GORONTALO", "SULAWESI BARAT", "MALUKU",
"MALUKU UTARA", "PAPUA BARAT", "PAPUA", "PAPUA SELATAN", "PAPUA TENGAH", "PAPUA PEGUNUNGAN", "PAPUA BARAT DAYA"
]
# Keywords untuk jenis kelamin
MALE_KEYWORDS = ['laki', 'pria', 'male']
@@ -26,6 +51,99 @@ class KTPExtractor:
'buruh', 'petani', 'nelayan', 'karyawan', 'ibu rumah tangga',
'tidak bekerja', 'lainnya', 'mengurus rumah tangga']
# Status Perkawinan yang valid
STATUS_PERKAWINAN_LIST = ['BELUM KAWIN', 'KAWIN', 'CERAI HIDUP', 'CERAI MATI']
# Field Labels untuk fuzzy matching (mengatasi typo OCR seperti "Aamat" -> "ALAMAT")
FIELD_LABELS = {
'nama': ['NAMA'],
'alamat': ['ALAMAT'],
'agama': ['AGAMA'],
'pekerjaan': ['PEKERJAAN'],
'kewarganegaraan': ['KEWARGANEGARAAN', 'WARGANEGARA'],
'tempat_lahir': ['TEMPAT', 'LAHIR', 'TEMPAT/TGL LAHIR'],
'jenis_kelamin': ['JENIS KELAMIN', 'JENIS', 'KELAMIN'],
'gol_darah': ['GOL. DARAH', 'GOL DARAH', 'GOLONGAN DARAH'],
'kel_desa': ['KEL/DESA', 'KELURAHAN', 'DESA'],
'kecamatan': ['KECAMATAN', 'KEC'],
'status_perkawinan': ['STATUS PERKAWINAN', 'PERKAWINAN'],
'berlaku_hingga': ['BERLAKU HINGGA', 'BERLAKU'],
'rt_rw': ['RT/RW', 'RT', 'RW'],
}
# ============================================
# Sistem Penamaan Hindu Bali
# ============================================
# Struktur: [Prefix Gender] + [Gelar Kasta] + [Penanda Gender] + [Urutan Lahir] + [Nama Pribadi]
# Prefix penanda gender (harus di awal nama)
BALI_GENDER_PREFIX = {
'NI': 'PEREMPUAN', # Prefix untuk perempuan
'I': 'LAKI-LAKI', # Prefix untuk laki-laki
}
# Gelar Kasta (setelah prefix gender)
BALI_KASTA = {
'IDA': 'BRAHMANA',
'GUSTI': 'KSATRIA',
'ANAK AGUNG': 'KSATRIA',
'COKORDA': 'KSATRIA',
'DEWA': 'KSATRIA',
'DESAK': 'KSATRIA',
'AGUNG': 'KSATRIA',
'NGAKAN': 'WAISYA',
'SANG': 'WAISYA',
'SI': 'WAISYA',
}
# Penanda gender tambahan (setelah kasta)
BALI_GENDER_MARKER = {
'AYU': 'PEREMPUAN',
'ISTRI': 'PEREMPUAN',
'LUH': 'PEREMPUAN',
'BAGUS': 'LAKI-LAKI',
'GEDE': 'LAKI-LAKI',
'AGUS': 'LAKI-LAKI',
'ALIT': 'LAKI-LAKI', # Kecil/muda (untuk laki-laki)
}
# Urutan kelahiran (bersiklus setiap 4 anak)
BALI_BIRTH_ORDER = {
'PUTU': 1, 'WAYAN': 1, 'GEDE': 1, 'ILUH': 1,
'MADE': 2, 'KADEK': 2, 'NENGAH': 2,
'NYOMAN': 3, 'KOMANG': 3,
'KETUT': 4,
'BALIK': 5, # Untuk anak ke-5+ (siklus ulang)
}
# Soroh/Klan Bali (identifikasi garis keturunan)
BALI_SOROH = {
'PASEK': 'SOROH', # Klan mayoritas (~60% Hindu Bali)
'PANDE': 'SOROH', # Klan pandai besi/metalurgi
'ARYA': 'SOROH', # Klan Arya
'BENDESA': 'SOROH', # Pemimpin adat
'TANGKAS': 'SOROH', # Klan Tangkas
'CELAGI': 'SOROH', # Klan Celagi
'SENGGUHU': 'SOROH', # Klan Sengguhu
'KUBAYAN': 'SOROH', # Klan Kubayan
'BANDESA': 'SOROH', # Varian Bendesa
}
# Gabungkan semua komponen untuk deteksi (urut dari panjang ke pendek)
BALI_NAME_COMPONENTS = [
# Prefix gender
'NI', 'I',
# Kasta (prioritas: yang lebih panjang dulu)
'ANAK AGUNG', 'COKORDA', 'NGAKAN',
'IDA', 'GUSTI', 'DEWA', 'DESAK', 'AGUNG', 'SANG', 'SI',
# Soroh/Klan
'PASEK', 'PANDE', 'ARYA', 'BENDESA', 'BANDESA', 'TANGKAS', 'CELAGI', 'SENGGUHU', 'KUBAYAN',
# Gender marker
'AYU', 'ISTRI', 'LUH', 'BAGUS', 'GEDE', 'AGUS', 'ALIT',
# Urutan lahir
'WAYAN', 'PUTU', 'ILUH', 'MADE', 'KADEK', 'NENGAH', 'NYOMAN', 'KOMANG', 'KETUT', 'BALIK',
]
# KTP Zone Template (normalized coordinates: x_min, y_min, x_max, y_max)
# Based on standard KTP layout
ZONES = {
@@ -74,6 +192,211 @@ class KTPExtractor:
if len(parts) > 1:
return parts[1].strip()
return text.strip()
def _find_best_match(self, text: str, candidates: List[str], cutoff: float = 0.6) -> Optional[str]:
"""Find best fuzzy match from candidates"""
matches = difflib.get_close_matches(text, candidates, n=1, cutoff=cutoff)
return matches[0] if matches else None
def _is_label_match(self, text: str, field_name: str, cutoff: float = 0.7) -> bool:
"""
Fuzzy match untuk label field - mengatasi typo OCR seperti "Aamat" -> "ALAMAT"
Returns True jika text cocok dengan salah satu label untuk field tersebut
"""
if not text or not text.strip():
return False
if field_name not in self.FIELD_LABELS:
return field_name.lower() in text.lower()
text_upper = text.upper().strip()
# Coba exact match dulu (lebih cepat)
for label in self.FIELD_LABELS[field_name]:
if label in text_upper:
return True
# Fuzzy match jika tidak ada exact match
# Ekstrak kata pertama dari text (biasanya label ada di awal)
parts = text_upper.split(':')[0].split()
if not parts:
return False
first_word = parts[0]
for label in self.FIELD_LABELS[field_name]:
label_parts = label.split()
if not label_parts:
continue
# Bandingkan dengan kata pertama
ratio = difflib.SequenceMatcher(None, first_word, label_parts[0]).ratio()
if ratio >= cutoff:
print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})")
return True
return False
def _parse_balinese_name(self, name: str) -> str:
"""
Parse nama Bali yang digabung OCR dan tambahkan spasi yang tepat.
Contoh: "NIGUSTIAYUNYOMANSUWETRI" -> "NI GUSTI AYU NYOMAN SUWETRI"
Struktur nama Bali:
[Prefix Gender] + [Gelar Kasta] + [Penanda Gender] + [Urutan Lahir] + [Nama Pribadi]
PENTING: Hanya proses jika nama benar-benar mengandung komponen Bali!
"""
if not name:
return name
name_upper = name.upper().strip()
# Jika sudah ada spasi dengan jumlah wajar, kembalikan apa adanya
if name_upper.count(' ') >= 2:
return name_upper
# Cek apakah nama mengandung komponen Bali
# Nama harus dimulai dengan NI, I GUSTI, IDA, atau komponen urutan lahir Bali
name_clean = name_upper.replace(' ', '')
is_balinese_name = False
# Cek prefix khas Bali
if name_clean.startswith('NI') and len(name_clean) > 3:
# NI harus diikuti komponen Bali lain (GUSTI, LUH, WAYAN, dll)
after_ni = name_clean[2:]
for comp in ['GUSTI', 'LUH', 'WAYAN', 'MADE', 'NYOMAN', 'KETUT', 'PUTU', 'KADEK', 'KOMANG', 'PASEK', 'PANDE']:
if after_ni.startswith(comp):
is_balinese_name = True
break
elif name_clean.startswith('IGUSTI') or name_clean.startswith('IDABAGUS') or name_clean.startswith('IDAAYU'):
is_balinese_name = True
elif any(name_clean.startswith(p) for p in ['GUSTI', 'WAYAN', 'PUTU', 'MADE', 'KADEK', 'NYOMAN', 'KOMANG', 'KETUT']):
is_balinese_name = True
if not is_balinese_name:
# Bukan nama Bali, kembalikan dengan pemisahan spasi standar
# Jika ada 1 spasi, kembalikan apa adanya
if ' ' in name_upper:
return name_upper
# Jika tidak ada spasi sama sekali, kembalikan apa adanya (mungkin memang 1 kata)
return name_upper
# Urutan komponen yang akan dicari (dari yang terpanjang ke terpendek untuk akurasi)
components_ordered = sorted(self.BALI_NAME_COMPONENTS, key=len, reverse=True)
result_parts = []
remaining = name_clean
# Parse prefix gender (NI atau I di awal)
if remaining.startswith('NI'):
result_parts.append('NI')
remaining = remaining[2:]
elif remaining.startswith('I') and len(remaining) > 1:
# Pastikan bukan bagian dari kata lain
next_char = remaining[1] if len(remaining) > 1 else ''
# Cek apakah karakter setelah I adalah konsonan (bukan vokal)
if next_char not in 'AIUEO':
result_parts.append('I')
remaining = remaining[1:]
# Parse komponen-komponen lainnya
found = True
max_iterations = 10 # Prevent infinite loop
iteration = 0
while remaining and found and iteration < max_iterations:
found = False
iteration += 1
for component in components_ordered:
if remaining.startswith(component):
# Skip jika komponen sudah ada di result (kecuali nama pribadi)
if component not in result_parts or component not in self.BALI_NAME_COMPONENTS:
result_parts.append(component)
remaining = remaining[len(component):]
found = True
break
# Sisa adalah nama pribadi
if remaining:
result_parts.append(remaining)
parsed_name = ' '.join(result_parts)
# Log jika ada perubahan
if parsed_name != name_upper:
print(f" [BALI NAME] '{name_upper}' -> '{parsed_name}'")
return parsed_name
def _search_best_match_in_text(self, text: str, candidates: List[str], prefix: str = "") -> tuple:
"""
Search if any candidate is present in text using multiple strategies:
1. Exact substring
2. Prefix + Candidate (Fuzzy) - e.g. "PROVINSI BALI"
3. Candidate Only (Fuzzy) - e.g. "BALI" (if prefix is missing/damaged)
Returns (best_candidate, confidence_score)
"""
text_upper = text.upper()
best_match = None
best_ratio = 0.0
# Strategy 1: Exact substring match (fastest & most reliable)
for candidate in candidates:
if candidate in text_upper:
if len(candidate) > len(best_match or ""):
best_match = candidate
best_ratio = 1.0
if best_ratio == 1.0:
return best_match, best_ratio
# Strategy 2: Prefix Construction & Fuzzy Match
prefix_upper = prefix.upper() if prefix else ""
# DEBUG: Print checking (controlled by DEBUG_MODE)
if DEBUG_MODE:
print(f"DEBUG Check Text: '{text_upper}' with Prefix: '{prefix_upper}'")
for candidate in candidates:
# 2a. Compare with Prefix + Space (e.g. "PROVINSI BALI")
if prefix:
target_spaced = f"{prefix_upper} {candidate}"
s_spaced = difflib.SequenceMatcher(None, target_spaced, text_upper)
ratio_spaced = s_spaced.ratio()
# print(f" -> Compare '{target_spaced}' vs '{text_upper}' = {ratio_spaced:.2f}")
if ratio_spaced > best_ratio and ratio_spaced > 0.5:
best_ratio = ratio_spaced
best_match = candidate
# 2b. Compare with Prefix NO SPACE (e.g. "PROVINSIBALI")
# This handles "PROVNSIBALI" perfectly
target_merged = f"{prefix_upper}{candidate}"
s_merged = difflib.SequenceMatcher(None, target_merged, text_upper)
ratio_merged = s_merged.ratio()
if DEBUG_MODE:
print(f" -> Compare Merged '{target_merged}' vs '{text_upper}' = {ratio_merged:.2f}")
if ratio_merged > best_ratio and ratio_merged > 0.5:
best_ratio = ratio_merged
best_match = candidate
# 2c. Compare Candidate ONLY (e.g. "BALI")
if len(candidate) > 3:
s_raw = difflib.SequenceMatcher(None, candidate, text_upper)
ratio_raw = s_raw.ratio()
# print(f" -> Compare Raw '{candidate}' vs '{text_upper}' = {ratio_raw:.2f}")
if ratio_raw > best_ratio and ratio_raw > 0.6:
best_ratio = ratio_raw
best_match = candidate
if DEBUG_MODE:
print(f"DEBUG Best Match: {best_match} ({best_ratio:.2f})")
return best_match, best_ratio
def _detect_image_size(self, ocr_results: List[Dict]) -> tuple:
"""Detect image dimensions from bounding boxes"""
@@ -93,10 +416,62 @@ class KTPExtractor:
# PROVINSI from header
if 'header_provinsi' in zone_texts:
print(f"DEBUG Zone Provinsi Content: {zone_texts['header_provinsi']}")
for text in zone_texts['header_provinsi']:
if 'provinsi' in text.lower():
val = re.sub(r'(?i)provinsi\s*', '', text).strip()
if val:
text_clean = text.strip()
# Use prefix strategy: "PROVINSI " + result vs text
match, score = self._search_best_match_in_text(text_clean, self.PROVINSI_LIST, prefix="PROVINSI")
# LOWER THRESHOLD to 0.5 because "PROVINSI BALI" vs "PROVNSIBALI" is roughly 0.5-0.6 range
if match and score > 0.5:
result['provinsi'] = match
# Remove the found province (and label) from text to see what's left
# If we matched "PROVINSI JAWA TIMUR", the text might be "PROVNSIJAWATMRKABUPATENSUMENEP"
# It's hard to cleanly remove "PROVISI JAWA TIMUR" if it was fuzzy matched.
# BUT, we can try to find "KABUPATEN" or "KOTA" in the original text
# independent of the province match
if 'kabupaten' in text_clean.lower() or 'kota' in text_clean.lower():
parts = re.split(r'(?i)\s*(kabupaten|kota)', text_clean)
if len(parts) > 1:
kab_part = "".join(parts[1:]).strip()
kab_val = re.sub(r'^(?i)(kabupaten|kota)\s*', '', kab_part).strip()
if kab_val and result['kabupaten_kota'] is None:
prefix = "KABUPATEN" if "kabupaten" in text_clean.lower() else "KOTA"
result['kabupaten_kota'] = f"{prefix} {kab_val.upper()}"
break
# Fallback to keyword splitting (Legacy/Blurry fallback)
text_lower = text.lower()
val = text
# If keyword exists, strip it
if 'provinsi' in text_lower:
split_prov = re.split(r'(?i)provinsi\s*', text, 1)
if len(split_prov) > 1:
val = split_prov[1].strip()
else:
val = ""
# Check for merged text
if 'kabupaten' in text_lower or 'kota' in text_lower:
parts = re.split(r'(?i)\s*(kabupaten|kota)', val)
val = parts[0].strip()
if len(parts) > 1:
kab_part = "".join(parts[1:]).strip()
kab_val = re.sub(r'^(?i)(kabupaten|kota)\s*', '', kab_part).strip()
if kab_val and result['kabupaten_kota'] is None:
prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA"
result['kabupaten_kota'] = f"{prefix} {kab_val.upper()}"
if val and len(val) > 2:
# Try fuzzy match again on the cleaned value
best_match = self._find_best_match(val.upper(), self.PROVINSI_LIST, cutoff=0.6)
if best_match:
result['provinsi'] = best_match
else:
result['provinsi'] = val.upper()
break
@@ -104,13 +479,32 @@ class KTPExtractor:
if 'header_kabupaten' in zone_texts:
for text in zone_texts['header_kabupaten']:
text_lower = text.lower()
val = text
# Check keyword
if 'kabupaten' in text_lower or 'kota' in text_lower:
val = re.sub(r'(?i)(kabupaten|kota)\s*', '', text).strip()
if val:
result['kabupaten_kota'] = val.upper()
split_kab = re.split(r'(?i)\s*(kabupaten|kota)\s*', text, 1)
if len(split_kab) > 1:
val = split_kab[-1].strip()
else:
result['kabupaten_kota'] = text.upper()
break
val = ""
# If no keyword, but it's in the kabupaten zone, assume it's data
if val:
# Re-add prefix standard if we separated it or if it was missing
# Heuristic: if validation suggests it's a known regency, we are good.
# For now, standardize format.
if result['kabupaten_kota'] is None:
prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA"
# If no keyword found, default to KABUPATEN? Or better check Wilayah?
# Let's default to detected keyword or KABUPATEN
if "kota" in text_lower:
prefix = "KOTA"
else:
prefix = "KABUPATEN"
result['kabupaten_kota'] = f"{prefix} {val.upper()}"
break
# NAMA from nama zone (skip label line)
if 'nama' in zone_texts:
@@ -161,6 +555,89 @@ class KTPExtractor:
result['alamat'] = val.upper()
break
# RT/RW
if 'rt_rw' in zone_texts:
for text in zone_texts['rt_rw']:
rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
if rt_rw_match:
result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
break
# KEL/DESA
if 'kel_desa' in zone_texts:
for text in zone_texts['kel_desa']:
if 'kel' in text.lower() or 'desa' in text.lower():
val = self._extract_value_from_text(text)
if val and 'kel' not in val.lower():
result['kel_desa'] = val.upper()
break
elif result['kel_desa'] is None:
# Fallback context: simple text
result['kel_desa'] = text.upper()
# KECAMATAN
if 'kecamatan' in zone_texts:
for text in zone_texts['kecamatan']:
if 'kec' in text.lower():
val = self._extract_value_from_text(text)
if val and 'kec' not in val.lower():
result['kecamatan'] = val.upper()
break
elif result['kecamatan'] is None:
result['kecamatan'] = text.upper()
# AGAMA
if 'agama' in zone_texts:
for text in zone_texts['agama']:
val = text.upper()
if 'agama' in text.lower():
val = self._extract_value_from_text(text).upper()
# Verify against valid list
for agama in self.AGAMA_LIST:
if agama.upper() in val:
result['agama'] = agama.upper()
break
if result['agama']: break
# STATUS PERKAWINAN
if 'status' in zone_texts:
for text in zone_texts['status']:
val = text.upper()
# Normalize common OCR errors (e.g. BELUMKAWIN)
val = val.replace("BELUMKAWIN", "BELUM KAWIN")
# Check against official list
found_status = False
for status in self.STATUS_PERKAWINAN_LIST:
if status in val:
result['status_perkawinan'] = status
found_status = True
break
if found_status: break
# PEKERJAAN
if 'pekerjaan' in zone_texts:
for text in zone_texts['pekerjaan']:
val = text.upper()
if 'pekerjaan' in text.lower():
val = self._extract_value_from_text(text).upper()
# Check against list or take value
if len(val) > 3 and 'pekerjaan' not in val.lower():
result['pekerjaan'] = val
break
# WNI
if 'wni' in zone_texts:
for text in zone_texts['wni']:
if 'wni' in text.lower():
result['kewarganegaraan'] = 'WNI'
break
elif 'wna' in text.lower():
result['kewarganegaraan'] = 'WNA'
break
# PENERBITAN area (tempat & tanggal dalam satu zona)
if 'penerbitan' in zone_texts:
for text in zone_texts['penerbitan']:
@@ -194,7 +671,7 @@ class KTPExtractor:
'status_perkawinan': None,
'pekerjaan': None,
'kewarganegaraan': None,
'berlaku_hingga': None,
'berlaku_hingga': 'SEUMUR HIDUP', # Default sesuai peraturan pemerintah e-KTP
'provinsi': None,
'kabupaten_kota': None,
'tanggal_penerbitan': None,
@@ -234,6 +711,14 @@ class KTPExtractor:
# Fallback: Parse line by line for fields not found by zone
for i, text in enumerate(texts):
# Skip baris yang hanya berisi punctuation atau kosong
text_stripped = text.strip()
if not text_stripped or text_stripped in [':', '', '.', '-', '/', '|']:
continue
# Skip baris yang terlalu pendek (hanya 1-2 karakter non-alfanumerik)
if len(text_stripped) <= 2 and not any(c.isalnum() for c in text_stripped):
continue
text_lower = text.lower()
# Normalize colons
@@ -242,19 +727,49 @@ class KTPExtractor:
# ===== PROVINSI =====
if 'provinsi' in text_lower and result['provinsi'] is None:
val = self._extract_after_label(text_normalized, 'provinsi')
if val:
result['provinsi'] = val.upper()
elif i + 1 < len(texts) and 'provinsi' not in texts[i+1].lower():
# Mungkin value di line berikutnya
result['provinsi'] = texts[i+1].strip().upper()
# Split by PROVINSI and take remainder
split_prov = re.split(r'(?i)provinsi\s*', text, 1)
if len(split_prov) > 1:
val = split_prov[1].strip()
# Check if it contains kabupaten/kota (merged line case)
if 'kabupaten' in val.lower() or 'kota' in val.lower():
parts = re.split(r'(?i)\s*(kabupaten|kota)', val)
val = parts[0].strip()
if val:
# Fuzzy match against valid provinces
best_match = self._find_best_match(val.upper(), self.PROVINSI_LIST, cutoff=0.6)
if best_match:
result['provinsi'] = best_match
else:
result['provinsi'] = val.upper()
# Check for next line if current line only had 'PROVINSI'
if result['provinsi'] is None and i + 1 < len(texts):
next_text = texts[i+1].strip()
next_lower = next_text.lower()
# Only take next line if it doesn't look like another field
if not any(kw in next_lower for kw in ['provinsi', 'kabupaten', 'kota', 'nik']):
# Fuzzy match next line
val = next_text.upper()
best_match = self._find_best_match(val, self.PROVINSI_LIST, cutoff=0.6)
if best_match:
result['provinsi'] = best_match
else:
result['provinsi'] = val
# ===== KABUPATEN/KOTA =====
if ('kabupaten' in text_lower or 'kota' in text_lower or 'jakarta' in text_lower) and result['kabupaten_kota'] is None:
if 'provinsi' not in text_lower: # Bukan bagian dari provinsi
val = self._extract_after_label(text_normalized, 'kabupaten|kota')
if val:
result['kabupaten_kota'] = val.upper()
# Split by KABUPATEN or KOTA and take remainder
split_kab = re.split(r'(?i)\s*(kabupaten|kota)\s*', text, 1)
if len(split_kab) > 1:
prefix = "KABUPATEN" if "kabupaten" in text_lower else "KOTA"
val = split_kab[-1].strip()
if val:
result['kabupaten_kota'] = f"{prefix} {val.upper()}"
else:
result['kabupaten_kota'] = text.strip().upper()
else:
result['kabupaten_kota'] = text.strip().upper()
@@ -312,13 +827,17 @@ class KTPExtractor:
if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
result['gol_darah'] = text.strip().upper()
# ===== ALAMAT =====
if 'alamat' in text_lower and result['alamat'] is None:
val = self._extract_after_label(text_normalized, 'alamat')
# ===== ALAMAT ===== (dengan fuzzy label matching)
if result['alamat'] is None and self._is_label_match(text, 'alamat'):
val = self._extract_after_label(text_normalized, r'a{1,2}l{0,2}a?m{0,2}a?t')
if val:
result['alamat'] = val.upper()
elif i + 1 < len(texts):
result['alamat'] = texts[i+1].strip().upper()
# Ambil nilai dari baris berikutnya
next_text = texts[i+1].strip()
# Pastikan bukan label field lain
if len(next_text) > 2 and not self._is_label_match(next_text, 'rt_rw'):
result['alamat'] = next_text.upper()
# ===== RT/RW =====
rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
@@ -346,9 +865,9 @@ class KTPExtractor:
if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['agama', 'status', 'pekerjaan']):
result['kecamatan'] = next_text.upper()
# ===== AGAMA =====
if 'agama' in text_lower:
val = self._extract_after_label(text_normalized, 'agama')
# ===== AGAMA ===== (dengan fuzzy label matching)
if self._is_label_match(text, 'agama'):
val = self._extract_after_label(text_normalized, r'a?g{0,2}a?m{0,2}a')
if val and result['agama'] is None:
result['agama'] = val.upper()
elif result['agama'] is None and i + 1 < len(texts):
@@ -367,17 +886,18 @@ class KTPExtractor:
# ===== STATUS PERKAWINAN =====
if 'kawin' in text_lower:
if result['status_perkawinan'] is None:
val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan')
if val:
result['status_perkawinan'] = val.upper()
elif 'belum' in text_lower:
result['status_perkawinan'] = 'BELUM KAWIN'
elif 'kawin' in text_lower and 'cerai' not in text_lower:
result['status_perkawinan'] = 'KAWIN'
elif 'cerai hidup' in text_lower:
result['status_perkawinan'] = 'CERAI HIDUP'
elif 'cerai mati' in text_lower:
result['status_perkawinan'] = 'CERAI MATI'
# Check against official list first
text_upper = text.upper().replace("BELUMKAWIN", "BELUM KAWIN")
for status in self.STATUS_PERKAWINAN_LIST:
if status in text_upper:
result['status_perkawinan'] = status
break
# Fallback to extraction if not found in list
if result['status_perkawinan'] is None:
val = self._extract_after_label(text_normalized, 'status.*kawin|perkawinan')
if val:
result['status_perkawinan'] = val.upper()
# ===== PEKERJAAN =====
if 'pekerjaan' in text_lower:
@@ -430,6 +950,88 @@ class KTPExtractor:
if result['berlaku_hingga'] or i > len(texts) * 0.7:
result['tanggal_penerbitan'] = found_date
# ============================================
# AGGRESSIVE SCAN: Cari agama dari semua teks OCR
# ============================================
# Indonesia hanya punya 6 agama resmi, mudah dideteksi
if result['agama'] is None:
# Daftar agama dengan variasi penulisan
agama_patterns = {
'ISLAM': ['ISLAM', 'ISLM', 'ISIAM', 'ISLAMI'],
'KRISTEN': ['KRISTEN', 'KRISTEN PROTESTAN', 'PROTESTAN', 'KRISTN'],
'KATOLIK': ['KATOLIK', 'KATHOLIK', 'KATHOLK', 'KATOLIK ROMA', 'KATOLIK.'],
'HINDU': ['HINDU', 'HNDU', 'HINDU DHARMA', 'HINDHU'],
'BUDDHA': ['BUDDHA', 'BUDHA', 'BUDDA', 'BUDDHIS'],
'KONGHUCU': ['KONGHUCU', 'KHONGHUCU', 'KONGHUCHU', 'CONFUCIUS'],
}
for text in texts:
text_upper = text.upper().strip()
# Skip jika teks terlalu pendek atau terlalu panjang
if len(text_upper) < 4 or len(text_upper) > 30:
continue
for agama_std, variants in agama_patterns.items():
for variant in variants:
if variant in text_upper:
result['agama'] = agama_std
print(f" [AGAMA SCAN] Found '{variant}' in '{text_upper}' -> {agama_std}")
break
if result['agama']:
break
if result['agama']:
break
# ============================================
# AGGRESSIVE SCAN: Cari golongan darah dari semua teks OCR
# ============================================
# Golongan darah hanya 4: A, B, AB, O (dengan/tanpa rhesus +/-)
if result['gol_darah'] is None:
gol_darah_patterns = ['AB+', 'AB-', 'A+', 'A-', 'B+', 'B-', 'O+', 'O-', 'AB', 'A', 'B', 'O']
for text in texts:
text_upper = text.upper().strip()
# Hapus punctuation umum
text_clean = re.sub(r'[:\.\,\s]+', '', text_upper)
# Konversi 0 (nol) menjadi O (huruf) - OCR sering salah baca
text_clean = text_clean.replace('0', 'O')
# Skip jika teks terlalu panjang (bukan gol darah)
if len(text_clean) > 10:
continue
# Cari match untuk gol darah (dari panjang ke pendek untuk prioritas AB sebelum A/B)
for gol in gol_darah_patterns:
# Exact match setelah dibersihkan
if text_clean == gol:
result['gol_darah'] = gol
print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}")
break
# Match dengan prefix GOL
if text_clean == f"GOL{gol}" or text_clean == f"GOLDARAH{gol}":
result['gol_darah'] = gol
print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}")
break
# Match sebagai single character di akhir teks pendek
if len(text_clean) <= 3 and text_clean.endswith(gol):
result['gol_darah'] = gol
print(f" [GOL DARAH SCAN] Found '{text_upper}' -> {gol}")
break
if result['gol_darah']:
break
# ============================================
# AGGRESSIVE SCAN: Cari berlaku hingga dari semua teks OCR
# ============================================
if result['berlaku_hingga'] is None:
for text in texts:
text_upper = text.upper().strip()
if 'SEUMUR' in text_upper or 'HIDUP' in text_upper:
result['berlaku_hingga'] = 'SEUMUR HIDUP'
print(f" [BERLAKU SCAN] Found '{text_upper}' -> SEUMUR HIDUP")
break
# Post-processing
result = self._post_process(result)
@@ -505,6 +1107,21 @@ class KTPExtractor:
else:
result['nik'] = None
# Fix format tanggal lahir yang salah
# Pattern: DDMM-YYYY (contoh: 1608-1976) -> DD-MM-YYYY (16-08-1976)
if result['tanggal_lahir']:
tl = result['tanggal_lahir']
# Match DDMM-YYYY format (salah)
wrong_format = re.match(r'^(\d{2})(\d{2})-(\d{4})$', tl)
if wrong_format:
result['tanggal_lahir'] = f"{wrong_format.group(1)}-{wrong_format.group(2)}-{wrong_format.group(3)}"
print(f" [DATE FIX] '{tl}' -> '{result['tanggal_lahir']}'")
# Match DDMMYYYY format (tanpa separator)
no_sep_format = re.match(r'^(\d{2})(\d{2})(\d{4})$', tl)
if no_sep_format:
result['tanggal_lahir'] = f"{no_sep_format.group(1)}-{no_sep_format.group(2)}-{no_sep_format.group(3)}"
print(f" [DATE FIX] '{tl}' -> '{result['tanggal_lahir']}'")
# Clean all string values - remove leading colons and extra whitespace
for field in result:
if result[field] and isinstance(result[field], str):
@@ -540,6 +1157,54 @@ class KTPExtractor:
result['berlaku_hingga'] = 'SEUMUR HIDUP'
else:
result['berlaku_hingga'] = bh
else:
# Fallback: Sesuai peraturan pemerintah, e-KTP berlaku seumur hidup
# Berlaku untuk e-KTP yang diterbitkan sejak 2011
result['berlaku_hingga'] = 'SEUMUR HIDUP'
print(" [FALLBACK] berlaku_hingga = SEUMUR HIDUP (peraturan pemerintah)")
# ============================================
# Parse nama Bali jika terdeteksi
# ============================================
# Deteksi apakah ini KTP Bali berdasarkan:
# 1. Provinsi = BALI
# 2. NIK dimulai dengan 51 (kode Bali)
# 3. Nama mengandung komponen khas Bali (NI, I GUSTI, dll)
is_bali = False
if result.get('provinsi') and 'BALI' in result['provinsi'].upper():
is_bali = True
elif result.get('nik') and result['nik'].startswith('51'):
is_bali = True
elif result.get('nama'):
nama_upper = result['nama'].upper()
# Cek apakah nama dimulai dengan prefix Bali
if nama_upper.startswith('NI') or nama_upper.startswith('IGUSTI') or \
nama_upper.startswith('IDABAGUS') or nama_upper.startswith('IDAAYU') or \
any(nama_upper.startswith(p) for p in ['GUSTI', 'WAYAN', 'MADE', 'NYOMAN', 'KETUT', 'PUTU', 'KADEK', 'KOMANG']):
is_bali = True
if is_bali and result.get('nama'):
result['nama'] = self._parse_balinese_name(result['nama'])
# ============================================
# Validasi dan koreksi Agama
# ============================================
if result.get('agama'):
agama = result['agama'].upper().strip()
# Fuzzy match terhadap daftar agama valid
agama_match = None
best_ratio = 0
for valid_agama in self.AGAMA_LIST:
ratio = difflib.SequenceMatcher(None, agama, valid_agama.upper()).ratio()
if ratio > best_ratio and ratio > 0.6:
best_ratio = ratio
agama_match = valid_agama.upper()
if agama_match:
if agama_match != agama:
print(f" [AGAMA VALIDATE] '{agama}' -> '{agama_match}' (ratio={best_ratio:.2f})")
result['agama'] = agama_match
# Tidak ada fallback otomatis untuk agama - harus dari OCR
# Fix merged kabupaten/kota names (e.g., JAKARTASELATAN -> JAKARTA SELATAN)
if result['kabupaten_kota']:
@@ -572,6 +1237,29 @@ class KTPExtractor:
alamat = re.sub(r'\b(NO|BLOK)(\d+|[A-Z])\b', r'\1 \2', alamat, flags=re.IGNORECASE)
result['alamat'] = alamat.upper()
# ============================================
# Cross-validation: Tempat Lahir vs Kel/Desa
# ============================================
# Pada KTP, tempat lahir sering sama dengan desa/kelurahan
# Jika tempat_lahir mirip dengan kel_desa, gunakan yang tervalidasi
if result.get('tempat_lahir') and result.get('kel_desa'):
tl = result['tempat_lahir'].upper()
kd = result['kel_desa'].upper()
# Hitung similarity
ratio = difflib.SequenceMatcher(None, tl, kd).ratio()
if ratio > 0.7:
# Tempat lahir mirip dengan kel/desa, gunakan kel/desa yang sudah divalidasi
print(f" [CROSS-VALIDATE] Tempat Lahir '{tl}' mirip dengan Kel/Desa '{kd}' (ratio={ratio:.2f})")
result['tempat_lahir'] = kd
elif ratio > 0.5:
# Cukup mirip, log untuk debugging
print(f" [CROSS-VALIDATE] Tempat Lahir '{tl}' mungkin sama dengan Kel/Desa '{kd}' (ratio={ratio:.2f})")
# Jika tempat_lahir kosong tapi kel_desa ada, mungkin sama
# (tidak otomatis mengisi karena bisa beda)
return result

48
migrate_db.py Normal file
View File

@@ -0,0 +1,48 @@
import os
import pymysql
from database import DB_CONFIG
def migrate_db():
    """Add the ``image_path`` column to ``ktp_records`` and ``kk_records``.

    Connects with the settings in ``DB_CONFIG``, checks each table with
    ``SHOW COLUMNS`` and runs ``ALTER TABLE ... ADD COLUMN`` only when the
    column is missing. Tables are handled in order (KTP first, then KK); an
    exception is printed and stops the remaining steps. The connection is
    closed in every case.
    """
    # One entry per table: the messages printed at each stage, the table to
    # inspect, and the existing column that image_path is placed after.
    steps = (
        {
            'checking': "Checking schema...",
            'table': 'ktp_records',
            'after': 'berlaku_hingga',
            'adding': "Adding image_path column to ktp_records...",
            'added': "Migration successful: Added image_path column.",
            'present': "Column image_path already exists in KTP. No migration needed.",
        },
        {
            'checking': "Checking KK schema...",
            'table': 'kk_records',
            'after': 'kode_pos',
            'adding': "Adding image_path column to kk_records...",
            'added': "Migration successful: Added image_path column to KK.",
            'present': "Column image_path already exists in KK.",
        },
    )

    connection = pymysql.connect(
        host=DB_CONFIG['host'],
        port=int(DB_CONFIG['port']),
        user=DB_CONFIG['user'],
        password=DB_CONFIG['password'],
        database=DB_CONFIG['database']
    )

    try:
        with connection.cursor() as cur:
            for step in steps:
                print(step['checking'])
                cur.execute(f"SHOW COLUMNS FROM {step['table']} LIKE 'image_path'")
                if cur.fetchone():
                    # Column is already there; nothing to alter for this table.
                    print(step['present'])
                    continue

                print(step['adding'])
                cur.execute(
                    f"ALTER TABLE {step['table']} ADD COLUMN image_path "
                    f"VARCHAR(255) NULL AFTER {step['after']}"
                )
                connection.commit()
                print(step['added'])
    except Exception as e:
        print(f"Migration error: {e}")
    finally:
        connection.close()
if __name__ == "__main__":
migrate_db()

138
models.py Normal file
View File

@@ -0,0 +1,138 @@
"""
Database Models for OCR Application
"""
from datetime import datetime
from database import db
class KTPRecord(db.Model):
    """ORM model storing the fields extracted from a KTP by OCR."""
    __tablename__ = 'ktp_records'

    # Fields copied one-to-one from the OCR result dict, in the order they
    # are serialized by to_dict().
    _OCR_FIELDS = (
        'nik', 'nama', 'tempat_lahir', 'tanggal_lahir', 'jenis_kelamin',
        'gol_darah', 'alamat', 'rt_rw', 'kel_desa', 'kecamatan',
        'kabupaten_kota', 'provinsi', 'agama', 'status_perkawinan',
        'pekerjaan', 'kewarganegaraan', 'berlaku_hingga',
    )

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # NIK is unique and indexed; every extracted field is nullable.
    nik = db.Column(db.String(16), unique=True, nullable=True, index=True)
    nama = db.Column(db.String(100), nullable=True)
    tempat_lahir = db.Column(db.String(50), nullable=True)
    # Dates are stored as text, not as a DATE column.
    tanggal_lahir = db.Column(db.String(20), nullable=True)
    jenis_kelamin = db.Column(db.String(20), nullable=True)
    gol_darah = db.Column(db.String(5), nullable=True)
    alamat = db.Column(db.Text, nullable=True)
    rt_rw = db.Column(db.String(10), nullable=True)
    kel_desa = db.Column(db.String(50), nullable=True)
    kecamatan = db.Column(db.String(50), nullable=True)
    kabupaten_kota = db.Column(db.String(50), nullable=True)
    provinsi = db.Column(db.String(50), nullable=True)
    agama = db.Column(db.String(20), nullable=True)
    status_perkawinan = db.Column(db.String(30), nullable=True)
    pekerjaan = db.Column(db.String(50), nullable=True)
    kewarganegaraan = db.Column(db.String(10), nullable=True)
    berlaku_hingga = db.Column(db.String(20), nullable=True)
    image_path = db.Column(db.String(255), nullable=True)  # Path to saved KTP image
    raw_text = db.Column(db.Text, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def to_dict(self):
        """Return the record as a plain dict.

        ``raw_text`` is not included. ``created_at`` and ``updated_at`` are
        ISO-8601 strings, or ``None`` when unset.
        """
        payload = {'id': self.id}
        for name in self._OCR_FIELDS:
            payload[name] = getattr(self, name)
        payload['image_path'] = self.image_path
        for stamp in ('created_at', 'updated_at'):
            moment = getattr(self, stamp)
            payload[stamp] = moment.isoformat() if moment else None
        return payload

    @classmethod
    def from_ocr_data(cls, ocr_data, raw_text=None):
        """Build a KTPRecord from an OCR result dict.

        Keys missing from ``ocr_data`` become ``None``. ``image_path`` is not
        set here.
        """
        extracted = {name: ocr_data.get(name) for name in cls._OCR_FIELDS}
        return cls(raw_text=raw_text, **extracted)
class KKRecord(db.Model):
    """ORM model storing the fields extracted from a Kartu Keluarga by OCR."""
    __tablename__ = 'kk_records'

    # Fields copied one-to-one from the OCR result dict, in the order they
    # are serialized by to_dict().
    _OCR_FIELDS = (
        'no_kk', 'kepala_keluarga', 'alamat', 'rt_rw', 'kel_desa',
        'kecamatan', 'kabupaten_kota', 'provinsi', 'kode_pos',
    )

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # The KK number is unique and indexed; every extracted field is nullable.
    no_kk = db.Column(db.String(16), unique=True, nullable=True, index=True)
    kepala_keluarga = db.Column(db.String(100), nullable=True)
    alamat = db.Column(db.Text, nullable=True)
    rt_rw = db.Column(db.String(10), nullable=True)
    kel_desa = db.Column(db.String(50), nullable=True)
    kecamatan = db.Column(db.String(50), nullable=True)
    kabupaten_kota = db.Column(db.String(50), nullable=True)
    provinsi = db.Column(db.String(50), nullable=True)
    kode_pos = db.Column(db.String(10), nullable=True)
    image_path = db.Column(db.String(255), nullable=True)  # Path to saved KK image
    raw_text = db.Column(db.Text, nullable=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def to_dict(self):
        """Return the record as a plain dict.

        ``raw_text`` is not included. ``created_at`` and ``updated_at`` are
        ISO-8601 strings, or ``None`` when unset.
        """
        payload = {'id': self.id}
        for name in self._OCR_FIELDS:
            payload[name] = getattr(self, name)
        payload['image_path'] = self.image_path
        for stamp in ('created_at', 'updated_at'):
            moment = getattr(self, stamp)
            payload[stamp] = moment.isoformat() if moment else None
        return payload

    @classmethod
    def from_ocr_data(cls, ocr_data, raw_text=None):
        """Build a KKRecord from an OCR result dict.

        Keys missing from ``ocr_data`` become ``None``. ``image_path`` is not
        set here.
        """
        extracted = {name: ocr_data.get(name) for name in cls._OCR_FIELDS}
        return cls(raw_text=raw_text, **extracted)

View File

@@ -20,16 +20,19 @@ class OCREngine:
def preprocess_image(self, image_path: str) -> np.ndarray:
"""
Preprocessing gambar untuk hasil OCR lebih baik
Enhanced preprocessing untuk hasil OCR lebih baik
Based on Context7 OpenCV documentation:
- Resize jika terlalu besar
- Enhance contrast
- Denoising untuk mengurangi noise
- CLAHE untuk adaptive histogram equalization
- Sharpening untuk teks lebih jelas
"""
img = cv2.imread(image_path)
if img is None:
raise ValueError(f"Tidak dapat membaca gambar: {image_path}")
# Resize jika terlalu besar (max 2000px)
max_dim = 2000
# Resize jika terlalu besar (max 1500px - optimized for speed)
max_dim = 1500
height, width = img.shape[:2]
if max(height, width) > max_dim:
scale = max_dim / max(height, width)
@@ -38,12 +41,20 @@ class OCREngine:
# Convert ke grayscale untuk preprocessing
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Enhance contrast menggunakan CLAHE
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# Denoise (from Context7) - mengurangi noise tanpa blur teks
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
# Enhanced CLAHE untuk dokumen (from Context7)
# clipLimit lebih tinggi untuk kontras lebih baik
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
enhanced = clahe.apply(denoised)
# Sharpen using kernel (from Context7)
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
sharpened = cv2.filter2D(enhanced, -1, kernel)
# Convert kembali ke BGR untuk PaddleOCR
enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
enhanced_bgr = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)
return enhanced_bgr

View File

@@ -3,3 +3,6 @@ paddleocr
flask
pillow
opencv-python
pymysql
flask-sqlalchemy
requests

View File

@@ -175,6 +175,121 @@ header h1 {
max-height: 400px;
border-radius: var(--radius);
cursor: pointer;
display: block;
margin: 0 auto;
}
/* Crop Container */
.crop-container {
position: relative;
max-width: 100%;
margin-bottom: 1rem;
overflow: hidden;
border-radius: var(--radius);
background: #000;
}
.crop-area {
position: absolute;
top: 0;
left: 0;
pointer-events: none;
/* Let clicks pass through, handles catch them */
}
/* Perspective Crop Handles */
.crop-handle {
position: absolute;
width: 20px;
height: 20px;
background: var(--accent-primary);
border: 2px solid #fff;
border-radius: 50%;
transform: translate(-50%, -50%);
cursor: move;
pointer-events: auto;
z-index: 10;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
transition: transform 0.1s ease;
}
.crop-handle:hover,
.crop-handle.active {
background: #fff;
border-color: var(--accent-primary);
transform: translate(-50%, -50%) scale(1.2);
}
/* Crop Actions & Controls */
.crop-actions-container {
display: flex;
flex-direction: column;
gap: 1rem;
margin-top: 1rem;
padding: 1rem;
background: var(--bg-secondary);
border-radius: var(--radius);
border: 1px solid var(--border);
}
.rotation-control {
display: flex;
align-items: center;
gap: 1rem;
color: var(--text-secondary);
}
.rotation-control label {
font-weight: 500;
min-width: 80px;
}
.rotation-control input[type="range"] {
flex: 1;
cursor: pointer;
accent-color: var(--accent-primary);
}
.crop-buttons {
display: flex;
gap: 1rem;
justify-content: center;
}
.crop-action-btn {
padding: 0.5rem 1rem;
border: none;
border-radius: var(--radius);
cursor: pointer;
font-weight: 500;
transition: all 0.2s ease;
flex: 1;
}
.crop-action-btn.primary {
background: var(--accent-gradient);
color: white;
}
.crop-action-btn.primary:hover:not(:disabled) {
transform: translateY(-2px);
box-shadow: var(--shadow);
}
.crop-action-btn.primary:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.crop-action-btn.secondary {
background: var(--bg-tertiary);
color: var(--text-secondary);
border: 1px solid var(--border);
}
.crop-action-btn.secondary:hover {
background: var(--bg-primary);
color: var(--text-primary);
}
/* Process Button */
@@ -533,6 +648,290 @@ footer a:hover {
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: var(--text-muted);
/* Archive Header Button */
.header-actions {
display: flex;
gap: 1rem;
justify-content: center;
margin-top: 1rem;
}
.archive-header-btn {
padding: 0.5rem 1.25rem;
background: transparent;
border: 1px solid var(--accent-secondary);
color: var(--accent-secondary);
border-radius: var(--radius);
cursor: pointer;
font-weight: 600;
transition: all 0.2s ease;
}
.archive-header-btn:hover {
background: var(--accent-primary);
color: white;
border-color: var(--accent-primary);
}
/* Modal Styles */
.modal {
position: fixed;
z-index: 1000;
left: 0;
top: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.7);
backdrop-filter: blur(4px);
overflow: auto;
animation: fadeIn 0.3s;
}
@keyframes fadeIn {
from {
opacity: 0;
}
to {
opacity: 1;
}
}
.modal-content {
background-color: var(--bg-secondary);
margin: 5% auto;
padding: 2rem;
border: 1px solid var(--border);
border-radius: var(--radius-lg);
width: 90%;
max-width: 1000px;
box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
position: relative;
animation: slideDown 0.3s;
}
@keyframes slideDown {
from {
transform: translateY(-50px);
opacity: 0;
}
to {
transform: translateY(0);
opacity: 1;
}
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 2rem;
padding-bottom: 1rem;
border-bottom: 1px solid var(--border);
}
.modal-header h2 {
color: var(--text-primary);
font-size: 1.5rem;
}
.close-btn {
background: transparent;
border: none;
color: var(--text-muted);
font-size: 2rem;
cursor: pointer;
line-height: 1;
transition: color 0.2s;
}
.close-btn:hover {
color: var(--text-primary);
}
/* Archive List Grid */
.archive-list {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
gap: 1.5rem;
max-height: 70vh;
overflow-y: auto;
padding-right: 0.5rem;
}
.archive-card {
background: var(--bg-tertiary);
border: 1px solid var(--border);
border-radius: var(--radius);
overflow: hidden;
transition: transform 0.2s, box-shadow 0.2s;
display: flex;
flex-direction: column;
}
.archive-card:hover {
transform: translateY(-4px);
box-shadow: var(--shadow-lg);
border-color: var(--accent-primary);
}
.archive-card-img {
width: 100%;
height: 160px;
background: #000;
display: flex;
align-items: center;
justify-content: center;
overflow: hidden;
}
.archive-card-img img {
width: 100%;
height: 100%;
object-fit: cover;
transition: transform 0.3s;
}
.archive-card:hover .archive-card-img img {
transform: scale(1.05);
}
.archive-card-content {
padding: 1rem;
flex: 1;
display: flex;
flex-direction: column;
}
.archive-card-content h3 {
font-size: 1rem;
margin-bottom: 0.5rem;
color: var(--text-primary);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.archive-card-meta {
margin-bottom: 1rem;
font-size: 0.8rem;
color: var(--text-muted);
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.archive-card-actions {
margin-top: auto;
}
.view-btn {
width: 100%;
padding: 0.5rem;
background: var(--bg-primary);
border: 1px solid var(--border);
color: var(--text-secondary);
border-radius: 6px;
cursor: pointer;
font-size: 0.875rem;
transition: all 0.2s;
}
.view-btn:hover {
background: var(--accent-primary);
color: white;
border-color: var(--accent-primary);
}
.archive-loading,
.archive-empty {
text-align: center;
padding: 3rem;
color: var(--text-muted);
font-size: 1.1rem;
}
/* Print Styles */
@media print {
/* Reset Page */
@page {
margin: 0;
size: auto;
}
body {
margin: 0;
padding: 0;
background: white !important;
/* Ensure no scroll or extra pages from hidden content */
height: 100vh !important;
overflow: hidden !important;
}
/* Hide EVERYTHING initially with high specificity */
body * {
visibility: hidden !important;
display: none !important;
/* Force display none to remove layout space */
}
/* Show ONLY Print Area and its children */
#printArea,
#printArea * {
visibility: visible !important;
display: flex !important;
/* Restore display for parent */
}
/* Reset display for children of printArea specifically */
#printArea * {
display: block !important;
/* Default to block or whatever needed */
}
/* Specific fix for image inside */
#printArea img {
display: inline-block !important;
}
#printArea {
position: fixed !important;
/* Fixed helps detach from flow */
left: 0 !important;
top: 0 !important;
width: 100% !important;
height: 100% !important;
overflow: visible !important;
z-index: 99999 !important;
background: white !important;
display: flex !important;
justify-content: center;
align-items: flex-start;
padding-top: 5cm;
/* Adjust padding as needed */
}
.ktp-print-size {
/* Standar ISO/IEC 7810 ID-1: 85.60 × 53.98 mm */
width: 85.60mm !important;
height: 53.98mm !important;
max-width: none !important;
max-height: none !important;
border: 1px dashed #ccc;
box-shadow: none !important;
/* Remove any shadow */
}
.a4-print-size {
/* A4 Landscape: 297mm x 210mm */
/* Use slightly less to account for margins if necessary, but standard is distinct */
width: 297mm !important;
height: 210mm !important;
max-width: none !important;
max-height: none !important;
border: none;
}
}

View File

@@ -13,6 +13,12 @@
<header>
<h1>📄 OCR KTP/KK</h1>
<p class="subtitle">Pembaca Dokumen Indonesia Offline</p>
<div class="header-actions">
<button id="reloadBtn" class="archive-header-btn secondary" title="Reload halaman">🔄 Reset /
Baru</button>
<button id="archiveBtn" class="archive-header-btn">📂 Arsip KTP</button>
<button id="archiveKKBtn" class="archive-header-btn">📂 Arsip KK</button>
</div>
</header>
<main>
@@ -40,7 +46,42 @@
</label>
<p class="file-types">PNG, JPG, JPEG, BMP, WEBP (max 16MB)</p>
</div>
<img id="preview" class="preview-image" style="display: none;">
<!-- Crop Container -->
<div id="cropContainer" class="crop-container" style="display: none;">
<!-- Canvas for editing (rotation & crop) -->
<canvas id="cropCanvas" class="preview-image"></canvas>
<!-- Hidden <img>: the canvas is the editing surface; this image is shown instead once a crop has been applied -->
<img id="preview" class="preview-image" style="display: none;">
<div id="cropArea" class="crop-area">
<svg width="100%" height="100%"
style="position: absolute; top:0; left:0; overflow:visible;">
<polygon id="cropPolygon" points=""
style="fill: rgba(255, 255, 255, 0.1); stroke: var(--accent-primary); stroke-width: 2; vector-effect: non-scaling-stroke;">
</polygon>
</svg>
<!-- Handles TL, TR, BR, BL -->
<div class="crop-handle" data-index="0"></div>
<div class="crop-handle" data-index="1"></div>
<div class="crop-handle" data-index="2"></div>
<div class="crop-handle" data-index="3"></div>
</div>
</div>
</div>
<!-- Crop Actions -->
<div id="cropActions" class="crop-actions-container" style="display: none;">
<div class="rotation-control">
<label for="rotationSlider">Rotasi: <span id="rotationValue"></span></label>
<input type="range" id="rotationSlider" min="-45" max="45" value="0" step="1">
</div>
<div class="crop-buttons">
<button type="button" id="resetCropBtn" class="crop-action-btn secondary">🔄 Reset</button>
<button type="button" id="applyCropBtn" class="crop-action-btn primary">✂️ Terapkan
Crop</button>
</div>
</div>
<button id="processBtn" class="process-btn" disabled>
@@ -54,8 +95,11 @@
<div class="results-header">
<h2>📋 Hasil Ekstraksi</h2>
<div class="results-actions">
<button class="action-btn" id="copyBtn" title="Copy JSON">📋 Copy</button>
<button class="action-btn" id="exportBtn" title="Export JSON">💾 Export</button>
<button class="action-btn secondary" id="printBtn">🖨️ Cetak</button>
<button class="action-btn secondary" id="downloadBtn">⬇️ Unduh</button>
<button class="action-btn primary" id="saveBtn" title="Simpan KTP">💾 Simpan</button>
<button class="action-btn" id="copyBtn" title="Copy Text (Word)">📋 Copy</button>
<button class="action-btn" id="exportBtn" title="Download Excel (.xlsx)">📤 Excel</button>
<button class="action-btn secondary" id="toggleRaw">📝 Raw Text</button>
</div>
</div>
@@ -88,6 +132,52 @@
</section>
</main>
<!-- Login Modal -->
<div id="loginModal" class="modal" style="display: none;">
<div class="modal-content" style="max-width: 400px;">
<div class="modal-header">
<h2>🔐 Login Arsip</h2>
<span class="close-btn" id="closeLoginBtn">&times;</span>
</div>
<div class="modal-body">
<p style="margin-bottom:1rem; color:var(--text-secondary);">Masukkan password untuk mengakses arsip
(Default: admin / 123).</p>
<div class="form-group">
<label>Username</label>
<input type="text" id="loginUser" class="form-control" value="admin">
</div>
<div class="form-group">
<label>Password</label>
<input type="password" id="loginPass" class="form-control" placeholder="Password">
</div>
<div id="loginError"
style="color:var(--text-error); display:none; margin-bottom:1rem; font-size:0.9rem;"></div>
<button id="submitLoginBtn" class="action-btn primary" style="width:100%;">Masuk</button>
</div>
</div>
</div>
<!-- Archive Modal -->
<div id="archiveModal" class="modal" style="display: none;">
<div class="modal-content">
<div class="modal-header">
<h2>📂 Arsip KTP</h2>
<button id="closeModalBtn" class="close-btn">&times;</button>
</div>
<div class="modal-body">
<div id="archiveList" class="archive-list">
<!-- Cards will be loaded here -->
</div>
<div id="archiveLoading" class="archive-loading" style="display: none;">
⏳ Memuat...
</div>
<div id="archiveEmpty" class="archive-empty" style="display: none;">
Belum ada KTP yang disimpan
</div>
</div>
</div>
</div>
<footer>
<p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> • Data
diproses secara lokal</p>
@@ -97,8 +187,10 @@
<script>
// State
let selectedFile = null;
let docType = 'ktp';
let originalImageObject = null; // For cropping
let extractedData = null;
let currentDocType = 'ktp'; // Default
let currentArchiveType = 'ktp'; // Default for archive view
// Elements
const dropzone = document.getElementById('dropzone');
@@ -146,7 +238,7 @@
btn.addEventListener('click', () => {
docBtns.forEach(b => b.classList.remove('active'));
btn.classList.add('active');
docType = btn.dataset.type;
currentDocType = btn.dataset.type;
});
});
@@ -177,12 +269,20 @@
});
// Click on dropzone
// Click on dropzone - DISABLED (User request: Only 'Pilih File' button should work)
dropzone.addEventListener('click', (e) => {
if (e.target === dropzone || e.target.closest('.dropzone-content')) {
fileInput.click();
}
// Do nothing. Label 'file-btn' handles clicks on itself automatically.
// preventing accidental uploads when clicking background/crop area.
});
// Canvas & Rotation Variables
const cropCanvas = document.getElementById('cropCanvas');
const rotationSlider = document.getElementById('rotationSlider');
const rotationValue = document.getElementById('rotationValue');
let currentRotation = 0;
// let originalImageObject = null; // Store Image object for redraws - moved to global state
function handleFile(file) {
if (!file.type.startsWith('image/')) {
showError('File harus berupa gambar');
@@ -194,14 +294,36 @@
return;
}
originalFile = file;
selectedFile = file;
currentRotation = 0;
updateRotationUI();
// Show preview
// Load image
const reader = new FileReader();
reader.onload = (e) => {
preview.src = e.target.result;
preview.style.display = 'block';
dropzone.querySelector('.dropzone-content').style.display = 'none';
originalImageData = e.target.result;
// Create Image object
const img = new Image();
img.onload = () => {
originalImageObject = img;
preview.src = e.target.result; // Keep this for backup/debugging
// Render to canvas
renderEditor();
cropCanvas.style.display = 'block';
preview.style.display = 'none';
cropContainer.style.display = 'block';
dropzone.querySelector('.dropzone-content').style.display = 'none';
cropActions.style.display = 'flex';
// Init crop area after first render
// Small timeout to ensure layout is done
setTimeout(initCropArea, 50);
};
img.src = e.target.result;
};
reader.readAsDataURL(file);
@@ -210,6 +332,272 @@
resultsSection.style.display = 'none';
}
// Rotation slider: store the new angle, refresh the label, redraw the canvas.
rotationSlider.addEventListener('input', ({ target }) => {
    currentRotation = parseInt(target.value);
    updateRotationUI();
    renderEditor();
});
// Mirror currentRotation into the slider position and the degree label.
function updateRotationUI() {
    const degrees = currentRotation;
    rotationValue.textContent = `${degrees}°`;
    rotationSlider.value = degrees;
}
/**
 * Redraw the loaded image on the editor canvas, rotated by currentRotation
 * degrees around its centre. The canvas backing size is set to the bounding
 * box of the rotated image. Does nothing until an image has been loaded.
 */
function renderEditor() {
    const image = originalImageObject;
    if (!image) return;

    const theta = currentRotation * Math.PI / 180;
    const absSin = Math.abs(Math.sin(theta));
    const absCos = Math.abs(Math.cos(theta));
    const { naturalWidth: srcW, naturalHeight: srcH } = image;

    // Axis-aligned bounding box of the rotated source rectangle.
    const boxW = srcW * absCos + srcH * absSin;
    const boxH = srcW * absSin + srcH * absCos;
    cropCanvas.width = boxW;
    cropCanvas.height = boxH;

    const context = cropCanvas.getContext('2d');
    context.clearRect(0, 0, boxW, boxH);
    context.save();
    // Rotate about the canvas centre, then draw the image centred on it.
    context.translate(boxW / 2, boxH / 2);
    context.rotate(theta);
    context.drawImage(image, -srcW / 2, -srcH / 2);
    context.restore();
}
// Crop functionality (Perspective / 4-Point)
const cropContainer = document.getElementById('cropContainer');
// Keep click and mousedown events inside the crop editor from bubbling up
// to ancestor handlers (e.g. the dropzone).
['click', 'mousedown'].forEach((eventName) => {
    cropContainer.addEventListener(eventName, (e) => {
        e.stopPropagation();
    });
});
const cropArea = document.getElementById('cropArea');
const cropPolygon = document.getElementById('cropPolygon');
const cropActions = document.getElementById('cropActions');
const resetCropBtn = document.getElementById('resetCropBtn');
const applyCropBtn = document.getElementById('applyCropBtn');
let originalFile = null;
let originalImageData = null;
let cropPoints = []; // [{x,y}, {x,y}, {x,y}, {x,y}]
let isDragging = false;
let activeHandleIndex = null;
// KTP aspect ratio: 85.6mm x 53.98mm = ~1.586
const KTP_ASPECT_RATIO = 85.6 / 53.98;
// KK aspect ratio (A4 Landscape): 297mm x 210mm = ~1.414
const KK_ASPECT_RATIO = 297 / 210;
/**
 * Align the crop overlay with the displayed canvas and reset the four crop
 * points to a centred rectangle with the aspect ratio of the selected
 * document type (KK or KTP), covering 70% of the limiting dimension.
 */
function initCropArea() {
    // Overlay takes the canvas' on-screen position and size.
    const overlay = cropArea.style;
    overlay.left = `${cropCanvas.offsetLeft}px`;
    overlay.top = `${cropCanvas.offsetTop}px`;
    overlay.width = `${cropCanvas.offsetWidth}px`;
    overlay.height = `${cropCanvas.offsetHeight}px`;

    const w = cropCanvas.offsetWidth;
    const h = cropCanvas.offsetHeight;

    const ratio = currentDocType === 'kk' ? KK_ASPECT_RATIO : KTP_ASPECT_RATIO;
    // When the canvas is wider than the target ratio, height is the limit.
    const heightLimited = w / h > ratio;
    const boxH = heightLimited ? h * 0.7 : (w * 0.7) / ratio;
    const boxW = heightLimited ? boxH * ratio : w * 0.7;

    const cx = w / 2;
    const cy = h / 2;
    const xLeft = cx - boxW / 2;
    const xRight = cx + boxW / 2;
    const yTop = cy - boxH / 2;
    const yBottom = cy + boxH / 2;

    // Order: top-left, top-right, bottom-right, bottom-left.
    cropPoints = [
        { x: xLeft, y: yTop },
        { x: xRight, y: yTop },
        { x: xRight, y: yBottom },
        { x: xLeft, y: yBottom }
    ];

    updateCropVisuals();
    cropArea.style.display = 'block';
}
// Move each handle to its crop point and rebuild the SVG polygon outline
// ("x1,y1 x2,y2 ..." in overlay pixel coordinates).
function updateCropVisuals() {
    const handleEls = cropArea.querySelectorAll('.crop-handle');
    const pairs = [];

    cropPoints.forEach(({ x, y }, idx) => {
        const el = handleEls[idx];
        if (el) {
            el.style.left = `${x}px`;
            el.style.top = `${y}px`;
        }
        pairs.push(`${x},${y}`);
    });

    cropPolygon.setAttribute('points', pairs.join(' '));
}
// Drag start: pressing a handle (mouse or touch) marks it as the active one.
const handles = cropArea.querySelectorAll('.crop-handle');
for (const handle of handles) {
    const startDrag = (e) => {
        e.stopPropagation();
        activeHandleIndex = parseInt(handle.dataset.index);
        isDragging = true;
    };
    handle.addEventListener('mousedown', startDrag);
    handle.addEventListener('touchstart', startDrag, { passive: false });
}
document.addEventListener('mousemove', handleDragMove);
document.addEventListener('touchmove', handleDragMove, { passive: false });
document.addEventListener('mouseup', handleDragEnd);
document.addEventListener('touchend', handleDragEnd);
// Keep the crop overlay aligned with the canvas when the window is resized.
// The crop points live in overlay pixel coordinates, so they are rescaled by
// the same factor as the overlay; otherwise the handles would drift relative
// to the image and the later overlay-to-canvas mapping would crop the wrong
// region.
window.addEventListener('resize', () => {
    // offsetParent is null while the canvas is hidden; nothing to align then.
    if (!cropCanvas.offsetParent) return;

    // Overlay size before this resize, as last written by us (NaN if unset).
    const prevW = parseFloat(cropArea.style.width);
    const prevH = parseFloat(cropArea.style.height);
    const nextW = cropCanvas.offsetWidth;
    const nextH = cropCanvas.offsetHeight;

    cropArea.style.left = cropCanvas.offsetLeft + 'px';
    cropArea.style.top = cropCanvas.offsetTop + 'px';
    cropArea.style.width = nextW + 'px';
    cropArea.style.height = nextH + 'px';

    const sizeChanged = prevW !== nextW || prevH !== nextH;
    if (cropPoints.length === 4 && prevW > 0 && prevH > 0 && sizeChanged) {
        const scaleX = nextW / prevW;
        const scaleY = nextH / prevH;
        cropPoints = cropPoints.map(p => ({ x: p.x * scaleX, y: p.y * scaleY }));
        updateCropVisuals();
    }
});
// Pointer position of a mouse or touch event, relative to the crop overlay's
// top-left corner. For touch events the first active touch is used.
function getEventPos(e) {
    const bounds = cropArea.getBoundingClientRect();
    const hasTouch = e.touches && e.touches.length > 0;
    const pointer = hasTouch ? e.touches[0] : e;
    return {
        x: pointer.clientX - bounds.left,
        y: pointer.clientY - bounds.top
    };
}
// While a handle is being dragged, move its crop point to the pointer
// position, clamped to the overlay bounds, and refresh the visuals.
function handleDragMove(e) {
    const dragging = isDragging && activeHandleIndex !== null;
    if (!dragging) return;
    e.preventDefault();

    const clamp = (value, limit) => Math.max(0, Math.min(value, limit));
    const { x: rawX, y: rawY } = getEventPos(e);

    cropPoints[activeHandleIndex] = {
        x: clamp(rawX, cropArea.offsetWidth),
        y: clamp(rawY, cropArea.offsetHeight)
    };
    updateCropVisuals();
}
// Pointer released: no handle is active any more.
function handleDragEnd() {
    activeHandleIndex = null;
    isDragging = false;
}
// A rotation change redraws the canvas, so the crop box is re-initialised
// to its default rectangle.
rotationSlider.addEventListener('input', () => { initCropArea(); });
// Reset button: rotation back to 0 and, if an image is loaded, show the
// editor canvas again with a default crop box and the original file selected.
resetCropBtn.addEventListener('click', () => {
    currentRotation = 0;
    updateRotationUI();

    if (!originalImageObject) return;

    renderEditor();
    cropCanvas.style.display = 'block';
    preview.style.display = 'none';
    selectedFile = originalFile;
    // Short delay so layout has settled before the overlay is measured.
    setTimeout(initCropArea, 50);
});
// Apply crop button (perspective transform API).
// Uploads the canvas as currently drawn together with the four crop corners,
// then replaces the preview and selectedFile with the image the server returns.
applyCropBtn.addEventListener('click', async () => {
    if (!originalImageObject) return;
    applyCropBtn.disabled = true;
    applyCropBtn.textContent = '⏳ Memproses...';
    try {
        // The crop points are in displayed (CSS) pixels. A canvas that is not
        // laid out reports 0 here, which would turn the scale factors below
        // into Infinity and send unusable points to the server.
        const shownWidth = cropCanvas.offsetWidth;
        const shownHeight = cropCanvas.offsetHeight;
        if (!shownWidth || !shownHeight) {
            throw new Error('Canvas tidak sedang ditampilkan');
        }

        // 1. Snapshot the canvas as a JPEG blob.
        const canvasBlob = await new Promise(resolve => cropCanvas.toBlob(resolve, 'image/jpeg', 0.95));
        // toBlob hands null to its callback when the image cannot be created.
        if (!canvasBlob) {
            throw new Error('Gagal membuat gambar dari canvas');
        }

        // 2. Map the points from displayed size to the canvas's internal resolution.
        const scaleX = cropCanvas.width / shownWidth;
        const scaleY = cropCanvas.height / shownHeight;
        // The backend expects [TL, TR, BR, BL], which is how cropPoints is ordered [0,1,2,3].
        const realPoints = cropPoints.map(p => [p.x * scaleX, p.y * scaleY]);

        // 3. Send to server (the image part is attached exactly once).
        const formData = new FormData();
        formData.append('image', canvasBlob, 'rotated_temp.jpg');
        formData.append('points', JSON.stringify(realPoints));
        formData.append('doc_type', currentDocType);
        const response = await fetch('/api/transform-perspective', {
            method: 'POST',
            body: formData
        });
        const result = await response.json();
        if (result.success) {
            // Show the transformed image instead of the editor.
            preview.src = result.image_url;
            preview.style.display = 'block';
            cropCanvas.style.display = 'none';
            cropArea.style.display = 'none';
            // Download the result so selectedFile holds the transformed image.
            const resBlob = await fetch(result.image_url).then(r => r.blob());
            selectedFile = new File([resBlob], result.filename || 'perspective_cropped.jpg', { type: 'image/jpeg' });
        } else {
            showError('Gagal transformasi: ' + result.error);
        }
    } catch (error) {
        console.error('Crop error:', error);
        showError('Gagal memproses crop: ' + error.message);
    } finally {
        // Always hand the button back, whatever happened above.
        applyCropBtn.disabled = false;
        applyCropBtn.textContent = '✂️ Terapkan Crop';
    }
});
// Process button
processBtn.addEventListener('click', async () => {
if (!selectedFile) return;
@@ -224,7 +612,7 @@
try {
const formData = new FormData();
formData.append('file', selectedFile);
formData.append('doc_type', docType);
formData.append('doc_type', currentDocType);
const response = await fetch('/upload', {
method: 'POST',
@@ -235,6 +623,10 @@
if (result.success) {
extractedData = result.data;
currentDocType = result.doc_type || 'ktp';
if (result.validation) {
validationResult = result.validation;
}
displayResults(result);
hideError();
} else {
@@ -526,24 +918,61 @@
rawTextSection.style.display = isVisible ? 'none' : 'block';
});
// Copy to clipboard (formatted text for Word).
// Writes the extracted fields once, as "LABEL: value" lines; the earlier raw
// JSON write (and its second alert) is gone.
document.getElementById('copyBtn').addEventListener('click', () => {
    if (!extractedData) return;

    // Keys that are left out of the copied text.
    const excludeKeys = ['raw_text', 'image_path', 'id', 'created_at', 'updated_at'];
    const text = Object.entries(extractedData)
        // Also drop fields whose value is empty/falsy.
        .filter(([k, v]) => !excludeKeys.includes(k) && v)
        .map(([k, v]) => {
            // snake_case key -> upper-case label with spaces.
            const label = k.replace(/_/g, ' ').toUpperCase();
            return `${label}: ${v}`;
        })
        .join('\n');

    navigator.clipboard.writeText(text)
        .then(() => alert('Data berhasil disalin (Format Teks)! Bisa dipaste di Word/Notepad.'))
        .catch((err) => {
            // writeText returns a promise that can reject; report it instead
            // of leaving an unhandled rejection.
            console.error('Clipboard error:', err);
            alert('Gagal menyalin data.');
        });
});
// Export JSON
document.getElementById('exportBtn').addEventListener('click', () => {
if (extractedData) {
const blob = new Blob([JSON.stringify(extractedData, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `${docType}_data.json`;
a.click();
URL.revokeObjectURL(url);
// Export Excel: POST the extracted data to the backend and download the
// response body as an .xlsx file.
document.getElementById('exportBtn').addEventListener('click', async () => {
    if (!extractedData) return;

    const exportButton = document.getElementById('exportBtn');
    const savedLabel = exportButton.innerHTML;
    exportButton.innerHTML = '⏳...';
    exportButton.disabled = true;

    try {
        const res = await fetch('/api/export-excel', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(extractedData)
        });

        if (!res.ok) {
            // Error responses are read as JSON with an optional `error` field.
            const err = await res.json();
            alert('Gagal export excel: ' + (err.error || 'Unknown error'));
            return;
        }

        // Download the response through a temporary object URL and anchor.
        const objectUrl = window.URL.createObjectURL(await res.blob());
        const anchor = document.createElement('a');
        anchor.href = objectUrl;
        anchor.download = extractedData.nik ? `Data_KTP_${extractedData.nik}.xlsx` : 'Data_KTP.xlsx';
        document.body.appendChild(anchor);
        anchor.click();
        document.body.removeChild(anchor);
        window.URL.revokeObjectURL(objectUrl);
    } catch (e) {
        console.error(e);
        alert('Error export excel.');
    } finally {
        // Restore the button on every path, including the early return above.
        exportButton.innerHTML = savedLabel;
        exportButton.disabled = false;
    }
});
@@ -556,14 +985,371 @@
errorSection.style.display = 'none';
}
// Reset on new file selection
preview.addEventListener('click', () => {
preview.style.display = 'none';
dropzone.querySelector('.dropzone-content').style.display = 'flex';
selectedFile = null;
processBtn.disabled = true;
fileInput.value = '';
// Save button: upload the selected image and the extracted fields to the
// save endpoint that matches the current document type.
const saveBtn = document.getElementById('saveBtn');
saveBtn.addEventListener('click', async () => {
    if (!(extractedData && selectedFile)) return;

    saveBtn.disabled = true;
    saveBtn.innerHTML = '⏳ Menyimpan...';

    // Put the button back into its clickable state after a failure.
    const restoreButton = () => {
        saveBtn.innerHTML = '💾 Simpan';
        saveBtn.disabled = false;
    };

    const isKK = currentDocType === 'kk';
    const endpoint = isKK ? '/api/save-kk' : '/api/save-ktp';

    try {
        // selectedFile is sent as-is; only the upload filename depends on the type.
        const payload = new FormData();
        payload.append('image', selectedFile, isKK ? 'kk_saved.jpg' : 'ktp_saved.jpg');
        payload.append('data', JSON.stringify(extractedData));

        const response = await fetch(endpoint, { method: 'POST', body: payload });
        const result = await response.json();

        if (!result.success) {
            alert('Gagal menyimpan: ' + result.error);
            restoreButton();
            return;
        }

        // On success the button stays disabled and shows the saved label.
        const docName = currentDocType === 'kk' ? 'Kartu Keluarga' : 'KTP';
        alert(`Data ${docName} berhasil disimpan!`);
        saveBtn.innerHTML = '✅ Tersimpan';
    } catch (error) {
        alert('Terjadi kesalahan: ' + error.message);
        restoreButton();
    }
});
// Print functionality.
// Copies the current image (the preview, or the raw canvas after confirmation)
// into #printArea with a size class chosen by document type, then opens the
// print dialog.
const printBtn = document.getElementById('printBtn');
printBtn.addEventListener('click', () => {
    const printArea = document.getElementById('printArea');
    const sizeClass = currentDocType === 'kk' ? 'a4-print-size' : 'ktp-print-size';

    // Determine the source: preview image or crop canvas.
    const isPreviewVisible = preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src;
    const isCanvasVisible = cropCanvas.style.display !== 'none';

    const img = new Image();
    img.className = sizeClass;

    if (!isPreviewVisible) {
        if (!isCanvasVisible) {
            alert('Tidak ada gambar KTP untuk dicetak! Silakan upload atau pilih dari arsip.');
            return;
        }
        if (!confirm('Gambar belum diterapkan (Apply). Cetak tampilan canvas saat ini?')) return;
        // Print the canvas as currently drawn.
        img.src = cropCanvas.toDataURL('image/jpeg', 0.95);
        printArea.innerHTML = '';
        printArea.appendChild(img);
        // Short delay so the image is in place before the dialog opens.
        setTimeout(() => window.print(), 100);
        return;
    }

    // An already-available image can report complete === true and still
    // dispatch a load event afterwards; the flag ensures a single print dialog.
    let printRequested = false;
    const printOnce = () => {
        if (printRequested) return;
        printRequested = true;
        // Short delay to ensure rendering.
        setTimeout(() => window.print(), 100);
    };

    // Attach handlers before assigning src so no event can be missed.
    img.onload = printOnce;
    img.onerror = () => {
        alert('Gagal memuat gambar untuk dicetak.');
    };
    img.src = preview.src;

    printArea.innerHTML = '';
    printArea.appendChild(img);

    // Fallback for an image that is available immediately. complete is also
    // true for a broken image, so require real pixel data as well.
    if (img.complete && img.naturalWidth > 0) {
        printOnce();
    }
});
// Download functionality.
// Saves the preview image (or the raw canvas when no preview is shown),
// naming the file after the extracted NIK when one is known.
const downloadBtn = document.getElementById('downloadBtn');
downloadBtn.addEventListener('click', () => {
    // The preview counts only when it is shown and has a real source.
    const isPreviewVisible = preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src;
    if (!isPreviewVisible) {
        if (cropCanvas.style.display !== 'none') {
            // No applied result yet: download the canvas as currently drawn.
            const canvasLink = document.createElement('a');
            canvasLink.download = 'ktp_scan_raw.jpg';
            canvasLink.href = cropCanvas.toDataURL('image/jpeg', 0.95);
            canvasLink.click();
            return;
        }
        alert('Tidak ada gambar untuk diunduh');
        return;
    }

    // Prefer the NIK from extractedData. The extra truthiness check matters:
    // typeof null is 'object', so the typeof test alone would let a null
    // extractedData through and throw on `.nik`.
    let filename = 'ktp_scan.jpg';
    if (typeof extractedData !== 'undefined' && extractedData && extractedData.nik) {
        filename = `KTP_${extractedData.nik}.jpg`;
    } else {
        // Fall back to the NIK input in the DOM, if present and filled in.
        const domNik = document.querySelector('input[data-key="nik"]');
        if (domNik && domNik.value) filename = `KTP_${domNik.value}.jpg`;
    }

    const link = document.createElement('a');
    link.href = preview.src;
    link.download = filename;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
});
// Archive modal: element lookups by id. These are used by the archive
// open/load/render code that follows.
const archiveBtn = document.getElementById('archiveBtn');
const archiveModal = document.getElementById('archiveModal');
const closeModalBtn = document.getElementById('closeModalBtn');
const archiveList = document.getElementById('archiveList');
const archiveLoading = document.getElementById('archiveLoading');
const archiveEmpty = document.getElementById('archiveEmpty');
// Login modal: element lookups by id (inputs, submit/close buttons, error box).
const loginModal = document.getElementById('loginModal');
const loginUser = document.getElementById('loginUser');
const loginPass = document.getElementById('loginPass');
const submitLoginBtn = document.getElementById('submitLoginBtn');
const loginError = document.getElementById('loginError');
const closeLoginBtn = document.getElementById('closeLoginBtn');
/**
 * Ask the server whether the current session is authenticated.
 *
 * @returns {Promise<*>} The `authenticated` field of the /api/check-auth JSON
 *     response, or false when the response is not OK or anything throws.
 */
async function checkAuth() {
    try {
        const res = await fetch('/api/check-auth');
        if (!res.ok) return false;
        const { authenticated } = await res.json();
        return authenticated;
    } catch (e) {
        // Best effort: any failure is treated as "not authenticated".
        return false;
    }
}
// Archive buttons. archiveBtn is declared with the other archive modal
// elements; the KK button is looked up here and may be absent.
const archiveKKBtn = document.getElementById('archiveKKBtn');

/**
 * Open the archive for the given document type, or the login modal when the
 * session is not authenticated.
 *
 * @param {string} type - 'kk' selects the Kartu Keluarga archive; any other
 *     value gets the KTP title.
 */
async function openArchive(type) {
    currentArchiveType = type;

    const heading = document.querySelector('#archiveModal h2');
    heading.textContent = '📂 ' + (type === 'kk' ? 'Arsip Kartu Keluarga' : 'Arsip KTP');

    const authenticated = await checkAuth();
    if (!authenticated) {
        // Show the login form with the password cleared and no stale error.
        loginModal.style.display = 'block';
        loginPass.value = '';
        loginError.style.display = 'none';
        loginPass.focus();
        return;
    }

    archiveModal.style.display = 'block';
    loadArchive();
}

archiveBtn.addEventListener('click', () => openArchive('ktp'));
if (archiveKKBtn) {
    archiveKKBtn.addEventListener('click', () => openArchive('kk'));
}
// Login submit: validate the inputs, POST the credentials to /api/login and
// open the archive modal on success.
submitLoginBtn.addEventListener('click', async () => {
    // Show a message in the login error box.
    const showLoginError = (message) => {
        loginError.textContent = message;
        loginError.style.display = 'block';
    };

    const username = loginUser.value;
    const password = loginPass.value;
    if (!(username && password)) {
        showLoginError('Username dan Password harus diisi');
        return;
    }

    loginError.style.display = 'none';
    submitLoginBtn.disabled = true;
    submitLoginBtn.innerHTML = 'Memeriksa...';

    try {
        const res = await fetch('/api/login', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ username, password })
        });
        const data = await res.json();

        if (!data.success) {
            showLoginError(data.error || 'Login gagal');
            return;
        }

        // Logged in: swap the login modal for the archive.
        loginModal.style.display = 'none';
        archiveModal.style.display = 'block';
        loadArchive();
    } catch (e) {
        showLoginError('Gagal terhubung ke server');
    } finally {
        // Re-enable the button on every path.
        submitLoginBtn.disabled = false;
        submitLoginBtn.innerHTML = 'Masuk';
    }
});
// Pressing Enter in the password field triggers the login button.
loginPass.addEventListener('keypress', (e) => {
    if (e.key !== 'Enter') return;
    submitLoginBtn.click();
});

// Close buttons hide their own modal.
closeLoginBtn.addEventListener('click', () => {
    loginModal.style.display = 'none';
});
closeModalBtn.addEventListener('click', () => {
    archiveModal.style.display = 'none';
});

// A click whose target is a modal element itself also hides that modal.
window.addEventListener('click', (e) => {
    for (const modal of [archiveModal, loginModal]) {
        if (e.target === modal) modal.style.display = 'none';
    }
});
/**
 * Fetch the archive records for currentArchiveType and render them.
 *
 * Shows the loading indicator while the request is running, the empty-state
 * element when the response is unsuccessful or has no records, and an error
 * message inside the loading indicator when the request or parsing fails.
 */
async function loadArchive() {
    archiveList.innerHTML = '';

    // The failure path below replaces the indicator's markup with an error
    // message. Remember the pristine markup on first use and restore it on
    // later calls, so a previous failure is not shown as the loading state.
    if (loadArchive.loadingMarkup === undefined) {
        loadArchive.loadingMarkup = archiveLoading.innerHTML;
    } else {
        archiveLoading.innerHTML = loadArchive.loadingMarkup;
    }

    archiveLoading.style.display = 'block';
    archiveEmpty.style.display = 'none';
    try {
        const endpoint = `/api/${currentArchiveType}-archive?per_page=50`;
        const response = await fetch(endpoint);
        const result = await response.json();
        archiveLoading.style.display = 'none';
        if (result.success && result.data.length > 0) {
            renderArchiveList(result.data);
        } else {
            archiveEmpty.style.display = 'block';
        }
    } catch (error) {
        console.error('Archive load error:', error);
        // The indicator is still visible here; reuse it for the error text.
        archiveLoading.innerHTML = '❌ Gagal memuat data';
    }
}
/**
 * Append one card per archive record to archiveList.
 *
 * Cards are built with DOM APIs rather than an HTML string: record fields are
 * stored data and must not be parsed as markup, and passing the record to the
 * click handler through a closure avoids embedding JSON in an attribute.
 *
 * @param {Array<Object>} records - Records from the archive endpoint. Fields
 *     read here: created_at, image_path, and kepala_keluarga/no_kk (KK) or
 *     nama/nik (KTP).
 */
function renderArchiveList(records) {
    const isKK = currentArchiveType === 'kk';
    const imgPrefix = isKK ? '/kk-images/' : '/ktp-images/';

    // Small factory: element with a class name and optional text content.
    const make = (tag, className, text) => {
        const el = document.createElement(tag);
        if (className) el.className = className;
        if (text !== undefined) el.textContent = text;
        return el;
    };

    records.forEach(record => {
        const date = new Date(record.created_at).toLocaleDateString('id-ID', {
            year: 'numeric', month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit'
        });
        const title = isKK ? (record.kepala_keluarga || 'Tanpa Nama') : (record.nama || 'Tanpa Nama');
        const idVal = isKK ? (record.no_kk || '-') : (record.nik || '-');
        const imgUrl = `${imgPrefix}${record.image_path}`;

        const card = make('div', 'archive-card');

        // Thumbnail. loading is set before src so lazy loading applies.
        const imgWrap = make('div', 'archive-card-img');
        const img = document.createElement('img');
        img.loading = 'lazy';
        img.alt = title;
        img.src = imgUrl;
        imgWrap.appendChild(img);

        // Title plus id/date line.
        const content = make('div', 'archive-card-content');
        content.appendChild(make('h3', '', title));
        const meta = make('div', 'archive-card-meta');
        meta.appendChild(make('span', 'nik', idVal));
        meta.appendChild(make('span', 'date', date));
        content.appendChild(meta);

        // Actions: view details, download image.
        const actions = make('div', 'archive-card-actions');
        actions.style.cssText = 'display:flex; gap:0.5rem;';

        const viewBtn = make('button', 'view-btn', '👁️ Lihat');
        viewBtn.style.cssText = 'flex:1;';
        viewBtn.addEventListener('click', () => {
            // Hand over a fresh copy on every click, as the former inline
            // JSON handler did, so later edits never alter this card's data.
            window.viewArchiveDetail(JSON.parse(JSON.stringify(record)));
        });

        const downloadLink = make('a', 'view-btn', '⬇️');
        downloadLink.href = imgUrl;
        downloadLink.download = `${currentArchiveType.toUpperCase()}_${idVal}.jpg`;
        downloadLink.style.cssText = 'flex:1; text-align:center; text-decoration:none; display:flex; align-items:center; justify-content:center;';

        actions.appendChild(viewBtn);
        actions.appendChild(downloadLink);
        content.appendChild(actions);

        card.appendChild(imgWrap);
        card.appendChild(content);
        archiveList.appendChild(card);
    });
}
// Global function to view a record from the archive.
// Loads the record into the results panel and its stored image into the
// preview, then closes the archive modal. It is exposed on window so it can be
// called from outside this script's scope.
window.viewArchiveDetail = (record) => {
    extractedData = record;
    currentDocType = currentArchiveType; // Sync type so Save works correctly

    // 1. Display results.
    displayResults({ data: record, raw_text: record.raw_text || '' });

    // 2. Load the image into the preview.
    if (record.image_path) {
        const imgPrefix = currentArchiveType === 'kk' ? '/kk-images/' : '/ktp-images/';
        const imgUrl = `${imgPrefix}${record.image_path}`;
        preview.src = imgUrl;

        // Also fetch the image bytes so originalImageData, selectedFile and
        // originalImageObject describe the archived image.
        fetch(imgUrl)
            .then(res => {
                // Do not treat an error page as image data.
                if (!res.ok) throw new Error(`HTTP ${res.status}`);
                return res.blob();
            })
            .then(blob => {
                const reader = new FileReader();
                reader.onloadend = () => {
                    originalImageData = reader.result;
                    selectedFile = new File([blob], record.image_path, { type: blob.type });
                    // Prepare the image object used by the rotation editor.
                    // The editor is not rendered here; the preview stays shown.
                    const img = new Image();
                    img.onload = () => {
                        originalImageObject = img;
                    };
                    img.src = reader.result;
                };
                reader.readAsDataURL(blob);
            })
            .catch(err => {
                // Previously an unhandled rejection; the preview <img> still
                // shows whatever the browser could load from imgUrl.
                console.error('Archive image load error:', err);
            });

        // Show the static preview; hide the dropzone prompt and the canvas.
        preview.style.display = 'block';
        cropCanvas.style.display = 'none';
        dropzone.querySelector('.dropzone-content').style.display = 'none';

        // Crop UI: container and action buttons visible, overlay hidden.
        cropContainer.style.display = 'block';
        cropArea.style.display = 'none';
        cropActions.style.display = 'flex';

        // Reset the rotation controls for viewing.
        currentRotation = 0;
        updateRotationUI();
    }
    archiveModal.style.display = 'none';
};
// Reload button: reload the page.
document.getElementById('reloadBtn').addEventListener('click', function () {
    window.location.reload();
});
</script>
<!-- Print Area: Use visibility hidden/height 0 to ensure images load but are invisible on screen -->
<div id="printArea" style="visibility: hidden; height: 0; overflow: hidden; position: absolute; z-index: -1;"></div>
<script>
// (This comment is only a marker; the main script is above.)
</script>
</body>

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB