feat: Add web UI for KTP OCR with modern dark theme, update extractor logic, and configure deployment.
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
mysql-data/
|
||||
__pycache__/
|
||||
venv/
|
||||
.env
|
||||
.gemini/
|
||||
|
Before Width: | Height: | Size: 201 KiB |
|
Before Width: | Height: | Size: 142 KiB After Width: | Height: | Size: 83 KiB |
BIN
KTP/3518080907840001.jpg
Normal file
|
After Width: | Height: | Size: 92 KiB |
|
Before Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 255 KiB After Width: | Height: | Size: 260 KiB |
|
Before Width: | Height: | Size: 78 KiB After Width: | Height: | Size: 90 KiB |
|
Before Width: | Height: | Size: 72 KiB After Width: | Height: | Size: 77 KiB |
|
Before Width: | Height: | Size: 237 KiB |
|
Before Width: | Height: | Size: 216 KiB |
2
TODO.md
Normal file
@@ -0,0 +1,2 @@
|
||||
Hasil ekstraksi belum sempurna.
|
||||
Otomatis simpan hasil proses ekstraksi begitu selesai; jika setelah diedit tombol Simpan diklik, perbarui (update) hasil yang terdahulu.
|
||||
@@ -1,5 +1,3 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
@@ -13,9 +11,5 @@ services:
|
||||
ports:
|
||||
- "3306:3306"
|
||||
volumes:
|
||||
- mysql_data:/var/lib/mysql
|
||||
- ./mysql-data:/var/lib/mysql
|
||||
command: --default-authentication-plugin=mysql_native_password
|
||||
|
||||
volumes:
|
||||
mysql_data:
|
||||
driver: local
|
||||
|
||||
376
ktp_extractor.py
@@ -211,6 +211,12 @@ class KTPExtractor:
|
||||
|
||||
text_upper = text.upper().strip()
|
||||
|
||||
# Explicit conflict prevention
|
||||
if field_name == 'agama' and 'ALAMAT' in text_upper:
|
||||
return False
|
||||
if field_name == 'alamat' and 'AGAMA' in text_upper:
|
||||
return False
|
||||
|
||||
# Coba exact match dulu (lebih cepat)
|
||||
for label in self.FIELD_LABELS[field_name]:
|
||||
if label in text_upper:
|
||||
@@ -229,12 +235,39 @@ class KTPExtractor:
|
||||
continue
|
||||
# Bandingkan dengan kata pertama
|
||||
ratio = difflib.SequenceMatcher(None, first_word, label_parts[0]).ratio()
|
||||
if ratio >= cutoff:
|
||||
print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})")
|
||||
|
||||
# Dynamic cutoff logic
|
||||
effective_cutoff = cutoff
|
||||
if len(first_word) < 7:
|
||||
# Use stricter cutoff for short words to prevent ALAMAT (6) matching AGAMA (5) -> ratio 0.73
|
||||
effective_cutoff = max(cutoff, 0.82)
|
||||
|
||||
if ratio >= effective_cutoff:
|
||||
if DEBUG_MODE:
|
||||
print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]:
|
||||
"""
|
||||
Extract content after a label (fuzzy/regex match).
|
||||
Handles cases with/without colons.
|
||||
"""
|
||||
if not text: return None
|
||||
|
||||
# 1. Try Regex Search if pattern provided
|
||||
if label_pattern:
|
||||
# Construct regex: Label + optional spaces/colon + (Group 1: Value)
|
||||
# flags=re.IGNORECASE should be used
|
||||
# We want to find the END of the label
|
||||
match = re.search(f"({label_pattern})[:\\s]*", text, re.IGNORECASE)
|
||||
if match:
|
||||
# Return everything after the match end
|
||||
return text[match.end():].strip()
|
||||
|
||||
return None
|
||||
|
||||
def _parse_balinese_name(self, name: str) -> str:
|
||||
"""
|
||||
Parse nama Bali yang digabung OCR dan tambahkan spasi yang tepat.
|
||||
@@ -521,11 +554,21 @@ class KTPExtractor:
|
||||
# TTL from ttl zone
|
||||
if 'ttl' in zone_texts:
|
||||
for text in zone_texts['ttl']:
|
||||
if 'tempat' in text.lower() or 'lahir' in text.lower():
|
||||
# Skip if text is JUST the label (length check or fuzzy match)
|
||||
if len(text) < 15 and self._is_label_match(text, 'tempat_lahir'):
|
||||
continue
|
||||
|
||||
if 'tempat' in text.lower() or 'lahir' in text.lower() or 'tgl' in text.lower() or len(text) > 5:
|
||||
val = self._extract_value_from_text(text)
|
||||
if val:
|
||||
# Don't accept if val looks like label
|
||||
if self._is_label_match(val, 'tempat_lahir') and len(val) < 20:
|
||||
continue
|
||||
|
||||
self._parse_ttl(val, result)
|
||||
break
|
||||
# Only break if we actually got a birth date, otherwise keep looking
|
||||
if result['tanggal_lahir']:
|
||||
break
|
||||
|
||||
# JENIS KELAMIN
|
||||
if 'jenis_kelamin' in zone_texts:
|
||||
@@ -534,7 +577,7 @@ class KTPExtractor:
|
||||
if 'laki' in text_lower:
|
||||
result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
break
|
||||
elif 'perempuan' in text_lower:
|
||||
elif 'perempuan' in text_lower or 'wanita' in text_lower:
|
||||
result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
break
|
||||
|
||||
@@ -618,15 +661,36 @@ class KTPExtractor:
|
||||
|
||||
# PEKERJAAN
|
||||
if 'pekerjaan' in zone_texts:
|
||||
best_job = None
|
||||
potential_job = None
|
||||
|
||||
for text in zone_texts['pekerjaan']:
|
||||
val = text.upper()
|
||||
if 'pekerjaan' in text.lower():
|
||||
val = self._extract_value_from_text(text).upper()
|
||||
|
||||
# Check against list or take value
|
||||
if len(val) > 3 and 'pekerjaan' not in val.lower():
|
||||
result['pekerjaan'] = val
|
||||
break
|
||||
# Clean up
|
||||
val = val.strip()
|
||||
if not val or len(val) < 3 or 'PEKERJAAN' in val:
|
||||
continue
|
||||
|
||||
# 1. Check against wildcard/list (Priority)
|
||||
# Buruh, Karyawan, Pelajar, dll
|
||||
if any(job.upper() in val for job in self.PEKERJAAN_LIST):
|
||||
best_job = val
|
||||
break # Found a definitive job
|
||||
|
||||
# 2. Save as potential if it's NOT a known bad value (like City names)
|
||||
# Avoid capturing 'TABANAN', 'JAKARTA', date strings
|
||||
if not any(city in val for city in ['KABUPATEN', 'KOTA', 'TABANAN', 'BADUNG', 'DENPASAR', 'JAKARTA', 'BANDUNG']):
|
||||
if not re.search(r'\d{2}-\d{2}-\d{4}', val): # Avoid dates
|
||||
if potential_job is None:
|
||||
potential_job = val
|
||||
|
||||
if best_job:
|
||||
result['pekerjaan'] = best_job
|
||||
elif potential_job:
|
||||
result['pekerjaan'] = potential_job
|
||||
|
||||
# WNI
|
||||
if 'wni' in zone_texts:
|
||||
@@ -774,76 +838,144 @@ class KTPExtractor:
|
||||
result['kabupaten_kota'] = text.strip().upper()
|
||||
|
||||
# ===== NAMA =====
|
||||
if 'nama' in text_lower and result['nama'] is None:
|
||||
if result['nama'] is None and self._is_label_match(text, 'nama'):
|
||||
val = self._extract_after_label(text_normalized, 'nama')
|
||||
if val and len(val) > 2:
|
||||
result['nama'] = val.upper()
|
||||
elif i + 1 < len(texts):
|
||||
# Nama di line berikutnya
|
||||
next_text = texts[i+1].strip()
|
||||
if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']):
|
||||
result['nama'] = next_text.upper()
|
||||
current_name = ""
|
||||
|
||||
if val:
|
||||
current_name = val.upper()
|
||||
|
||||
# Loop check baris berikutnya for Name (handle 2-3 lines)
|
||||
offset = 1
|
||||
# Batasi maksimal 2 baris tambahan untuk Nama (total 3 baris)
|
||||
while i + offset < len(texts) and offset <= 2:
|
||||
next_text = texts[i+offset].strip()
|
||||
next_lower = next_text.lower()
|
||||
|
||||
is_stop = False
|
||||
|
||||
# 1. Check Stop Keywords (Field Labels below Name)
|
||||
# Stop if next line is Tempat Lahir, Jenis Kelamin, Alamat, etc.
|
||||
stop_keywords = ['tempat', 'lahir', 'tgl', 'jenis', 'kelamin', 'alamat', 'rt/rw', 'nik']
|
||||
if any(kw in next_lower for kw in stop_keywords):
|
||||
is_stop = True
|
||||
print(f" [NAMA STOP] Matched stop keyword in '{next_text}'")
|
||||
|
||||
# 2. Check Case Sensitivity (Heuristic)
|
||||
if not is_stop:
|
||||
letters = [c for c in next_text if c.isalpha()]
|
||||
if letters:
|
||||
upper_count = sum(1 for c in letters if c.isupper())
|
||||
upper_ratio = upper_count / len(letters)
|
||||
# If mostly lowercase/title case, likely a label (e.g. "Tempat Lahir")
|
||||
if upper_ratio < 0.4 and len(letters) > 3:
|
||||
is_stop = True
|
||||
print(f" [NAMA STOP] Likely Label based on Case (Ratio={upper_ratio:.2f})")
|
||||
|
||||
if not is_stop:
|
||||
if len(next_text) > 2:
|
||||
print(f" [NAMA MERGE] Merging '{next_text}'")
|
||||
if current_name:
|
||||
current_name += " " + next_text.upper()
|
||||
else:
|
||||
current_name = next_text.upper()
|
||||
offset += 1
|
||||
else:
|
||||
print(f" [NAMA SKIP] Too short '{next_text}'")
|
||||
# Kalau terlalu pendek (noise), boleh skip atau stop?
|
||||
# Biasanya nama tidak putus jadi 1 huruf. Anggap stop utk aman, atau skip.
|
||||
# Kita skip saja increment offset.
|
||||
offset += 1
|
||||
else:
|
||||
break
|
||||
|
||||
if current_name:
|
||||
# Fix Spacing Issues (e.g. BAGUSGEDE -> BAGUS GEDE)
|
||||
current_name = re.sub(r'(BAGUS)(GEDE)', r'\1 \2', current_name)
|
||||
current_name = re.sub(r'(ANAK)(AGUNG)', r'\1 \2', current_name) # Common issue
|
||||
result['nama'] = current_name
|
||||
|
||||
# ===== TEMPAT/TANGGAL LAHIR =====
|
||||
# Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels
|
||||
if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower):
|
||||
if result['tempat_lahir'] is None or result['tanggal_lahir'] is None:
|
||||
# Extract value after label using full-width or standard colon
|
||||
ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
|
||||
if ttl:
|
||||
self._parse_ttl(ttl, result)
|
||||
elif ':' in text or ':' in text:
|
||||
# Value is after colon but _extract_after_label didn't catch it
|
||||
parts = re.split(r'[::]', text, 1)
|
||||
if len(parts) > 1 and parts[1].strip():
|
||||
self._parse_ttl(parts[1].strip(), result)
|
||||
elif i + 1 < len(texts):
|
||||
# TTL di line berikutnya
|
||||
next_text = texts[i+1].strip()
|
||||
if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']):
|
||||
self._parse_ttl(next_text, result)
|
||||
|
||||
# ===== JENIS KELAMIN =====
|
||||
if any(kw in text_lower for kw in self.MALE_KEYWORDS):
|
||||
if result['jenis_kelamin'] is None:
|
||||
result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS):
|
||||
if result['jenis_kelamin'] is None:
|
||||
result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
|
||||
# ===== GOLONGAN DARAH =====
|
||||
if 'darah' in text_lower or 'gol.' in text_lower:
|
||||
# Try to find blood type on same line
|
||||
gol_match = re.search(r'(?:gol|darah)[.\s::]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
|
||||
if gol_match and result['gol_darah'] is None:
|
||||
result['gol_darah'] = gol_match.group(1).upper()
|
||||
elif result['gol_darah'] is None and i + 1 < len(texts):
|
||||
# Blood type might be on next line (real KTP pattern)
|
||||
next_text = texts[i+1].strip()
|
||||
if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE):
|
||||
result['gol_darah'] = next_text.upper()
|
||||
# Standalone blood type (e.g., just "O" or "A+" on its own line)
|
||||
if result['gol_darah'] is None:
|
||||
if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
|
||||
result['gol_darah'] = text.strip().upper()
|
||||
# ... (starts around line 830 in original) ...
|
||||
|
||||
# (Skipping down to ALAMAT section for the replacement block)
|
||||
# ... regex find ...
|
||||
|
||||
# ===== ALAMAT ===== (dengan fuzzy label matching)
|
||||
if result['alamat'] is None and self._is_label_match(text, 'alamat'):
|
||||
val = self._extract_after_label(text_normalized, r'a{1,2}l{0,2}a?m{0,2}a?t')
|
||||
|
||||
# Logic multi-line
|
||||
current_addr = ""
|
||||
if val:
|
||||
result['alamat'] = val.upper()
|
||||
elif i + 1 < len(texts):
|
||||
# Ambil nilai dari baris berikutnya
|
||||
next_text = texts[i+1].strip()
|
||||
# Pastikan bukan label field lain
|
||||
if len(next_text) > 2 and not self._is_label_match(next_text, 'rt_rw'):
|
||||
result['alamat'] = next_text.upper()
|
||||
current_addr = val.upper()
|
||||
|
||||
# Loop check baris berikutnya (bisa ambil i+1, i+2, dst selama bukan label)
|
||||
offset = 1
|
||||
while i + offset < len(texts):
|
||||
next_text = texts[i+offset].strip()
|
||||
print(f" [ALAMAT CHECK] Offset +{offset}: '{next_text}'")
|
||||
|
||||
next_lower = next_text.lower()
|
||||
is_stop = False
|
||||
|
||||
# 1. Cek Pola RT/RW (angka/angka) -> Pasti STOP
|
||||
if re.search(r'\d{3}\s*/\s*\d{3}', next_text) or re.match(r'^[.\-]+\s*/\s*[.\-]+$', next_text):
|
||||
is_stop = True
|
||||
print(" [ALAMAT STOP] Matched RT/RW pattern")
|
||||
|
||||
# 2. Cek Keywords Label Pembatas
|
||||
elif any(next_lower.startswith(prefix) for prefix in ['rt/', 'rw', 'rt/rw', 'kel', 'desa', 'kec', 'agama', 'status', 'kawin']):
|
||||
is_stop = True
|
||||
print(" [ALAMAT STOP] Matched label prefix")
|
||||
|
||||
# 3. Cek Keywords Spesifik Full Word
|
||||
elif any(kw in next_lower for kw in ['kelurahan', 'kecamatan', 'perkawinan', 'kewarganegaraan']):
|
||||
is_stop = True
|
||||
print(" [ALAMAT STOP] Matched distinct label word")
|
||||
|
||||
# 4. Check Case Sensitivity
|
||||
if not is_stop:
|
||||
letters = [c for c in next_text if c.isalpha()]
|
||||
if letters:
|
||||
upper_count = sum(1 for c in letters if c.isupper())
|
||||
upper_ratio = upper_count / len(letters)
|
||||
# Jika hampir semua huruf kecil/Title Case (ratio < 0.4), dicurigai sebagai Label
|
||||
# Kecuali kata-kata pendek (< 5 chars)
|
||||
if upper_ratio < 0.4 and len(letters) > 4:
|
||||
is_stop = True
|
||||
print(f" [ALAMAT STOP] Detected Title Case/Lowercase (Ratio={upper_ratio:.2f}) -> Likely Label")
|
||||
|
||||
# Jika BUKAN pembatas, AMBIL sebagai lanjutan alamat
|
||||
if not is_stop:
|
||||
if len(next_text) > 1:
|
||||
print(f" [ALAMAT MERGE] Merging '{next_text}'")
|
||||
if current_addr:
|
||||
current_addr += " " + next_text.upper()
|
||||
else:
|
||||
current_addr = next_text.upper()
|
||||
offset += 1 # Lanjut cek baris berikutnya
|
||||
else:
|
||||
print(f" [ALAMAT SKIP] Line too short '{next_text}'")
|
||||
offset += 1 # Skip noise, try next line? Or stop? usually skip noise is safer to continue
|
||||
else:
|
||||
print(f" [ALAMAT STOP] Hit Stop Condition '{next_text}'")
|
||||
break # Stop loop
|
||||
|
||||
if current_addr:
|
||||
result['alamat'] = current_addr
|
||||
|
||||
if current_addr:
|
||||
result['alamat'] = current_addr
|
||||
|
||||
# ===== RT/RW =====
|
||||
rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
|
||||
if rt_rw_match:
|
||||
result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
|
||||
|
||||
# Relaxed pattern to handle -/- or 000/000
|
||||
if result['rt_rw'] is None:
|
||||
rt_rw_match = re.search(r'(\d{1,3}|-)\s*/\s*(\d{1,3}|-)', text)
|
||||
if rt_rw_match:
|
||||
result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
|
||||
print(f" [RT/RW] Found {result['rt_rw']}")
|
||||
|
||||
# ===== KELURAHAN/DESA =====
|
||||
if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower:
|
||||
if result['kel_desa'] is None:
|
||||
@@ -853,6 +985,77 @@ class KTPExtractor:
|
||||
elif i + 1 < len(texts):
|
||||
result['kel_desa'] = texts[i+1].strip().upper()
|
||||
|
||||
# ===== TEMPAT/TANGGAL LAHIR =====
|
||||
# Gunakan _is_label_match untuk fleksibilitas (e.g. Tempat/Tgl Lahir, Tmpt Lahir)
|
||||
if result['tempat_lahir'] is None and self._is_label_match(text, 'ttl'):
|
||||
print(f" [TTL DEBUG] Matched Label on line {i}: '{text}'")
|
||||
# Regex pattern yang SANGAT fleksibel untuk label TTL
|
||||
# Menangani berbagai variasi: Tmpat/Tgl Lahir, Tempat. Tgl. Lahir, dll
|
||||
# Intinya: T...mp...t <junk> L...hir
|
||||
val = self._extract_after_label(text_normalized, r't[ea]m?p?a?t.*?l[a@]hi?r?|tgl.*?l[a@]hi?r?')
|
||||
|
||||
# Jika val kosong, coba ambil dari baris berikutnya
|
||||
if not val and i + 1 < len(texts):
|
||||
next_text = texts[i+1].strip()
|
||||
next_lower = next_text.lower()
|
||||
stop_keywords = ['jenis', 'kelamin', 'alamat', 'gol', 'darah']
|
||||
if not any(kw in next_lower for kw in stop_keywords):
|
||||
val = next_text.upper()
|
||||
print(f" [TTL DEBUG] Took next line: '{val}'")
|
||||
|
||||
if val:
|
||||
print(f" [TTL DEBUG] Parsing value: '{val}'")
|
||||
self._parse_ttl(val, result)
|
||||
if result['tanggal_lahir']:
|
||||
print(f" [TTL DEBUG] Success: {result['tanggal_lahir']}")
|
||||
|
||||
# ===== JENIS KELAMIN =====
|
||||
if result['jenis_kelamin'] is None:
|
||||
# 1. Coba cari Label dulu
|
||||
if self._is_label_match(text, 'jenis_kelamin'):
|
||||
val = self._extract_after_label(text_normalized, r'j[ea]ni?s\s*k[ea]l[a@]?mi?n')
|
||||
if val:
|
||||
if 'LAKI' in val.upper(): result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
elif 'PEREMPUAN' in val.upper() or 'WANITA' in val.upper(): result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
|
||||
if result['jenis_kelamin'] is None and i + 1 < len(texts):
|
||||
next_text = texts[i+1].upper()
|
||||
if 'LAKI' in next_text: result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
elif 'PEREMPUAN' in next_text or 'WANITA' in next_text: result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
|
||||
# 2. Fallback: Cari langsung keyword VALUES
|
||||
if result['jenis_kelamin'] is None:
|
||||
text_upper = text.upper()
|
||||
if 'LAKI-LAKI' in text_upper or 'LAKI - LAKI' in text_upper:
|
||||
result['jenis_kelamin'] = 'LAKI-LAKI'
|
||||
elif 'PEREMPUAN' in text_upper:
|
||||
result['jenis_kelamin'] = 'PEREMPUAN'
|
||||
|
||||
# ===== GOLONGAN DARAH =====
|
||||
if result['gol_darah'] is None:
|
||||
# Cek label
|
||||
if self._is_label_match(text, 'gol_darah'):
|
||||
val = self._extract_after_label(text_normalized, r'g?o?l\.?\s*d?a?r?a?h')
|
||||
# Jika label ketemu tapi val kosong, mungkin nempel (Gol.Darah : O)
|
||||
# atau ada di baris ini
|
||||
if val:
|
||||
gd_match = re.search(r'([ABO]{1,2}[+\-]?)', val)
|
||||
if gd_match:
|
||||
result['gol_darah'] = gd_match.group(1).upper()
|
||||
else:
|
||||
# Coba cari pattern gol darah di baris yang sama dengan label
|
||||
gd_match = re.search(r'([ABO]{1,2}[+\-]?)', text.upper().replace('0','O'))
|
||||
if gd_match:
|
||||
result['gol_darah'] = gd_match.group(1).upper()
|
||||
|
||||
# Cek next line jika baris ini cuma label "Gol Darah"
|
||||
if result['gol_darah'] is None and self._is_label_match(text, 'gol_darah') and i+1 < len(texts):
|
||||
next_text = texts[i+1].strip().upper()
|
||||
if len(next_text) < 5: # Pendek, asumsi gol darah
|
||||
gd_match = re.search(r'([ABO]{1,2}[+\-]?)', next_text)
|
||||
if gd_match:
|
||||
result['gol_darah'] = gd_match.group(1).upper()
|
||||
|
||||
# ===== KECAMATAN =====
|
||||
if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower):
|
||||
if result['kecamatan'] is None:
|
||||
@@ -940,15 +1143,25 @@ class KTPExtractor:
|
||||
# ===== TANGGAL PENERBITAN (biasanya format DD-MM-YYYY di akhir) =====
|
||||
# Look for date that is NOT tanggal lahir (different date)
|
||||
if result['tanggal_penerbitan'] is None:
|
||||
# Match date format at end of text or standalone date
|
||||
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip())
|
||||
if date_match:
|
||||
found_date = date_match.group(1)
|
||||
# Make sure it's not the same as tanggal_lahir
|
||||
if result['tanggal_lahir'] != found_date:
|
||||
# Likely penerbitan if after berlaku_hingga was found
|
||||
if result['berlaku_hingga'] or i > len(texts) * 0.7:
|
||||
result['tanggal_penerbitan'] = found_date
|
||||
# 1. Skip if contains Keywords of other date fields
|
||||
# Jangan ambil jika ada kata 'LAHIR', 'TGL', 'BERLAKU', 'SEUMUR', 'HINGGA'
|
||||
line_clean = text.lower()
|
||||
if any(kw in line_clean for kw in ['lahir', 'lahlr', 'tgl', 'tempat', 'berlaku', 'seumur', 'hingga', 'hidup']):
|
||||
pass # Skip
|
||||
else:
|
||||
# Match date format at end of text or standalone date
|
||||
date_match = re.search(r'(\d{2}[-\s/]\d{2}[-\s/]\d{4})$', text.strip())
|
||||
if date_match:
|
||||
found_date = date_match.group(1).replace(' ', '-')
|
||||
# Make sure it's not the same as tanggal_lahir
|
||||
if result['tanggal_lahir'] != found_date:
|
||||
# Strict Position Check: MUST be in the bottom 30% of lines
|
||||
# (Untuk menghindari salah ambil tanggal lahir yg mungkin gagal diparsing sbg TTL)
|
||||
if i > len(texts) * 0.7:
|
||||
result['tanggal_penerbitan'] = found_date
|
||||
print(f" [TGL TERBIT] Found '{found_date}' at index {i}/{len(texts)}")
|
||||
else:
|
||||
print(f" [TGL TERBIT SKIP] Date '{found_date}' is too high ({i}/{len(texts)})")
|
||||
|
||||
# ============================================
|
||||
# AGGRESSIVE SCAN: Cari agama dari semua teks OCR
|
||||
@@ -1068,6 +1281,9 @@ class KTPExtractor:
|
||||
ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
|
||||
ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
|
||||
|
||||
# Handle "0508-1978" -> "05-08-1978" (Missing separator between day/month)
|
||||
ttl_text = re.sub(r'(\d{2})(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
|
||||
|
||||
# Handle 8-digit date without separator: "05081978" -> "05-08-1978"
|
||||
date_8digit = re.search(r'(\d{8})', ttl_text)
|
||||
if date_8digit:
|
||||
@@ -1077,13 +1293,15 @@ class KTPExtractor:
|
||||
|
||||
# Handle merged city+date like "JAKARTA05-08-1978" - add space before digits
|
||||
ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE)
|
||||
# Handle merged city+date like "JAKARTA.05-08-1978" -> replace dot with space
|
||||
ttl_text = re.sub(r'([A-Z])\.(\d)', r'\1 \2', ttl_text, flags=re.IGNORECASE)
|
||||
|
||||
# Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY"
|
||||
date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text)
|
||||
if date_match:
|
||||
result['tanggal_lahir'] = date_match.group(1)
|
||||
# Tempat adalah bagian sebelum tanggal
|
||||
place = ttl_text[:date_match.start()].strip(' ,:-/')
|
||||
place = ttl_text[:date_match.start()].strip(' ,:-/.')
|
||||
# Clean up label remnants
|
||||
place = re.sub(r'^(tempat|tgl|lahir|:|:)[/\s::]*', '', place, flags=re.IGNORECASE).strip()
|
||||
if place and len(place) > 2:
|
||||
|
||||
@@ -852,10 +852,9 @@ footer a:hover {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
/* Print Styles */
|
||||
/* Print Styles */
|
||||
@media print {
|
||||
|
||||
/* Reset Page */
|
||||
@page {
|
||||
margin: 0;
|
||||
size: auto;
|
||||
@@ -865,72 +864,61 @@ footer a:hover {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background: white !important;
|
||||
/* Ensure no scroll or extra pages from hidden content */
|
||||
height: 100vh !important;
|
||||
overflow: hidden !important;
|
||||
height: auto !important;
|
||||
overflow: visible !important;
|
||||
}
|
||||
|
||||
/* Hide EVERYTHING initially with high specificity */
|
||||
body * {
|
||||
visibility: hidden !important;
|
||||
/* Hide everything by default using display:none to collapse space */
|
||||
body> :not(#printArea) {
|
||||
display: none !important;
|
||||
/* Force display none to remove layout space */
|
||||
}
|
||||
|
||||
/* Show ONLY Print Area and its children */
|
||||
#printArea,
|
||||
/* Show Print Area */
|
||||
#printArea {
|
||||
display: grid !important;
|
||||
visibility: visible !important;
|
||||
}
|
||||
|
||||
#printArea * {
|
||||
visibility: visible !important;
|
||||
display: flex !important;
|
||||
/* Restore display for parent */
|
||||
}
|
||||
|
||||
/* Reset display for children of printArea specifically */
|
||||
#printArea * {
|
||||
display: block !important;
|
||||
/* Default to block or whatever needed */
|
||||
}
|
||||
|
||||
/* Specific fix for image inside */
|
||||
#printArea img {
|
||||
display: inline-block !important;
|
||||
}
|
||||
|
||||
/* Print Area Layout */
|
||||
#printArea {
|
||||
position: fixed !important;
|
||||
/* Fixed helps detach from flow */
|
||||
position: relative !important;
|
||||
left: 0 !important;
|
||||
top: 0 !important;
|
||||
width: 100% !important;
|
||||
height: 100% !important;
|
||||
overflow: visible !important;
|
||||
z-index: 99999 !important;
|
||||
background: white !important;
|
||||
margin: 0 !important;
|
||||
padding: 10px !important;
|
||||
|
||||
display: flex !important;
|
||||
justify-content: center !important;
|
||||
align-items: center !important;
|
||||
padding: 0 !important;
|
||||
display: grid !important;
|
||||
grid-template-columns: repeat(auto-fill, 85.6mm) !important;
|
||||
gap: 10px !important;
|
||||
align-content: start !important;
|
||||
justify-content: start !important;
|
||||
}
|
||||
|
||||
.ktp-print-size {
|
||||
/* Standar ISO/IEC 7810 ID-1: 85.60 × 53.98 mm */
|
||||
width: 85.60mm !important;
|
||||
/* KTP Item */
|
||||
.ktp-print-item {
|
||||
width: 85.6mm !important;
|
||||
height: 53.98mm !important;
|
||||
max-width: none !important;
|
||||
max-height: none !important;
|
||||
border: 1px dashed #ccc;
|
||||
box-shadow: none !important;
|
||||
/* Remove any shadow */
|
||||
object-fit: contain !important;
|
||||
border: 1px dashed #999;
|
||||
box-sizing: border-box !important;
|
||||
break-inside: avoid !important;
|
||||
}
|
||||
|
||||
/* KK Item (Full Page) */
|
||||
.a4-print-size {
|
||||
/* A4 Landscape: 297mm x 210mm */
|
||||
/* Use slightly less to account for margins if necessary, but standard is distinct */
|
||||
width: 297mm !important;
|
||||
height: 210mm !important;
|
||||
max-width: none !important;
|
||||
max-height: none !important;
|
||||
border: none;
|
||||
width: 100% !important;
|
||||
height: auto !important;
|
||||
display: block !important;
|
||||
page-break-after: always !important;
|
||||
}
|
||||
|
||||
/* Ensure modal is hidden */
|
||||
.modal {
|
||||
display: none !important;
|
||||
}
|
||||
}
|
||||
@@ -178,9 +178,53 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Print Settings Modal -->
|
||||
<div id="printSettingsModal" class="modal" style="display: none;">
|
||||
<div class="modal-content" style="max-width: 500px;">
|
||||
<div class="modal-header">
|
||||
<h2>🖨️ Pengaturan Cetak</h2>
|
||||
<span class="close-btn" id="closePrintBtn">×</span>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<label>Jumlah Salinan (Copies)</label>
|
||||
<input type="number" id="printCopies" class="form-control" value="1" min="1" max="50">
|
||||
<p class="suggestion-text">Masukkan jumlah KTP yang ingin dicetak dalam satu halaman.</p>
|
||||
</div>
|
||||
<div class="form-group" style="margin-top: 1rem;">
|
||||
<label style="display: flex; align-items: center; gap: 0.5rem; cursor: pointer;">
|
||||
<input type="checkbox" id="printAutoArrange" checked>
|
||||
<span>Atur Otomatis (Hemat Kertas)</span>
|
||||
</label>
|
||||
<p class="suggestion-text">Otomatis menata gambar berderet untuk memaksimalkan ruang kertas A4.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="print-preview-info"
|
||||
style="margin-top: 1.5rem; padding: 1rem; background: var(--bg-primary); border-radius: var(--radius);">
|
||||
<p>ℹ️ <strong>Tips:</strong> Saat dialog print muncul:</p>
|
||||
<ul style="margin-left: 1.5rem; margin-top: 0.5rem; color: var(--text-secondary);">
|
||||
<li>Pilih Paper Size: <strong>A4</strong></li>
|
||||
<li>Margins: <strong>Minimum / None</strong></li>
|
||||
<li>Scale: <strong>100%</strong></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 2rem; display: flex; gap: 1rem;">
|
||||
<button id="cancelPrintBtn" class="action-btn secondary" style="flex: 1;">Batal</button>
|
||||
<button id="confirmPrintBtn" class="action-btn primary" style="flex: 1;">🖨️ Cetak
|
||||
Sekarang</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<footer>
|
||||
<p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> • Data
|
||||
diproses secara lokal</p>
|
||||
<p style="margin-top: 0.5rem; font-size: 0.9em; opacity: 0.8;">© Copyright by Wartana</p>
|
||||
</footer>
|
||||
</div>
|
||||
|
||||
@@ -686,6 +730,20 @@
|
||||
return indexA - indexB;
|
||||
});
|
||||
|
||||
// Disable Save button initially (since it's auto-saved)
|
||||
const saveBtn = document.getElementById('saveBtn');
|
||||
if (result.saved_to_db) {
|
||||
saveBtn.disabled = true;
|
||||
saveBtn.textContent = '💾 Tersimpan';
|
||||
saveBtn.classList.add('secondary');
|
||||
saveBtn.classList.remove('primary');
|
||||
} else {
|
||||
saveBtn.disabled = false;
|
||||
saveBtn.textContent = '💾 Simpan';
|
||||
saveBtn.classList.remove('secondary');
|
||||
saveBtn.classList.add('primary');
|
||||
}
|
||||
|
||||
for (const key of sortedKeys) {
|
||||
const value = data[key];
|
||||
if (key === 'anggota_keluarga') {
|
||||
@@ -701,11 +759,24 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Add change listener to all inputs to re-enable save button
|
||||
document.querySelectorAll('.editable-field').forEach(input => {
|
||||
input.addEventListener('input', enableSaveButton);
|
||||
});
|
||||
|
||||
rawText.textContent = result.raw_text;
|
||||
resultsSection.style.display = 'block';
|
||||
resultsSection.scrollIntoView({ behavior: 'smooth' });
|
||||
}
|
||||
|
||||
/**
 * Put the save button back into its active "save changes" state.
 *
 * Registered as the `input` listener on every `.editable-field`, so any
 * edit re-enables the button and switches it from the `secondary` class
 * to the `primary` class.
 */
function enableSaveButton() {
    const button = document.getElementById('saveBtn');

    Object.assign(button, {
        disabled: false,
        textContent: '💾 Simpan Perubahan',
    });

    button.classList.remove('secondary');
    button.classList.add('primary');
}
|
||||
|
||||
async function validateRegionData(data) {
|
||||
try {
|
||||
const response = await fetch('/api/validate-region', {
|
||||
@@ -1030,54 +1101,106 @@
|
||||
|
||||
// Print functionality
|
||||
const printBtn = document.getElementById('printBtn');
|
||||
const printSettingsModal = document.getElementById('printSettingsModal');
|
||||
const closePrintBtn = document.getElementById('closePrintBtn');
|
||||
const cancelPrintBtn = document.getElementById('cancelPrintBtn');
|
||||
const confirmPrintBtn = document.getElementById('confirmPrintBtn');
|
||||
const printCopiesInput = document.getElementById('printCopies');
|
||||
const printAutoArrange = document.getElementById('printAutoArrange');
|
||||
|
||||
// Open Print Settings
// Clicking the print button no longer prints directly: it validates that
// there is something to print and then opens the print-settings modal.
// Building the printable images and calling window.print() is done by the
// confirm-button handler.
printBtn.addEventListener('click', () => {
    // Something is printable when either the preview image has a real source
    // and is shown, or the crop canvas is shown.
    const isPreviewVisible = preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src;
    const isCanvasVisible = cropCanvas.style.display !== 'none';

    if (!isPreviewVisible && !isCanvasVisible) {
        alert('Tidak ada gambar untuk dicetak!');
        return;
    }

    // Reset the dialog to its defaults every time it is opened.
    printCopiesInput.value = 1;
    printAutoArrange.checked = currentDocType === 'ktp'; // Default on for KTP
    printSettingsModal.style.display = 'block';
});

// Close Print Settings
// Hides the print-settings modal; shared by the close and cancel buttons.
function closePrintModal() {
    printSettingsModal.style.display = 'none';
}

closePrintBtn.addEventListener('click', closePrintModal);
cancelPrintBtn.addEventListener('click', closePrintModal);
|
||||
// Confirm Print
// Fills #printArea with the requested number of copies of the current image
// and opens the browser print dialog once every copy has finished loading.
confirmPrintBtn.addEventListener('click', () => {
    const printArea = document.getElementById('printArea');
    printArea.innerHTML = ''; // Clear previous

    // Determine the source image: prefer the preview (result/archive),
    // otherwise fall back to the crop canvas that is being edited.
    let imgSrc = '';
    if (preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src) {
        imgSrc = preview.src;
    } else if (cropCanvas.style.display !== 'none') {
        imgSrc = cropCanvas.toDataURL('image/jpeg', 0.95);
    }

    if (!imgSrc) {
        alert('Gagal mengambil gambar source.');
        closePrintModal();
        return;
    }

    // Parse with an explicit radix and never go below one copy, so an empty,
    // zero or negative input cannot produce a blank print job.
    const copies = Math.max(1, parseInt(printCopiesInput.value, 10) || 1);

    const isKtp = currentDocType === 'ktp';
    const itemClass = isKtp ? 'ktp-print-item' : 'a4-print-size';

    if (isKtp) {
        if (!printAutoArrange.checked) {
            // Auto-arrange off: override the grid layout with normal block flow.
            printArea.style.display = 'block';
        } else {
            // Auto-arrange on: drop any inline override so the stylesheet's
            // own layout for #printArea applies again.
            printArea.style.removeProperty('display');
        }
    }

    // Create one <img> per copy and remember when each one settles.
    const loads = [];
    for (let i = 0; i < copies; i++) {
        const img = document.createElement('img');
        loads.push(new Promise((resolve, reject) => {
            // Handlers are attached before src is set so that a cached or
            // data-URL image cannot finish loading unnoticed.
            img.onload = resolve;
            img.onerror = reject;
        }));
        img.src = imgSrc;
        img.className = itemClass;
        printArea.appendChild(img);
    }

    closePrintModal();

    // Print only after all copies are loaded; the short delay gives the
    // browser a moment to lay the images out first.
    Promise.all(loads).then(
        () => setTimeout(() => window.print(), 100),
        () => alert('Gagal memuat gambar untuk dicetak.')
    );
});
|
||||
|
||||
// Download functionality
|
||||
@@ -1228,6 +1351,7 @@
|
||||
// Hide a modal when the click lands on the modal element itself
// (a click on one of its descendants does not match and is ignored).
window.addEventListener('click', (event) => {
    const modals = [archiveModal, loginModal, printSettingsModal];
    for (const modal of modals) {
        if (event.target === modal) {
            modal.style.display = 'none';
        }
    }
});
|
||||
|
||||
async function loadArchive() {
|
||||
@@ -1345,8 +1469,8 @@
|
||||
window.location.reload();
|
||||
});
|
||||
</script>
|
||||
<!-- Print Area: Use visibility hidden/height 0 to ensure images load but are invisible on screen -->
|
||||
<div id="printArea" style="visibility: hidden; height: 0; overflow: hidden; position: absolute; z-index: -1;"></div>
|
||||
<!-- Print Area -->
|
||||
<div id="printArea"></div>
|
||||
|
||||
<script>
|
||||
// ... (this comment is just marker, main script is above)
|
||||
|
||||
|
Before Width: | Height: | Size: 204 KiB After Width: | Height: | Size: 87 KiB |