feat: Add web UI for KTP OCR with modern dark theme, update extractor logic, and configure deployment.

2025-12-31 13:54:57 +08:00
parent 2f85088467
commit 108ff1c1fe
18 changed files with 504 additions and 173 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,5 @@
+mysql-data/
+__pycache__/
+venv/
+.env
+.gemini/
--- a/KK/5103040808220001.jpg
+++ b/KK/5103040808220001.jpg
--- a/KTP/3303080307040003.jpg
+++ b/KTP/3303080307040003.jpg
--- a/KTP/3518080907840001.jpg
+++ b/KTP/3518080907840001.jpg
--- a/KTP/3529245512000002.jpg
+++ b/KTP/3529245512000002.jpg
--- a/KTP/3671092111950003.jpg
+++ b/KTP/3671092111950003.jpg
--- a/KTP/5102045811690001.jpg
+++ b/KTP/5102045811690001.jpg
--- a/KTP/5103022906800001.jpg
+++ b/KTP/5103022906800001.jpg
--- a/KTP/5171042004950004.jpg
+++ b/KTP/5171042004950004.jpg
--- a/KTP/7306046502850001.jpg
+++ b/KTP/7306046502850001.jpg
--- a/TODO.md
+++ b/TODO.md
@@ -0,0 +1,2 @@
+Hasil ekstraksi belum sempurna.
+Simpan hasil proses ekstraksi secara otomatis begitu selesai; jika tombol simpan diklik setelah hasil diedit, perbarui hasil yang terdahulu.
--- a/pycache/app.cpython-313.pyc
+++ b/pycache/app.cpython-313.pyc
--- a/pycache/ktp_extractor.cpython-313.pyc
+++ b/pycache/ktp_extractor.cpython-313.pyc
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,3 @@
-version: '3.8'
-
 services:
  mysql:
    image: mysql:8.0
@@ -13,9 +11,5 @@ services:
    ports:
      - "3306:3306"
    volumes:
-      - mysql_data:/var/lib/mysql
+      - ./mysql-data:/var/lib/mysql
    command: --default-authentication-plugin=mysql_native_password
-
-volumes:
-  mysql_data:
-    driver: local
--- a/ktp_extractor.py
+++ b/ktp_extractor.py
@@ -211,6 +211,12 @@ class KTPExtractor:
        
        text_upper = text.upper().strip()
        
+        # Explicit conflict prevention
+        if field_name == 'agama' and 'ALAMAT' in text_upper:
+            return False
+        if field_name == 'alamat' and 'AGAMA' in text_upper:
+            return False
+        
        # Coba exact match dulu (lebih cepat)
        for label in self.FIELD_LABELS[field_name]:
            if label in text_upper:
@@ -229,12 +235,39 @@ class KTPExtractor:
                continue
            # Bandingkan dengan kata pertama
            ratio = difflib.SequenceMatcher(None, first_word, label_parts[0]).ratio()
-            if ratio >= cutoff:
-                print(f"  [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})")
+            
+            # Dynamic cutoff logic
+            effective_cutoff = cutoff
+            if len(first_word) < 7:
+                # Use stricter cutoff for short words to prevent ALAMAT (6) matching AGAMA (5) -> ratio 0.73
+                effective_cutoff = max(cutoff, 0.82)
+            
+            if ratio >= effective_cutoff:
+                if DEBUG_MODE:
+                    print(f"  [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})")
                return True
        
        return False
    
+    def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]:
+        """
+        Return the text that follows the first regex match of label_pattern in text.
+        Colons/whitespace right after the label are dropped; None on no match or empty input.
+        """
+        if not text: return None
+        
+        # An empty/None pattern skips the search and falls through to `return None`
+        if label_pattern:
+            # Regex: (label) followed by any run of ':' or whitespace characters.
+            # Matching is case-insensitive via re.IGNORECASE.
+            # Only the END offset of the match is used; the value is not a capture group.
+            match = re.search(f"({label_pattern})[:\\s]*", text, re.IGNORECASE)
+            if match:
+                # Everything after label + separator, stripped (can be an empty string)
+                return text[match.end():].strip()
+        
+        return None
+
    def _parse_balinese_name(self, name: str) -> str:
        """
        Parse nama Bali yang digabung OCR dan tambahkan spasi yang tepat.
@@ -521,11 +554,21 @@ class KTPExtractor:
        # TTL from ttl zone
        if 'ttl' in zone_texts:
            for text in zone_texts['ttl']:
-                if 'tempat' in text.lower() or 'lahir' in text.lower():
+                # Skip if text is JUST the label (length check or fuzzy match)
+                if len(text) < 15 and self._is_label_match(text, 'tempat_lahir'):
+                     continue
+                
+                if 'tempat' in text.lower() or 'lahir' in text.lower() or 'tgl' in text.lower() or len(text) > 5:
                    val = self._extract_value_from_text(text)
                    if val:
+                        # Don't accept if val looks like label
+                        if self._is_label_match(val, 'tempat_lahir') and len(val) < 20: 
+                             continue
+                        
                        self._parse_ttl(val, result)
-                        break
+                        # Only break if we actually got a birth date, otherwise keep looking
+                        if result['tanggal_lahir']:
+                            break
        
        # JENIS KELAMIN
        if 'jenis_kelamin' in zone_texts:
@@ -534,7 +577,7 @@ class KTPExtractor:
                if 'laki' in text_lower:
                    result['jenis_kelamin'] = 'LAKI-LAKI'
                    break
-                elif 'perempuan' in text_lower:
+                elif 'perempuan' in text_lower or 'wanita' in text_lower:
                    result['jenis_kelamin'] = 'PEREMPUAN'
                    break
        
@@ -618,15 +661,36 @@ class KTPExtractor:

        # PEKERJAAN
        if 'pekerjaan' in zone_texts:
+            best_job = None
+            potential_job = None
+            
            for text in zone_texts['pekerjaan']:
                val = text.upper()
                if 'pekerjaan' in text.lower():
                    val = self._extract_value_from_text(text).upper()
                
-                # Check against list or take value
-                if len(val) > 3 and 'pekerjaan' not in val.lower():
-                    result['pekerjaan'] = val
-                    break
+                # Clean up
+                val = val.strip()
+                if not val or len(val) < 3 or 'PEKERJAAN' in val:
+                    continue
+
+                # 1. Check against wildcard/list (Priority)
+                # Buruh, Karyawan, Pelajar, dll
+                if any(job.upper() in val for job in self.PEKERJAAN_LIST):
+                    best_job = val
+                    break # Found a definitive job
+                
+                # 2. Save as potential if it's NOT a known bad value (like City names)
+                # Avoid capturing 'TABANAN', 'JAKARTA', date strings
+                if not any(city in val for city in ['KABUPATEN', 'KOTA', 'TABANAN', 'BADUNG', 'DENPASAR', 'JAKARTA', 'BANDUNG']):
+                     if not re.search(r'\d{2}-\d{2}-\d{4}', val): # Avoid dates
+                         if potential_job is None:
+                             potential_job = val
+            
+            if best_job:
+                result['pekerjaan'] = best_job
+            elif potential_job:
+                result['pekerjaan'] = potential_job

        # WNI
        if 'wni' in zone_texts:
@@ -774,76 +838,144 @@ class KTPExtractor:
                        result['kabupaten_kota'] = text.strip().upper()
            
            # ===== NAMA =====
-            if 'nama' in text_lower and result['nama'] is None:
+            if result['nama'] is None and self._is_label_match(text, 'nama'):
                val = self._extract_after_label(text_normalized, 'nama')
-                if val and len(val) > 2:
-                    result['nama'] = val.upper()
-                elif i + 1 < len(texts):
-                    # Nama di line berikutnya
-                    next_text = texts[i+1].strip()
-                    if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']):
-                        result['nama'] = next_text.upper()
+                current_name = ""
+                
+                if val:
+                    current_name = val.upper()
+                
+                # Loop check baris berikutnya for Name (handle 2-3 lines)
+                offset = 1
+                # Batasi maksimal 2 baris tambahan untuk Nama (total 3 baris)
+                while i + offset < len(texts) and offset <= 2:
+                    next_text = texts[i+offset].strip()
+                    next_lower = next_text.lower()
+                    
+                    is_stop = False
+                    
+                    # 1. Check Stop Keywords (Field Labels below Name)
+                    # Stop if next line is Tempat Lahir, Jenis Kelamin, Alamat, etc.
+                    stop_keywords = ['tempat', 'lahir', 'tgl', 'jenis', 'kelamin', 'alamat', 'rt/rw', 'nik']
+                    if any(kw in next_lower for kw in stop_keywords):
+                        is_stop = True
+                        print(f"  [NAMA STOP] Matched stop keyword in '{next_text}'")
+
+                    # 2. Check Case Sensitivity (Heuristic)
+                    if not is_stop:
+                        letters = [c for c in next_text if c.isalpha()]
+                        if letters:
+                            upper_count = sum(1 for c in letters if c.isupper())
+                            upper_ratio = upper_count / len(letters)
+                            # If mostly lowercase/title case, likely a label (e.g. "Tempat Lahir")
+                            if upper_ratio < 0.4 and len(letters) > 3:
+                                is_stop = True
+                                print(f"  [NAMA STOP] Likely Label based on Case (Ratio={upper_ratio:.2f})")
+
+                    if not is_stop:
+                        if len(next_text) > 2:
+                             print(f"  [NAMA MERGE] Merging '{next_text}'")
+                             if current_name:
+                                 current_name += " " + next_text.upper()
+                             else:
+                                 current_name = next_text.upper()
+                             offset += 1
+                        else:
+                             print(f"  [NAMA SKIP] Too short '{next_text}'")
+                             # Kalau terlalu pendek (noise), boleh skip atau stop? 
+                             # Biasanya nama tidak putus jadi 1 huruf. Anggap stop utk aman, atau skip.
+                             # Kita skip saja increment offset.
+                             offset += 1
+                    else:
+                        break
+
+                if current_name:
+                    # Fix Spacing Issues (e.g. BAGUSGEDE -> BAGUS GEDE)
+                    current_name = re.sub(r'(BAGUS)(GEDE)', r'\1 \2', current_name)
+                    current_name = re.sub(r'(ANAK)(AGUNG)', r'\1 \2', current_name) # Common issue
+                    result['nama'] = current_name
            
            # ===== TEMPAT/TANGGAL LAHIR =====
-            # Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels
-            if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower):
-                if result['tempat_lahir'] is None or result['tanggal_lahir'] is None:
-                    # Extract value after label using full-width or standard colon
-                    ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir')
-                    if ttl:
-                        self._parse_ttl(ttl, result)
-                    elif '：' in text or ':' in text:
-                        # Value is after colon but _extract_after_label didn't catch it
-                        parts = re.split(r'[：:]', text, 1)
-                        if len(parts) > 1 and parts[1].strip():
-                            self._parse_ttl(parts[1].strip(), result)
-                    elif i + 1 < len(texts):
-                        # TTL di line berikutnya
-                        next_text = texts[i+1].strip()
-                        if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']):
-                            self._parse_ttl(next_text, result)
-            
-            # ===== JENIS KELAMIN =====
-            if any(kw in text_lower for kw in self.MALE_KEYWORDS):
-                if result['jenis_kelamin'] is None:
-                    result['jenis_kelamin'] = 'LAKI-LAKI'
-            elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS):
-                if result['jenis_kelamin'] is None:
-                    result['jenis_kelamin'] = 'PEREMPUAN'
-            
-            # ===== GOLONGAN DARAH =====
-            if 'darah' in text_lower or 'gol.' in text_lower:
-                # Try to find blood type on same line
-                gol_match = re.search(r'(?:gol|darah)[.\s:：]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE)
-                if gol_match and result['gol_darah'] is None:
-                    result['gol_darah'] = gol_match.group(1).upper()
-                elif result['gol_darah'] is None and i + 1 < len(texts):
-                    # Blood type might be on next line (real KTP pattern)
-                    next_text = texts[i+1].strip()
-                    if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE):
-                        result['gol_darah'] = next_text.upper()
-            # Standalone blood type (e.g., just "O" or "A+" on its own line)
-            if result['gol_darah'] is None:
-                if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3:
-                    result['gol_darah'] = text.strip().upper()
+            # Relocated: TTL, JENIS KELAMIN and GOLONGAN DARAH are now parsed after KELURAHAN/DESA below.
             
+            # The inline handlers removed here were replaced by label-matched versions there.
+            # ALAMAT handling continues right below.
+
            # ===== ALAMAT ===== (dengan fuzzy label matching)
            if result['alamat'] is None and self._is_label_match(text, 'alamat'):
                val = self._extract_after_label(text_normalized, r'a{1,2}l{0,2}a?m{0,2}a?t')
+                
+                # Logic multi-line
+                current_addr = ""
                if val:
-                    result['alamat'] = val.upper()
-                elif i + 1 < len(texts):
-                    # Ambil nilai dari baris berikutnya
-                    next_text = texts[i+1].strip()
-                    # Pastikan bukan label field lain
-                    if len(next_text) > 2 and not self._is_label_match(next_text, 'rt_rw'):
-                        result['alamat'] = next_text.upper()
+                    current_addr = val.upper()
+                
+                # Loop check baris berikutnya (bisa ambil i+1, i+2, dst selama bukan label)
+                offset = 1
+                while i + offset < len(texts):
+                    next_text = texts[i+offset].strip()
+                    print(f"  [ALAMAT CHECK] Offset +{offset}: '{next_text}'")
+                    
+                    next_lower = next_text.lower()
+                    is_stop = False
+                    
+                    # 1. Cek Pola RT/RW (angka/angka) -> Pasti STOP
+                    if re.search(r'\d{3}\s*/\s*\d{3}', next_text) or re.match(r'^[.\-]+\s*/\s*[.\-]+$', next_text):
+                         is_stop = True
+                         print("  [ALAMAT STOP] Matched RT/RW pattern")
+                    
+                    # 2. Cek Keywords Label Pembatas
+                    elif any(next_lower.startswith(prefix) for prefix in ['rt/', 'rw', 'rt/rw', 'kel', 'desa', 'kec', 'agama', 'status', 'kawin']):
+                         is_stop = True
+                         print("  [ALAMAT STOP] Matched label prefix")
+
+                    # 3. Cek Keywords Spesifik Full Word
+                    elif any(kw in next_lower for kw in ['kelurahan', 'kecamatan', 'perkawinan', 'kewarganegaraan']):
+                         is_stop = True
+                         print("  [ALAMAT STOP] Matched distinct label word")
+
+                    # 4. Check Case Sensitivity
+                    if not is_stop:
+                        letters = [c for c in next_text if c.isalpha()]
+                        if letters:
+                            upper_count = sum(1 for c in letters if c.isupper())
+                            upper_ratio = upper_count / len(letters)
+                            # Jika hampir semua huruf kecil/Title Case (ratio < 0.4), dicurigai sebagai Label
+                            # Kecuali kata-kata pendek (< 5 chars)
+                            if upper_ratio < 0.4 and len(letters) > 4:
+                                is_stop = True
+                                print(f"  [ALAMAT STOP] Detected Title Case/Lowercase (Ratio={upper_ratio:.2f}) -> Likely Label")
+
+                    # Jika BUKAN pembatas, AMBIL sebagai lanjutan alamat
+                    if not is_stop:
+                        if len(next_text) > 1:
+                            print(f"  [ALAMAT MERGE] Merging '{next_text}'")
+                            if current_addr:
+                                 current_addr += " " + next_text.upper()
+                            else:
+                                 current_addr = next_text.upper()
+                            offset += 1 # Lanjut cek baris berikutnya
+                        else:
+                             print(f"  [ALAMAT SKIP] Line too short '{next_text}'")
+                             offset += 1 # Skip noise, try next line? Or stop? usually skip noise is safer to continue
+                    else:
+                        print(f"  [ALAMAT STOP] Hit Stop Condition '{next_text}'")
+                        break # Stop loop
+
+                if current_addr:
+                    result['alamat'] = current_addr
+
+                if current_addr:
+                    result['alamat'] = current_addr
            
            # ===== RT/RW =====
-            rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text)
-            if rt_rw_match:
-                result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
-            
+            # Relaxed pattern to handle -/- or 000/000
+            if result['rt_rw'] is None:
+                rt_rw_match = re.search(r'(\d{1,3}|-)\s*/\s*(\d{1,3}|-)', text)
+                if rt_rw_match:
+                    result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}"
+                    print(f"  [RT/RW] Found {result['rt_rw']}")
+
            # ===== KELURAHAN/DESA =====
            if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower:
                if result['kel_desa'] is None:
@@ -853,6 +985,77 @@ class KTPExtractor:
                    elif i + 1 < len(texts):
                        result['kel_desa'] = texts[i+1].strip().upper()
            
+            # ===== TEMPAT/TANGGAL LAHIR =====
+            # Gunakan _is_label_match untuk fleksibilitas (e.g. Tempat/Tgl Lahir, Tmpt Lahir)
+            if result['tempat_lahir'] is None and self._is_label_match(text, 'ttl'):
+                print(f"  [TTL DEBUG] Matched Label on line {i}: '{text}'")
+                # Regex pattern yang SANGAT fleksibel untuk label TTL
+                # Menangani berbagai variasi: Tmpat/Tgl Lahir, Tempat. Tgl. Lahir, dll
+                # Intinya: T...mp...t <junk> L...hir
+                val = self._extract_after_label(text_normalized, r't[ea]m?p?a?t.*?l[a@]hi?r?|tgl.*?l[a@]hi?r?')
+                
+                # Jika val kosong, coba ambil dari baris berikutnya
+                if not val and i + 1 < len(texts):
+                    next_text = texts[i+1].strip()
+                    next_lower = next_text.lower()
+                    stop_keywords = ['jenis', 'kelamin', 'alamat', 'gol', 'darah']
+                    if not any(kw in next_lower for kw in stop_keywords):
+                        val = next_text.upper()
+                        print(f"  [TTL DEBUG] Took next line: '{val}'")
+                
+                if val:
+                    print(f"  [TTL DEBUG] Parsing value: '{val}'")
+                    self._parse_ttl(val, result)
+                    if result['tanggal_lahir']:
+                        print(f"  [TTL DEBUG] Success: {result['tanggal_lahir']}")
+
+            # ===== JENIS KELAMIN =====
+            if result['jenis_kelamin'] is None:
+                # 1. Coba cari Label dulu
+                if self._is_label_match(text, 'jenis_kelamin'):
+                    val = self._extract_after_label(text_normalized, r'j[ea]ni?s\s*k[ea]l[a@]?mi?n')
+                    if val:
+                        if 'LAKI' in val.upper(): result['jenis_kelamin'] = 'LAKI-LAKI'
+                        elif 'PEREMPUAN' in val.upper() or 'WANITA' in val.upper(): result['jenis_kelamin'] = 'PEREMPUAN'
+                    
+                    if result['jenis_kelamin'] is None and i + 1 < len(texts):
+                         next_text = texts[i+1].upper()
+                         if 'LAKI' in next_text: result['jenis_kelamin'] = 'LAKI-LAKI'
+                         elif 'PEREMPUAN' in next_text or 'WANITA' in next_text: result['jenis_kelamin'] = 'PEREMPUAN'
+                
+                # 2. Fallback: Cari langsung keyword VALUES
+                if result['jenis_kelamin'] is None:
+                    text_upper = text.upper()
+                    if 'LAKI-LAKI' in text_upper or 'LAKI - LAKI' in text_upper:
+                         result['jenis_kelamin'] = 'LAKI-LAKI'
+                    elif 'PEREMPUAN' in text_upper:
+                         result['jenis_kelamin'] = 'PEREMPUAN'
+
+            # ===== GOLONGAN DARAH =====
+            if result['gol_darah'] is None:
+                # Cek label
+                if self._is_label_match(text, 'gol_darah'):
+                     val = self._extract_after_label(text_normalized, r'g?o?l\.?\s*d?a?r?a?h')
+                     # Jika label ketemu tapi val kosong, mungkin nempel (Gol.Darah : O)
+                     # atau ada di baris ini
+                     if val:
+                         gd_match = re.search(r'([ABO]{1,2}[+\-]?)', val)
+                         if gd_match:
+                             result['gol_darah'] = gd_match.group(1).upper()
+                     else:
+                         # Coba cari pattern gol darah di baris yang sama dengan label
+                         gd_match = re.search(r'([ABO]{1,2}[+\-]?)', text.upper().replace('0','O'))
+                         if gd_match:
+                             result['gol_darah'] = gd_match.group(1).upper()
+                
+                # Cek next line jika baris ini cuma label "Gol Darah"
+                if result['gol_darah'] is None and self._is_label_match(text, 'gol_darah') and i+1 < len(texts):
+                    next_text = texts[i+1].strip().upper()
+                    if len(next_text) < 5: # Pendek, asumsi gol darah
+                        gd_match = re.search(r'([ABO]{1,2}[+\-]?)', next_text)
+                        if gd_match:
+                            result['gol_darah'] = gd_match.group(1).upper()
+            
            # ===== KECAMATAN =====
            if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower):
                if result['kecamatan'] is None:
@@ -940,15 +1143,25 @@ class KTPExtractor:
            # ===== TANGGAL PENERBITAN (biasanya format DD-MM-YYYY di akhir) =====
            # Look for date that is NOT tanggal lahir (different date)
            if result['tanggal_penerbitan'] is None:
-                # Match date format at end of text or standalone date
-                date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip())
-                if date_match:
-                    found_date = date_match.group(1)
-                    # Make sure it's not the same as tanggal_lahir
-                    if result['tanggal_lahir'] != found_date:
-                        # Likely penerbitan if after berlaku_hingga was found
-                        if result['berlaku_hingga'] or i > len(texts) * 0.7:
-                            result['tanggal_penerbitan'] = found_date
+                # 1. Skip if contains Keywords of other date fields
+                # Jangan ambil jika ada kata 'LAHIR', 'TGL', 'BERLAKU', 'SEUMUR', 'HINGGA'
+                line_clean = text.lower()
+                if any(kw in line_clean for kw in ['lahir', 'lahlr', 'tgl', 'tempat', 'berlaku', 'seumur', 'hingga', 'hidup']):
+                    pass # Skip
+                else:
+                    # Match date format at end of text or standalone date
+                    date_match = re.search(r'(\d{2}[-\s/]\d{2}[-\s/]\d{4})$', text.strip())
+                    if date_match:
+                        found_date = date_match.group(1).replace(' ', '-')
+                        # Make sure it's not the same as tanggal_lahir
+                        if result['tanggal_lahir'] != found_date:
+                            # Strict Position Check: MUST be in the bottom 30% of lines
+                            # (Untuk menghindari salah ambil tanggal lahir yg mungkin gagal diparsing sbg TTL)
+                            if i > len(texts) * 0.7:
+                                result['tanggal_penerbitan'] = found_date
+                                print(f"  [TGL TERBIT] Found '{found_date}' at index {i}/{len(texts)}")
+                            else:
+                                print(f"  [TGL TERBIT SKIP] Date '{found_date}' is too high ({i}/{len(texts)})")
        
        # ============================================
        # AGGRESSIVE SCAN: Cari agama dari semua teks OCR
@@ -1068,6 +1281,9 @@ class KTPExtractor:
        ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
        ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text)
        
+        # Handle "0508-1978" -> "05-08-1978" (Missing separator between day/month)
+        ttl_text = re.sub(r'(\d{2})(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text)
+
        # Handle 8-digit date without separator: "05081978" -> "05-08-1978"
        date_8digit = re.search(r'(\d{8})', ttl_text)
        if date_8digit:
@@ -1077,13 +1293,15 @@ class KTPExtractor:
        
        # Handle merged city+date like "JAKARTA05-08-1978" - add space before digits
        ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE)
+        # Handle merged city+date like "JAKARTA.05-08-1978" -> replace dot with space
+        ttl_text = re.sub(r'([A-Z])\.(\d)', r'\1 \2', ttl_text, flags=re.IGNORECASE)
        
        # Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY"
        date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text)
        if date_match:
            result['tanggal_lahir'] = date_match.group(1)
            # Tempat adalah bagian sebelum tanggal
-            place = ttl_text[:date_match.start()].strip(' ,:-/')
+            place = ttl_text[:date_match.start()].strip(' ,:-/.')
            # Clean up label remnants
            place = re.sub(r'^(tempat|tgl|lahir|：|:)[/\s:：]*', '', place, flags=re.IGNORECASE).strip()
            if place and len(place) > 2:
--- a/static/style.css
+++ b/static/style.css
@@ -852,10 +852,9 @@ footer a:hover {
    font-size: 1.1rem;
 }

+/* When printing, hide every direct child of <body> except #printArea */
 /* Print Styles */
@media print {
-
-    /* Reset Page */
    @page {
        margin: 0;
        size: auto;
@@ -865,72 +864,61 @@ footer a:hover {
        margin: 0;
        padding: 0;
        background: white !important;
-        /* Ensure no scroll or extra pages from hidden content */
-        height: 100vh !important;
-        overflow: hidden !important;
+        height: auto !important;
+        overflow: visible !important;
    }

-    /* Hide EVERYTHING initially with high specificity */
-    body * {
-        visibility: hidden !important;
+    /* Hide everything by default using display:none to collapse space */
+    body> :not(#printArea) {
        display: none !important;
-        /* Force display none to remove layout space */
    }

-    /* Show ONLY Print Area and its children */
-    #printArea,
+    /* Show Print Area */
+    #printArea {
+        display: grid !important;
+        visibility: visible !important;
+    }
+
    #printArea * {
        visibility: visible !important;
-        display: flex !important;
-        /* Restore display for parent */
-    }
-
-    /* Reset display for children of printArea specifically */
-    #printArea * {
-        display: block !important;
-        /* Default to block or whatever needed */
-    }
-
-    /* Specific fix for image inside */
-    #printArea img {
-        display: inline-block !important;
    }

+    /* Print Area Layout */
    #printArea {
-        position: fixed !important;
-        /* Fixed helps detach from flow */
+        position: relative !important;
        left: 0 !important;
        top: 0 !important;
        width: 100% !important;
-        height: 100% !important;
-        overflow: visible !important;
-        z-index: 99999 !important;
-        background: white !important;
+        margin: 0 !important;
+        padding: 10px !important;

-        display: flex !important;
-        justify-content: center !important;
-        align-items: center !important;
-        padding: 0 !important;
+        display: grid !important;
+        grid-template-columns: repeat(auto-fill, 85.6mm) !important;
+        gap: 10px !important;
+        align-content: start !important;
+        justify-content: start !important;
    }

-    .ktp-print-size {
-        /* Standar ISO/IEC 7810 ID-1: 85.60 × 53.98 mm */
-        width: 85.60mm !important;
+    /* KTP Item */
+    .ktp-print-item {
+        width: 85.6mm !important;
        height: 53.98mm !important;
-        max-width: none !important;
-        max-height: none !important;
-        border: 1px dashed #ccc;
-        box-shadow: none !important;
-        /* Remove any shadow */
+        object-fit: contain !important;
+        border: 1px dashed #999;
+        box-sizing: border-box !important;
+        break-inside: avoid !important;
    }

+    /* KK Item (Full Page) */
    .a4-print-size {
-        /* A4 Landscape: 297mm x 210mm */
-        /* Use slightly less to account for margins if necessary, but standard is distinct */
-        width: 297mm !important;
-        height: 210mm !important;
-        max-width: none !important;
-        max-height: none !important;
-        border: none;
+        width: 100% !important;
+        height: auto !important;
+        display: block !important;
+        page-break-after: always !important;
+    }
+
+    /* Ensure modal is hidden */
+    .modal {
+        display: none !important;
    }
 }
--- a/templates/index.html
+++ b/templates/index.html
@@ -178,9 +178,53 @@
            </div>
        </div>

+        <!-- Print Settings Modal: choose the number of copies and the layout before printing -->
+        <div id="printSettingsModal" class="modal" style="display: none;">
+            <div class="modal-content" style="max-width: 500px;">
+                <div class="modal-header">
+                    <h2>🖨️ Pengaturan Cetak</h2>
+                    <span class="close-btn" id="closePrintBtn">&times;</span>
+                </div>
+                <div class="modal-body">
+                    <div class="form-group">
+                        <!-- "for" associates the label with the input: clicking the label focuses the
+                             field and assistive technology can announce the field's name -->
+                        <label for="printCopies">Jumlah Salinan (Copies)</label>
+                        <input type="number" id="printCopies" class="form-control" value="1" min="1" max="50">
+                        <p class="suggestion-text">Masukkan jumlah KTP yang ingin dicetak dalam satu halaman.</p>
+                    </div>
+                    <div class="form-group" style="margin-top: 1rem;">
+                        <label style="display: flex; align-items: center; gap: 0.5rem; cursor: pointer;">
+                            <input type="checkbox" id="printAutoArrange" checked>
+                            <span>Atur Otomatis (Hemat Kertas)</span>
+                        </label>
+                        <p class="suggestion-text">Otomatis menata gambar berderet untuk memaksimalkan ruang kertas A4.
+                        </p>
+                    </div>
+
+                    <div class="print-preview-info"
+                        style="margin-top: 1.5rem; padding: 1rem; background: var(--bg-primary); border-radius: var(--radius);">
+                        <p>ℹ️ <strong>Tips:</strong> Saat dialog print muncul:</p>
+                        <ul style="margin-left: 1.5rem; margin-top: 0.5rem; color: var(--text-secondary);">
+                            <li>Pilih Paper Size: <strong>A4</strong></li>
+                            <li>Margins: <strong>Minimum / None</strong></li>
+                            <li>Scale: <strong>100%</strong></li>
+                        </ul>
+                    </div>
+
+                    <div style="margin-top: 2rem; display: flex; gap: 1rem;">
+                        <button id="cancelPrintBtn" class="action-btn secondary" style="flex: 1;">Batal</button>
+                        <button id="confirmPrintBtn" class="action-btn primary" style="flex: 1;">🖨️ Cetak
+                            Sekarang</button>
+                    </div>
+                </div>
+            </div>
+        </div>
+
+
+
        <footer>
            <p>OCR menggunakan <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> • Data
                diproses secara lokal</p>
+            <p style="margin-top: 0.5rem; font-size: 0.9em; opacity: 0.8;">&copy; Copyright by Wartana</p>
        </footer>
    </div>

@@ -686,6 +730,20 @@
                return indexA - indexB;
            });

+            // Disable Save button initially (since it's auto-saved)
+            const saveBtn = document.getElementById('saveBtn');
+            if (result.saved_to_db) {
+                saveBtn.disabled = true;
+                saveBtn.textContent = '💾 Tersimpan';
+                saveBtn.classList.add('secondary');
+                saveBtn.classList.remove('primary');
+            } else {
+                saveBtn.disabled = false;
+                saveBtn.textContent = '💾 Simpan';
+                saveBtn.classList.remove('secondary');
+                saveBtn.classList.add('primary');
+            }
+
            for (const key of sortedKeys) {
                const value = data[key];
                if (key === 'anggota_keluarga') {
@@ -701,11 +759,24 @@
                }
            }

+            // Add change listener to all inputs to re-enable save button
+            document.querySelectorAll('.editable-field').forEach(input => {
+                input.addEventListener('input', enableSaveButton);
+            });
+
            rawText.textContent = result.raw_text;
            resultsSection.style.display = 'block';
            resultsSection.scrollIntoView({ behavior: 'smooth' });
        }

+        // Re-arm the Save button: enabled, labelled as "save changes", styled as the primary action
+        function enableSaveButton() {
+            const button = document.getElementById('saveBtn');
+            Object.assign(button, { disabled: false, textContent: '💾 Simpan Perubahan' });
+            const { classList } = button;
+            classList.remove('secondary');
+            classList.add('primary');
+        }
+
        async function validateRegionData(data) {
            try {
                const response = await fetch('/api/validate-region', {
@@ -1030,54 +1101,106 @@

         // Print functionality
         const printBtn = document.getElementById('printBtn');
+        const printSettingsModal = document.getElementById('printSettingsModal');
+        const closePrintBtn = document.getElementById('closePrintBtn');
+        const cancelPrintBtn = document.getElementById('cancelPrintBtn');
+        const confirmPrintBtn = document.getElementById('confirmPrintBtn');
+        const printCopiesInput = document.getElementById('printCopies');
+        const printAutoArrange = document.getElementById('printAutoArrange');

+        // Open Print Settings: this click only shows the dialog; nothing is printed here
         printBtn.addEventListener('click', () => {
-            const printArea = document.getElementById('printArea');
-            console.log('Print button clicked');
-
-            // Determine source: preview image or crop canvas?
+            // Validate if there is something to print: a loaded preview image or a visible crop canvas
             const isPreviewVisible = preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src;
             const isCanvasVisible = cropCanvas.style.display !== 'none';

-            if (!isPreviewVisible) {
-                if (isCanvasVisible) {
-                    if (!confirm('Gambar belum diterapkan (Apply). Cetak tampilan canvas saat ini?')) return;
-                    // Use canvas data
-                    const img = new Image();
-                    img.src = cropCanvas.toDataURL('image/jpeg', 0.95);
-                    img.className = currentDocType === 'kk' ? 'a4-print-size' : 'ktp-print-size';
-                    printArea.innerHTML = '';
-                    printArea.appendChild(img);
-                    // Canvas data is instant, no onload needed usually, but to be safe:
-                    setTimeout(() => window.print(), 100);
-                    return;
-                }
-                alert('Tidak ada gambar KTP untuk dicetak! Silakan upload atau pilih dari arsip.');
+            if (!isPreviewVisible && !isCanvasVisible) {
+                alert('Tidak ada gambar untuk dicetak!');
                 return;
             }

-            printArea.innerHTML = '';
-            const img = new Image();
-            // Use current preview src
-            img.src = preview.src;
-            img.className = currentDocType === 'kk' ? 'a4-print-size' : 'ktp-print-size';
-            printArea.appendChild(img);
+            // Defaults: the dialog is reset every time it opens, so earlier choices are not remembered
+            printCopiesInput.value = 1;
+            printAutoArrange.checked = currentDocType === 'ktp'; // Default on for KTP, off for any other document type
+            printSettingsModal.style.display = 'block';
+        });

-            // Robust print trigger
-            img.onload = () => {
-                // Short delay to ensure rendering
-                setTimeout(() => window.print(), 100);
-            };
+        // Hide the print settings dialog; wired to both the header close icon and the cancel button
+        function closePrintModal() {
+            printSettingsModal.style.display = 'none';
+        }
+        [closePrintBtn, cancelPrintBtn].forEach((trigger) => {
+            trigger.addEventListener('click', closePrintModal);
+        });

-            // Fallback if image cached or instant
-            if (img.complete) {
-                img.onload();
+        // Confirm Print: fill #printArea with the requested copies, then open the browser print dialog
+        confirmPrintBtn.addEventListener('click', () => {
+            const printArea = document.getElementById('printArea');
+            printArea.innerHTML = ''; // Clear previous
+            // Drop any inline override left by an earlier job so the stylesheet layout applies again
+            printArea.style.removeProperty('display');
+
+            // Determine Source Image
+            // Prioritize Preview (Result/Archive), then Crop Canvas (Editing)
+            let imgSrc = '';
+            if (preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src) {
+                imgSrc = preview.src;
+            } else if (cropCanvas.style.display !== 'none') {
+                // Use Canvas Data
+                imgSrc = cropCanvas.toDataURL('image/jpeg', 0.95);
             }

-            // Error handling
-            img.onerror = () => {
-                alert('Gagal memuat gambar untuk dicetak.');
-            };
+            if (!imgSrc) {
+                alert('Gagal mengambil gambar source.');
+                closePrintModal();
+                return;
+            }
+
+            // Clamp to the range the number input advertises: min/max are not enforced for typed
+            // values, and a negative count would otherwise produce an empty printout.
+            const maxCopies = parseInt(printCopiesInput.max, 10) || 50;
+            const requested = parseInt(printCopiesInput.value, 10) || 1;
+            const copies = Math.min(Math.max(requested, 1), maxCopies);
+
+            // KTP copies use the card-sized class; every other document type uses the full-page class
+            const isKtp = currentDocType === 'ktp';
+            const copyImages = [];
+            for (let i = 0; i < copies; i++) {
+                const img = document.createElement('img');
+                img.src = imgSrc;
+                img.className = isKtp ? 'ktp-print-item' : 'a4-print-size';
+                printArea.appendChild(img);
+                copyImages.push(img);
+            }
+
+            // With "Auto Arrange" unchecked, KTP copies ask for plain block flow instead of the
+            // stylesheet layout.
+            // NOTE(review): an inline value without !important cannot override a stylesheet
+            // declaration marked !important - confirm this override takes effect when printing.
+            if (isKtp && !printAutoArrange.checked) {
+                printArea.style.display = 'block'; // Override grid
+            }
+
+            closePrintModal();
+
+            // Print once every copy has decoded instead of guessing with a fixed delay. A failed
+            // decode is ignored so the print dialog still opens.
+            const decoded = copyImages.map((img) =>
+                (typeof img.decode === 'function' ? img.decode().catch(() => {}) : Promise.resolve()));
+            Promise.all(decoded).then(() => window.print());
         });

        // Download functionality
@@ -1228,6 +1351,7 @@
         window.addEventListener('click', (e) => {
+            // A click whose target is a modal element itself (not one of its descendants) hides that modal
             if (e.target === archiveModal) archiveModal.style.display = 'none';
             if (e.target === loginModal) loginModal.style.display = 'none';
+            if (e.target === printSettingsModal) printSettingsModal.style.display = 'none';
         });

        async function loadArchive() {
@@ -1345,8 +1469,8 @@
            window.location.reload();
        });
    </script>
-    <!-- Print Area: Use visibility hidden/height 0 to ensure images load but are invisible on screen -->
-    <div id="printArea" style="visibility: hidden; height: 0; overflow: hidden; position: absolute; z-index: -1;"></div>
+    <!-- Print Area: empty container that the print handler fills with image copies before calling
+         window.print(). NOTE(review): the inline hiding styles were removed here, so this presumably
+         relies on the stylesheet to keep it out of the on-screen layout; confirm. -->
+    <div id="printArea"></div>

    <script>
        // ... (this comment is just marker, main script is above)
--- a/uploads/temp_transformed_rotated_temp.jpg
+++ b/uploads/temp_transformed_rotated_temp.jpg