diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0f10af7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +mysql-data/ +__pycache__/ +venv/ +.env +.gemini/ diff --git a/KK/5103040808220001.jpg b/KK/5103040808220001.jpg deleted file mode 100644 index 94c9f89..0000000 Binary files a/KK/5103040808220001.jpg and /dev/null differ diff --git a/KTP/3303080307040003.jpg b/KTP/3303080307040003.jpg index 10e7884..3bc121f 100644 Binary files a/KTP/3303080307040003.jpg and b/KTP/3303080307040003.jpg differ diff --git a/KTP/3518080907840001.jpg b/KTP/3518080907840001.jpg new file mode 100644 index 0000000..d2ba4dc Binary files /dev/null and b/KTP/3518080907840001.jpg differ diff --git a/KTP/3529245512000002.jpg b/KTP/3529245512000002.jpg deleted file mode 100644 index 302eaab..0000000 Binary files a/KTP/3529245512000002.jpg and /dev/null differ diff --git a/KTP/3671092111950003.jpg b/KTP/3671092111950003.jpg index 2487edd..2cae32f 100644 Binary files a/KTP/3671092111950003.jpg and b/KTP/3671092111950003.jpg differ diff --git a/KTP/5102045811690001.jpg b/KTP/5102045811690001.jpg index f355b82..fda09ae 100644 Binary files a/KTP/5102045811690001.jpg and b/KTP/5102045811690001.jpg differ diff --git a/KTP/5103022906800001.jpg b/KTP/5103022906800001.jpg index 3231655..262bcba 100644 Binary files a/KTP/5103022906800001.jpg and b/KTP/5103022906800001.jpg differ diff --git a/KTP/5171042004950004.jpg b/KTP/5171042004950004.jpg deleted file mode 100644 index d935a9e..0000000 Binary files a/KTP/5171042004950004.jpg and /dev/null differ diff --git a/KTP/7306046502850001.jpg b/KTP/7306046502850001.jpg deleted file mode 100644 index 07979a4..0000000 Binary files a/KTP/7306046502850001.jpg and /dev/null differ diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..94593cd --- /dev/null +++ b/TODO.md @@ -0,0 +1,2 @@ +Hasil extrak belum sempurna +otomatis simpan hasil proses extrak begitu selesai, jika setelah edit, tombol simpan di klik update hasil yg 
terdahulu \ No newline at end of file diff --git a/__pycache__/app.cpython-313.pyc b/__pycache__/app.cpython-313.pyc index 8170d49..f3679b3 100644 Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ diff --git a/__pycache__/ktp_extractor.cpython-313.pyc b/__pycache__/ktp_extractor.cpython-313.pyc index dd1d062..ee84574 100644 Binary files a/__pycache__/ktp_extractor.cpython-313.pyc and b/__pycache__/ktp_extractor.cpython-313.pyc differ diff --git a/docker-compose.yml b/docker-compose.yml index 19f66f2..deffa6d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: mysql: image: mysql:8.0 @@ -13,9 +11,5 @@ services: ports: - "3306:3306" volumes: - - mysql_data:/var/lib/mysql + - ./mysql-data:/var/lib/mysql command: --default-authentication-plugin=mysql_native_password - -volumes: - mysql_data: - driver: local diff --git a/ktp_extractor.py b/ktp_extractor.py index d330829..52c14e4 100644 --- a/ktp_extractor.py +++ b/ktp_extractor.py @@ -211,6 +211,12 @@ class KTPExtractor: text_upper = text.upper().strip() + # Explicit conflict prevention + if field_name == 'agama' and 'ALAMAT' in text_upper: + return False + if field_name == 'alamat' and 'AGAMA' in text_upper: + return False + # Coba exact match dulu (lebih cepat) for label in self.FIELD_LABELS[field_name]: if label in text_upper: @@ -229,12 +235,39 @@ class KTPExtractor: continue # Bandingkan dengan kata pertama ratio = difflib.SequenceMatcher(None, first_word, label_parts[0]).ratio() - if ratio >= cutoff: - print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})") + + # Dynamic cutoff logic + effective_cutoff = cutoff + if len(first_word) < 7: + # Use stricter cutoff for short words to prevent ALAMAT (6) matching AGAMA (5) -> ratio 0.73 + effective_cutoff = max(cutoff, 0.82) + + if ratio >= effective_cutoff: + if DEBUG_MODE: + print(f" [FUZZY LABEL] '{first_word}' matched '{label}' (ratio={ratio:.2f})") return 
True return False + def _extract_after_label(self, text: str, label_pattern: str) -> Optional[str]: + """ + Extract content after a label (fuzzy/regex match). + Handles cases with/without colons. + """ + if not text: return None + + # 1. Try Regex Search if pattern provided + if label_pattern: + # Construct regex: Label + optional spaces/colon + (Group 1: Value) + # flags=re.IGNORECASE should be used + # We want to find the END of the label + match = re.search(f"({label_pattern})[:\\s]*", text, re.IGNORECASE) + if match: + # Return everything after the match end + return text[match.end():].strip() + + return None + def _parse_balinese_name(self, name: str) -> str: """ Parse nama Bali yang digabung OCR dan tambahkan spasi yang tepat. @@ -521,11 +554,21 @@ class KTPExtractor: # TTL from ttl zone if 'ttl' in zone_texts: for text in zone_texts['ttl']: - if 'tempat' in text.lower() or 'lahir' in text.lower(): + # Skip if text is JUST the label (length check or fuzzy match) + if len(text) < 15 and self._is_label_match(text, 'tempat_lahir'): + continue + + if 'tempat' in text.lower() or 'lahir' in text.lower() or 'tgl' in text.lower() or len(text) > 5: val = self._extract_value_from_text(text) if val: + # Don't accept if val looks like label + if self._is_label_match(val, 'tempat_lahir') and len(val) < 20: + continue + self._parse_ttl(val, result) - break + # Only break if we actually got a birth date, otherwise keep looking + if result['tanggal_lahir']: + break # JENIS KELAMIN if 'jenis_kelamin' in zone_texts: @@ -534,7 +577,7 @@ class KTPExtractor: if 'laki' in text_lower: result['jenis_kelamin'] = 'LAKI-LAKI' break - elif 'perempuan' in text_lower: + elif 'perempuan' in text_lower or 'wanita' in text_lower: result['jenis_kelamin'] = 'PEREMPUAN' break @@ -618,15 +661,36 @@ class KTPExtractor: # PEKERJAAN if 'pekerjaan' in zone_texts: + best_job = None + potential_job = None + for text in zone_texts['pekerjaan']: val = text.upper() if 'pekerjaan' in text.lower(): val = 
self._extract_value_from_text(text).upper() - # Check against list or take value - if len(val) > 3 and 'pekerjaan' not in val.lower(): - result['pekerjaan'] = val - break + # Clean up + val = val.strip() + if not val or len(val) < 3 or 'PEKERJAAN' in val: + continue + + # 1. Check against wildcard/list (Priority) + # Buruh, Karyawan, Pelajar, dll + if any(job.upper() in val for job in self.PEKERJAAN_LIST): + best_job = val + break # Found a definitive job + + # 2. Save as potential if it's NOT a known bad value (like City names) + # Avoid capturing 'TABANAN', 'JAKARTA', date strings + if not any(city in val for city in ['KABUPATEN', 'KOTA', 'TABANAN', 'BADUNG', 'DENPASAR', 'JAKARTA', 'BANDUNG']): + if not re.search(r'\d{2}-\d{2}-\d{4}', val): # Avoid dates + if potential_job is None: + potential_job = val + + if best_job: + result['pekerjaan'] = best_job + elif potential_job: + result['pekerjaan'] = potential_job # WNI if 'wni' in zone_texts: @@ -774,76 +838,144 @@ class KTPExtractor: result['kabupaten_kota'] = text.strip().upper() # ===== NAMA ===== - if 'nama' in text_lower and result['nama'] is None: + if result['nama'] is None and self._is_label_match(text, 'nama'): val = self._extract_after_label(text_normalized, 'nama') - if val and len(val) > 2: - result['nama'] = val.upper() - elif i + 1 < len(texts): - # Nama di line berikutnya - next_text = texts[i+1].strip() - if len(next_text) > 2 and not any(kw in next_text.lower() for kw in ['tempat', 'lahir', 'jenis']): - result['nama'] = next_text.upper() + current_name = "" + + if val: + current_name = val.upper() + + # Loop check baris berikutnya for Name (handle 2-3 lines) + offset = 1 + # Batasi maksimal 2 baris tambahan untuk Nama (total 3 baris) + while i + offset < len(texts) and offset <= 2: + next_text = texts[i+offset].strip() + next_lower = next_text.lower() + + is_stop = False + + # 1. Check Stop Keywords (Field Labels below Name) + # Stop if next line is Tempat Lahir, Jenis Kelamin, Alamat, etc. 
+ stop_keywords = ['tempat', 'lahir', 'tgl', 'jenis', 'kelamin', 'alamat', 'rt/rw', 'nik'] + if any(kw in next_lower for kw in stop_keywords): + is_stop = True + print(f" [NAMA STOP] Matched stop keyword in '{next_text}'") + + # 2. Check Case Sensitivity (Heuristic) + if not is_stop: + letters = [c for c in next_text if c.isalpha()] + if letters: + upper_count = sum(1 for c in letters if c.isupper()) + upper_ratio = upper_count / len(letters) + # If mostly lowercase/title case, likely a label (e.g. "Tempat Lahir") + if upper_ratio < 0.4 and len(letters) > 3: + is_stop = True + print(f" [NAMA STOP] Likely Label based on Case (Ratio={upper_ratio:.2f})") + + if not is_stop: + if len(next_text) > 2: + print(f" [NAMA MERGE] Merging '{next_text}'") + if current_name: + current_name += " " + next_text.upper() + else: + current_name = next_text.upper() + offset += 1 + else: + print(f" [NAMA SKIP] Too short '{next_text}'") + # Kalau terlalu pendek (noise), boleh skip atau stop? + # Biasanya nama tidak putus jadi 1 huruf. Anggap stop utk aman, atau skip. + # Kita skip saja increment offset. + offset += 1 + else: + break + + if current_name: + # Fix Spacing Issues (e.g. 
BAGUSGEDE -> BAGUS GEDE) + current_name = re.sub(r'(BAGUS)(GEDE)', r'\1 \2', current_name) + current_name = re.sub(r'(ANAK)(AGUNG)', r'\1 \2', current_name) # Common issue + result['nama'] = current_name # ===== TEMPAT/TANGGAL LAHIR ===== - # Match "Tempat/Tgl Lahir" or "Tempat Lahir" or similar labels - if 'tempat' in text_lower or ('lahir' in text_lower and 'berlaku' not in text_lower): - if result['tempat_lahir'] is None or result['tanggal_lahir'] is None: - # Extract value after label using full-width or standard colon - ttl = self._extract_after_label(text_normalized, r'tempat[/\s]*tgl[/\s]*lahir|tempat[/\s]*lahir|lahir') - if ttl: - self._parse_ttl(ttl, result) - elif ':' in text or ':' in text: - # Value is after colon but _extract_after_label didn't catch it - parts = re.split(r'[::]', text, 1) - if len(parts) > 1 and parts[1].strip(): - self._parse_ttl(parts[1].strip(), result) - elif i + 1 < len(texts): - # TTL di line berikutnya - next_text = texts[i+1].strip() - if not any(kw in next_text.lower() for kw in ['jenis', 'kelamin', 'alamat', 'gol']): - self._parse_ttl(next_text, result) - - # ===== JENIS KELAMIN ===== - if any(kw in text_lower for kw in self.MALE_KEYWORDS): - if result['jenis_kelamin'] is None: - result['jenis_kelamin'] = 'LAKI-LAKI' - elif any(kw in text_lower for kw in self.FEMALE_KEYWORDS): - if result['jenis_kelamin'] is None: - result['jenis_kelamin'] = 'PEREMPUAN' - - # ===== GOLONGAN DARAH ===== - if 'darah' in text_lower or 'gol.' 
in text_lower: - # Try to find blood type on same line - gol_match = re.search(r'(?:gol|darah)[.\s::]*([ABO]{1,2}[+\-]?)', text, re.IGNORECASE) - if gol_match and result['gol_darah'] is None: - result['gol_darah'] = gol_match.group(1).upper() - elif result['gol_darah'] is None and i + 1 < len(texts): - # Blood type might be on next line (real KTP pattern) - next_text = texts[i+1].strip() - if re.match(r'^[ABO]{1,2}[+\-]?$', next_text, re.IGNORECASE): - result['gol_darah'] = next_text.upper() - # Standalone blood type (e.g., just "O" or "A+" on its own line) - if result['gol_darah'] is None: - if re.match(r'^[ABO]{1,2}[+\-]?$', text.strip(), re.IGNORECASE) and len(text.strip()) <= 3: - result['gol_darah'] = text.strip().upper() + # ... (starts around line 830 in original) ... + # (Skipping down to ALAMAT section for the replacement block) + # ... regex find ... + # ===== ALAMAT ===== (dengan fuzzy label matching) if result['alamat'] is None and self._is_label_match(text, 'alamat'): val = self._extract_after_label(text_normalized, r'a{1,2}l{0,2}a?m{0,2}a?t') + + # Logic multi-line + current_addr = "" if val: - result['alamat'] = val.upper() - elif i + 1 < len(texts): - # Ambil nilai dari baris berikutnya - next_text = texts[i+1].strip() - # Pastikan bukan label field lain - if len(next_text) > 2 and not self._is_label_match(next_text, 'rt_rw'): - result['alamat'] = next_text.upper() + current_addr = val.upper() + + # Loop check baris berikutnya (bisa ambil i+1, i+2, dst selama bukan label) + offset = 1 + while i + offset < len(texts): + next_text = texts[i+offset].strip() + print(f" [ALAMAT CHECK] Offset +{offset}: '{next_text}'") + + next_lower = next_text.lower() + is_stop = False + + # 1. Cek Pola RT/RW (angka/angka) -> Pasti STOP + if re.search(r'\d{3}\s*/\s*\d{3}', next_text) or re.match(r'^[.\-]+\s*/\s*[.\-]+$', next_text): + is_stop = True + print(" [ALAMAT STOP] Matched RT/RW pattern") + + # 2. 
Cek Keywords Label Pembatas + elif any(next_lower.startswith(prefix) for prefix in ['rt/', 'rw', 'rt/rw', 'kel', 'desa', 'kec', 'agama', 'status', 'kawin']): + is_stop = True + print(" [ALAMAT STOP] Matched label prefix") + + # 3. Cek Keywords Spesifik Full Word + elif any(kw in next_lower for kw in ['kelurahan', 'kecamatan', 'perkawinan', 'kewarganegaraan']): + is_stop = True + print(" [ALAMAT STOP] Matched distinct label word") + + # 4. Check Case Sensitivity + if not is_stop: + letters = [c for c in next_text if c.isalpha()] + if letters: + upper_count = sum(1 for c in letters if c.isupper()) + upper_ratio = upper_count / len(letters) + # Jika hampir semua huruf kecil/Title Case (ratio < 0.4), dicurigai sebagai Label + # Kecuali kata-kata pendek (< 5 chars) + if upper_ratio < 0.4 and len(letters) > 4: + is_stop = True + print(f" [ALAMAT STOP] Detected Title Case/Lowercase (Ratio={upper_ratio:.2f}) -> Likely Label") + + # Jika BUKAN pembatas, AMBIL sebagai lanjutan alamat + if not is_stop: + if len(next_text) > 1: + print(f" [ALAMAT MERGE] Merging '{next_text}'") + if current_addr: + current_addr += " " + next_text.upper() + else: + current_addr = next_text.upper() + offset += 1 # Lanjut cek baris berikutnya + else: + print(f" [ALAMAT SKIP] Line too short '{next_text}'") + offset += 1 # Skip noise, try next line? Or stop? 
usually skip noise is safer to continue + else: + print(f" [ALAMAT STOP] Hit Stop Condition '{next_text}'") + break # Stop loop + + if current_addr: + result['alamat'] = current_addr + + if current_addr: + result['alamat'] = current_addr # ===== RT/RW ===== - rt_rw_match = re.search(r'(\d{3})\s*/\s*(\d{3})', text) - if rt_rw_match: - result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}" - + # Relaxed pattern to handle -/- or 000/000 + if result['rt_rw'] is None: + rt_rw_match = re.search(r'(\d{1,3}|-)\s*/\s*(\d{1,3}|-)', text) + if rt_rw_match: + result['rt_rw'] = f"{rt_rw_match.group(1)}/{rt_rw_match.group(2)}" + print(f" [RT/RW] Found {result['rt_rw']}") + # ===== KELURAHAN/DESA ===== if ('kel' in text_lower or 'desa' in text_lower) and 'kelamin' not in text_lower: if result['kel_desa'] is None: @@ -853,6 +985,77 @@ class KTPExtractor: elif i + 1 < len(texts): result['kel_desa'] = texts[i+1].strip().upper() + # ===== TEMPAT/TANGGAL LAHIR ===== + # Gunakan _is_label_match untuk fleksibilitas (e.g. Tempat/Tgl Lahir, Tmpt Lahir) + if result['tempat_lahir'] is None and self._is_label_match(text, 'ttl'): + print(f" [TTL DEBUG] Matched Label on line {i}: '{text}'") + # Regex pattern yang SANGAT fleksibel untuk label TTL + # Menangani berbagai variasi: Tmpat/Tgl Lahir, Tempat. Tgl. 
Lahir, dll + # Intinya: T...mp...t L...hir + val = self._extract_after_label(text_normalized, r't[ea]m?p?a?t.*?l[a@]hi?r?|tgl.*?l[a@]hi?r?') + + # Jika val kosong, coba ambil dari baris berikutnya + if not val and i + 1 < len(texts): + next_text = texts[i+1].strip() + next_lower = next_text.lower() + stop_keywords = ['jenis', 'kelamin', 'alamat', 'gol', 'darah'] + if not any(kw in next_lower for kw in stop_keywords): + val = next_text.upper() + print(f" [TTL DEBUG] Took next line: '{val}'") + + if val: + print(f" [TTL DEBUG] Parsing value: '{val}'") + self._parse_ttl(val, result) + if result['tanggal_lahir']: + print(f" [TTL DEBUG] Success: {result['tanggal_lahir']}") + + # ===== JENIS KELAMIN ===== + if result['jenis_kelamin'] is None: + # 1. Coba cari Label dulu + if self._is_label_match(text, 'jenis_kelamin'): + val = self._extract_after_label(text_normalized, r'j[ea]ni?s\s*k[ea]l[a@]?mi?n') + if val: + if 'LAKI' in val.upper(): result['jenis_kelamin'] = 'LAKI-LAKI' + elif 'PEREMPUAN' in val.upper() or 'WANITA' in val.upper(): result['jenis_kelamin'] = 'PEREMPUAN' + + if result['jenis_kelamin'] is None and i + 1 < len(texts): + next_text = texts[i+1].upper() + if 'LAKI' in next_text: result['jenis_kelamin'] = 'LAKI-LAKI' + elif 'PEREMPUAN' in next_text or 'WANITA' in next_text: result['jenis_kelamin'] = 'PEREMPUAN' + + # 2. 
Fallback: Cari langsung keyword VALUES + if result['jenis_kelamin'] is None: + text_upper = text.upper() + if 'LAKI-LAKI' in text_upper or 'LAKI - LAKI' in text_upper: + result['jenis_kelamin'] = 'LAKI-LAKI' + elif 'PEREMPUAN' in text_upper: + result['jenis_kelamin'] = 'PEREMPUAN' + + # ===== GOLONGAN DARAH ===== + if result['gol_darah'] is None: + # Cek label + if self._is_label_match(text, 'gol_darah'): + val = self._extract_after_label(text_normalized, r'g?o?l\.?\s*d?a?r?a?h') + # Jika label ketemu tapi val kosong, mungkin nempel (Gol.Darah : O) + # atau ada di baris ini + if val: + gd_match = re.search(r'([ABO]{1,2}[+\-]?)', val) + if gd_match: + result['gol_darah'] = gd_match.group(1).upper() + else: + # Coba cari pattern gol darah di baris yang sama dengan label + gd_match = re.search(r'([ABO]{1,2}[+\-]?)', text.upper().replace('0','O')) + if gd_match: + result['gol_darah'] = gd_match.group(1).upper() + + # Cek next line jika baris ini cuma label "Gol Darah" + if result['gol_darah'] is None and self._is_label_match(text, 'gol_darah') and i+1 < len(texts): + next_text = texts[i+1].strip().upper() + if len(next_text) < 5: # Pendek, asumsi gol darah + gd_match = re.search(r'([ABO]{1,2}[+\-]?)', next_text) + if gd_match: + result['gol_darah'] = gd_match.group(1).upper() + # ===== KECAMATAN ===== if 'kecamatan' in text_lower or ('kec' in text_lower and 'kelamin' not in text_lower): if result['kecamatan'] is None: @@ -940,15 +1143,25 @@ class KTPExtractor: # ===== TANGGAL PENERBITAN (biasanya format DD-MM-YYYY di akhir) ===== # Look for date that is NOT tanggal lahir (different date) if result['tanggal_penerbitan'] is None: - # Match date format at end of text or standalone date - date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})$', text.strip()) - if date_match: - found_date = date_match.group(1) - # Make sure it's not the same as tanggal_lahir - if result['tanggal_lahir'] != found_date: - # Likely penerbitan if after berlaku_hingga was found - if 
result['berlaku_hingga'] or i > len(texts) * 0.7: - result['tanggal_penerbitan'] = found_date + # 1. Skip if contains Keywords of other date fields + # Jangan ambil jika ada kata 'LAHIR', 'TGL', 'BERLAKU', 'SEUMUR', 'HINGGA' + line_clean = text.lower() + if any(kw in line_clean for kw in ['lahir', 'lahlr', 'tgl', 'tempat', 'berlaku', 'seumur', 'hingga', 'hidup']): + pass # Skip + else: + # Match date format at end of text or standalone date + date_match = re.search(r'(\d{2}[-\s/]\d{2}[-\s/]\d{4})$', text.strip()) + if date_match: + found_date = date_match.group(1).replace(' ', '-') + # Make sure it's not the same as tanggal_lahir + if result['tanggal_lahir'] != found_date: + # Strict Position Check: MUST be in the bottom 30% of lines + # (Untuk menghindari salah ambil tanggal lahir yg mungkin gagal diparsing sbg TTL) + if i > len(texts) * 0.7: + result['tanggal_penerbitan'] = found_date + print(f" [TGL TERBIT] Found '{found_date}' at index {i}/{len(texts)}") + else: + print(f" [TGL TERBIT SKIP] Date '{found_date}' is too high ({i}/{len(texts)})") # ============================================ # AGGRESSIVE SCAN: Cari agama dari semua teks OCR @@ -1068,6 +1281,9 @@ class KTPExtractor: ttl_text = re.sub(r'(\d{2})[\s]+(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text) ttl_text = re.sub(r'(\d{2})[-/](\d{2})[\s]+(\d{4})', r'\1-\2-\3', ttl_text) + # Handle "0508-1978" -> "05-08-1978" (Missing separator between day/month) + ttl_text = re.sub(r'(\d{2})(\d{2})[-/](\d{4})', r'\1-\2-\3', ttl_text) + # Handle 8-digit date without separator: "05081978" -> "05-08-1978" date_8digit = re.search(r'(\d{8})', ttl_text) if date_8digit: @@ -1077,13 +1293,15 @@ class KTPExtractor: # Handle merged city+date like "JAKARTA05-08-1978" - add space before digits ttl_text = re.sub(r'([A-Z])(\d{2}[-/])', r'\1 \2', ttl_text, flags=re.IGNORECASE) + # Handle merged city+date like "JAKARTA.05-08-1978" -> replace dot with space + ttl_text = re.sub(r'([A-Z])\.(\d)', r'\1 \2', ttl_text, flags=re.IGNORECASE) # 
Format: "TEMPAT, DD-MM-YYYY" atau "TEMPAT DD-MM-YYYY" date_match = re.search(r'(\d{2}[-/]\d{2}[-/]\d{4})', ttl_text) if date_match: result['tanggal_lahir'] = date_match.group(1) # Tempat adalah bagian sebelum tanggal - place = ttl_text[:date_match.start()].strip(' ,:-/') + place = ttl_text[:date_match.start()].strip(' ,:-/.') # Clean up label remnants place = re.sub(r'^(tempat|tgl|lahir|:|:)[/\s::]*', '', place, flags=re.IGNORECASE).strip() if place and len(place) > 2: diff --git a/static/style.css b/static/style.css index dcd86e0..28ed886 100644 --- a/static/style.css +++ b/static/style.css @@ -852,10 +852,9 @@ footer a:hover { font-size: 1.1rem; } +/* Print Styles */ /* Print Styles */ @media print { - - /* Reset Page */ @page { margin: 0; size: auto; @@ -865,72 +864,61 @@ footer a:hover { margin: 0; padding: 0; background: white !important; - /* Ensure no scroll or extra pages from hidden content */ - height: 100vh !important; - overflow: hidden !important; + height: auto !important; + overflow: visible !important; } - /* Hide EVERYTHING initially with high specificity */ - body * { - visibility: hidden !important; + /* Hide everything by default using display:none to collapse space */ + body> :not(#printArea) { display: none !important; - /* Force display none to remove layout space */ } - /* Show ONLY Print Area and its children */ - #printArea, + /* Show Print Area */ + #printArea { + display: grid !important; + visibility: visible !important; + } + #printArea * { visibility: visible !important; - display: flex !important; - /* Restore display for parent */ - } - - /* Reset display for children of printArea specifically */ - #printArea * { - display: block !important; - /* Default to block or whatever needed */ - } - - /* Specific fix for image inside */ - #printArea img { - display: inline-block !important; } + /* Print Area Layout */ #printArea { - position: fixed !important; - /* Fixed helps detach from flow */ + position: relative !important; left: 0 
!important; top: 0 !important; width: 100% !important; - height: 100% !important; - overflow: visible !important; - z-index: 99999 !important; - background: white !important; + margin: 0 !important; + padding: 10px !important; - display: flex !important; - justify-content: center !important; - align-items: center !important; - padding: 0 !important; + display: grid !important; + grid-template-columns: repeat(auto-fill, 85.6mm) !important; + gap: 10px !important; + align-content: start !important; + justify-content: start !important; } - .ktp-print-size { - /* Standar ISO/IEC 7810 ID-1: 85.60 × 53.98 mm */ - width: 85.60mm !important; + /* KTP Item */ + .ktp-print-item { + width: 85.6mm !important; height: 53.98mm !important; - max-width: none !important; - max-height: none !important; - border: 1px dashed #ccc; - box-shadow: none !important; - /* Remove any shadow */ + object-fit: contain !important; + border: 1px dashed #999; + box-sizing: border-box !important; + break-inside: avoid !important; } + /* KK Item (Full Page) */ .a4-print-size { - /* A4 Landscape: 297mm x 210mm */ - /* Use slightly less to account for margins if necessary, but standard is distinct */ - width: 297mm !important; - height: 210mm !important; - max-width: none !important; - max-height: none !important; - border: none; + width: 100% !important; + height: auto !important; + display: block !important; + page-break-after: always !important; + } + + /* Ensure modal is hidden */ + .modal { + display: none !important; } } \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index e303d3c..39c8298 100644 --- a/templates/index.html +++ b/templates/index.html @@ -178,9 +178,53 @@ + + + + + @@ -686,6 +730,20 @@ return indexA - indexB; }); + // Disable Save button initially (since it's auto-saved) + const saveBtn = document.getElementById('saveBtn'); + if (result.saved_to_db) { + saveBtn.disabled = true; + saveBtn.textContent = '💾 Tersimpan'; + 
/**
 * Switch the save button back to its active "unsaved changes" state.
 *
 * Registered as the `input` listener on every `.editable-field`, so the
 * first edit after a result was auto-saved makes saving possible again.
 */
function enableSaveButton() {
    const button = document.getElementById('saveBtn');
    const { classList } = button;

    classList.remove('secondary');
    classList.add('primary');
    button.textContent = '💾 Simpan Perubahan';
    button.disabled = false;
}
+ // Validate if there is something to print const isPreviewVisible = preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src; const isCanvasVisible = cropCanvas.style.display !== 'none'; - if (!isPreviewVisible) { - if (isCanvasVisible) { - if (!confirm('Gambar belum diterapkan (Apply). Cetak tampilan canvas saat ini?')) return; - // Use canvas data - const img = new Image(); - img.src = cropCanvas.toDataURL('image/jpeg', 0.95); - img.className = currentDocType === 'kk' ? 'a4-print-size' : 'ktp-print-size'; - printArea.innerHTML = ''; - printArea.appendChild(img); - // Canvas data is instant, no onload needed usually, but to be safe: - setTimeout(() => window.print(), 100); - return; - } - alert('Tidak ada gambar KTP untuk dicetak! Silakan upload atau pilih dari arsip.'); + if (!isPreviewVisible && !isCanvasVisible) { + alert('Tidak ada gambar untuk dicetak!'); return; } - printArea.innerHTML = ''; - const img = new Image(); - // Use current preview src - img.src = preview.src; - img.className = currentDocType === 'kk' ? 
// Close Print Settings
function closePrintModal() {
    printSettingsModal.style.display = 'none';
}
closePrintBtn.addEventListener('click', closePrintModal);
cancelPrintBtn.addEventListener('click', closePrintModal);

/**
 * Confirm Print: fill #printArea with N copies of the current image and
 * open the browser print dialog once every copy has finished loading.
 *
 * Image source priority: the visible preview (result/archive) first, then
 * the crop canvas (image still being edited).
 */
confirmPrintBtn.addEventListener('click', () => {
    const printArea = document.getElementById('printArea');
    printArea.innerHTML = ''; // Clear previous

    let imgSrc = '';
    if (preview.style.display !== 'none' && preview.getAttribute('src') !== '#' && preview.src) {
        imgSrc = preview.src;
    } else if (cropCanvas.style.display !== 'none') {
        imgSrc = cropCanvas.toDataURL('image/jpeg', 0.95);
    }

    if (!imgSrc) {
        alert('Gagal mengambil gambar source.');
        closePrintModal();
        return;
    }

    // Empty, non-numeric, zero or negative input all fall back to one copy;
    // a negative value would otherwise produce an empty print job.
    const requested = Number.parseInt(printCopiesInput.value, 10);
    const copies = Number.isFinite(requested) && requested > 0 ? requested : 1;

    const isKtp = currentDocType === 'ktp';
    const itemClass = isKtp ? 'ktp-print-item' : 'a4-print-size';

    // One promise per copy so printing starts only after all images loaded.
    const pendingLoads = [];
    for (let i = 0; i < copies; i++) {
        const img = document.createElement('img');
        pendingLoads.push(new Promise((resolve, reject) => {
            img.onload = resolve;
            img.onerror = reject;
        }));
        img.className = itemClass;
        img.src = imgSrc; // set after the handlers so cached images still fire onload
        printArea.appendChild(img);
    }

    if (isKtp) {
        if (!printAutoArrange.checked) {
            // NOTE(review): the print stylesheet declares
            // `#printArea { display: grid !important }`, which presumably
            // wins over this inline value, so the checkbox may have no
            // effect on paper. The inline value is also never reset after
            // printing. Confirm the intended layout for "no auto arrange".
            printArea.style.display = 'block';
        } else {
            printArea.style.removeProperty('display'); // Use CSS default (grid)
        }
    }

    closePrintModal();

    Promise.all(pendingLoads)
        // Short delay after load so the browser has laid out the print area.
        .then(() => setTimeout(() => window.print(), 100))
        .catch(() => alert('Gagal memuat gambar untuk dicetak.'));
});