"""
OCR engine built on PaddleOCR 3.x.

Reads text from images of Indonesian documents (KTP, KK).
"""
from paddleocr import PaddleOCR
import cv2
import numpy as np
from PIL import Image


class OCREngine:
    """Thin wrapper around a PaddleOCR 3.x pipeline with optional preprocessing."""

    def __init__(self):
        """Create the PaddleOCR 3.x pipeline configured for Indonesian documents."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # detect page rotation (0/90/180/270 degrees)
            use_doc_unwarping=True,  # perspective correction (trapezoid -> rectangle)
            use_textline_orientation=True,  # orientation of each text line
        )

    @staticmethod
    def _as_sequence(value):
        """Return *value* if it is a sized container, otherwise an empty list.

        A plain truthiness test (``value if value else []``) must not be used
        here: a NumPy array with more than one element raises ``ValueError``
        when evaluated as a boolean.
        """
        if value is None or not hasattr(value, '__len__'):
            return []
        return value

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """
        Enhance an image before OCR.

        Steps, in order:
        - downscale so the longest side is at most 1500 px
        - convert to grayscale
        - non-local-means denoising
        - CLAHE (adaptive histogram equalisation)
        - sharpening with a 3x3 kernel
        - convert back to a 3-channel BGR image

        Args:
            image_path: Path to the image file.

        Returns:
            The processed image as a BGR array.

        Raises:
            ValueError: If ``cv2.imread`` cannot read the file.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Downscale oversized images (longest side capped at 1500 px, chosen for speed).
        max_dim = 1500
        height, width = img.shape[:2]
        if max(height, width) > max_dim:
            scale = max_dim / max(height, width)
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # The remaining filters operate on a single-channel image.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Reduce noise without blurring the text
        # (positional args: h=10, templateWindowSize=7, searchWindowSize=21).
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # CLAHE with a raised clip limit for stronger local contrast on documents.
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)

        # Sharpen so glyph edges are crisper.
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
        sharpened = cv2.filter2D(enhanced, -1, kernel)

        # Convert back to BGR before handing the image to PaddleOCR.
        enhanced_bgr = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)
        return enhanced_bgr

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """
        Extract text from an image using the PaddleOCR 3.x ``predict`` API.

        Args:
            image_path: Path to the image file.
            preprocess: When True, run ``preprocess_image`` first and feed the
                resulting array to the OCR pipeline instead of the raw path.

        Returns:
            A list of dicts with keys 'text', 'confidence', 'bbox',
            'y_center' and 'x_center', sorted by (y_center, x_center).
            An empty list is returned when nothing is recognised or when any
            error occurs (the error is printed together with its traceback).
        """
        try:
            # Honour the `preprocess` flag; the default keeps passing the path through.
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path
            result = self.ocr.predict(input=ocr_input)

            if not result:
                return []

            extracted = []
            for res in result:
                # Attribute-style result object.
                if hasattr(res, 'rec_texts') and hasattr(res, 'rec_scores') and hasattr(res, 'dt_polys'):
                    texts = self._as_sequence(res.rec_texts)
                    scores = self._as_sequence(res.rec_scores)
                    polys = self._as_sequence(res.dt_polys)

                    for i, text in enumerate(texts):
                        confidence = scores[i] if i < len(scores) else 0.0
                        if i < len(polys) and hasattr(polys[i], 'tolist'):
                            bbox = polys[i].tolist()
                        else:
                            bbox = []

                        # Centre used for sorting: midpoint of points 0 and 2
                        # (presumably opposite corners of the quad - verify
                        # against the PaddleOCR polygon ordering).
                        if len(bbox) >= 4:
                            y_center = (bbox[0][1] + bbox[2][1]) / 2
                            x_center = (bbox[0][0] + bbox[2][0]) / 2
                        else:
                            y_center = 0
                            x_center = 0

                        extracted.append({
                            'text': text,
                            'confidence': float(confidence),
                            'bbox': bbox,
                            'y_center': y_center,
                            'x_center': x_center,
                        })

                # Fallback: dict-style access; no geometry is read on this path.
                elif hasattr(res, '__getitem__'):
                    try:
                        texts = self._as_sequence(
                            res.get('rec_texts', res.get('texts', []))
                        )
                        scores = self._as_sequence(
                            res.get('rec_scores', res.get('scores', []))
                        )

                        for i, text in enumerate(texts):
                            confidence = scores[i] if i < len(scores) else 0.0
                            extracted.append({
                                'text': text,
                                'confidence': float(confidence),
                                'bbox': [],
                                'y_center': i * 10,  # keeps the engine's own ordering
                                'x_center': 0,
                            })
                    except Exception as e:
                        # Still best-effort (entries parsed so far are kept),
                        # but the failure is reported instead of hidden.
                        print(f"Error OCR: {e}")

            # Order top-to-bottom, then left-to-right.
            extracted.sort(key=lambda item: (item['y_center'], item['x_center']))
            return extracted

        except Exception as e:
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str) -> str:
        """
        Return all recognised text from the image as one newline-joined string.
        """
        results = self.extract_text(image_path)
        return '\n'.join(r['text'] for r in results)


# Singleton instance
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the module-wide OCREngine, creating it on first use."""
    global _ocr_engine
    if _ocr_engine is None:
        _ocr_engine = OCREngine()
    return _ocr_engine


if __name__ == "__main__":
    # Manual test: python <this file> <image path>
    import sys

    if len(sys.argv) > 1:
        engine = get_ocr_engine()
        results = engine.extract_text(sys.argv[1])
        for r in results:
            print(f"[{r['confidence']:.2f}] {r['text']}")