KK KTP arsip

2025-12-31 01:38:01 +08:00
parent 4fe381b3f0
commit 1de94bfeb4
28 changed files with 3296 additions and 191 deletions
--- a/ocr_engine.py
+++ b/ocr_engine.py
@@ -20,16 +20,19 @@ class OCREngine:
    
    def preprocess_image(self, image_path: str) -> np.ndarray:
        """
-        Preprocessing gambar untuk hasil OCR lebih baik
+        Enhanced preprocessing untuk hasil OCR lebih baik
+        Based on Context7 OpenCV documentation:
        - Resize jika terlalu besar
-        - Enhance contrast
+        - Denoising untuk mengurangi noise
+        - CLAHE untuk adaptive histogram equalization
+        - Sharpening untuk teks lebih jelas
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")
        
-        # Resize jika terlalu besar (max 2000px)
-        max_dim = 2000
+        # Resize jika terlalu besar (max 1500px - optimized for speed)
+        max_dim = 1500
        height, width = img.shape[:2]
        if max(height, width) > max_dim:
            scale = max_dim / max(height, width)
@@ -38,12 +41,20 @@ class OCREngine:
        # Convert ke grayscale untuk preprocessing
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
-        # Enhance contrast menggunakan CLAHE
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-        enhanced = clahe.apply(gray)
+        # Denoise (from Context7) - mengurangi noise tanpa blur teks
+        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
+        
+        # Enhanced CLAHE untuk dokumen (from Context7)
+        # clipLimit lebih tinggi untuk kontras lebih baik
+        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
+        enhanced = clahe.apply(denoised)
+        
+        # Sharpen using kernel (from Context7)
+        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
+        sharpened = cv2.filter2D(enhanced, -1, kernel)
        
        # Convert kembali ke BGR untuk PaddleOCR
-        enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
+        enhanced_bgr = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)
        
        return enhanced_bgr