# local-ocr/image_processor.py

"""
KTP Image Processor - Enhanced Version
Crop, resize, and enhanced preprocessing for more accurate OCR

e-KTP standard: 85.6mm x 53.98mm = 1011x638 px @300dpi

Improvements based on Context7 documentation:
- Pillow ImageEnhance for contrast/sharpness
- OpenCV CLAHE for adaptive histogram equalization
- Denoising for cleaner text detection
"""

import cv2
import numpy as np
import os
from PIL import Image, ImageEnhance, ImageFilter

# Target size in pixels of a normalized e-KTP image: the 85.6 mm x 53.98 mm
# card rendered at 300 dpi (width x height, landscape).
KTP_WIDTH = 1011
KTP_HEIGHT = 638


def enhance_image_pil(image_path: str, output_path: str = None) -> str:
    """
    Enhance an image with Pillow so that text is easier to OCR.

    Processing steps:
    - Contrast enhancement (factor 1.3)
    - Sharpness enhancement (factor 1.2)
    - ``ImageFilter.DETAIL`` for text clarity

    Palette ("P") and bilevel ("1") inputs are converted to RGB before
    processing, and images whose mode JPEG cannot store (for example RGBA
    from a PNG) are converted to RGB when the output file is a JPEG.

    Args:
        image_path: Path to input image
        output_path: Optional path to save the enhanced image. Defaults to
            ``<input path without extension>_enhanced.jpg``.

    Returns:
        Path to the enhanced image, or ``image_path`` unchanged when any step
        fails (enhancement is best-effort and never raises).
    """
    try:
        # The context manager releases the underlying file handle even when
        # a later step raises.
        with Image.open(image_path) as source:
            # Pillow refuses to filter palette images and the enhancers
            # blend pixel values, so work in a true-colour mode instead.
            img = source.convert("RGB") if source.mode in ("P", "1") else source

            # Contrast enhancement (factor 1.3 from Context7)
            img = ImageEnhance.Contrast(img).enhance(1.3)

            # Sharpness enhancement
            img = ImageEnhance.Sharpness(img).enhance(1.2)

            # Apply detail filter for text clarity
            img = img.filter(ImageFilter.DETAIL)

        if output_path is None:
            output_path = f"{os.path.splitext(image_path)[0]}_enhanced.jpg"

        # JPEG has no alpha channel; without this conversion an RGBA/LA
        # input would fail on save and the caller would silently get the
        # unenhanced original back.
        extension = os.path.splitext(output_path)[1].lower()
        is_jpeg = extension in (".jpg", ".jpeg", ".jpe", ".jfif")
        if is_jpeg and img.mode not in ("L", "RGB", "CMYK"):
            img = img.convert("RGB")

        img.save(output_path, quality=95)
        print(f"  [ENHANCE] Pillow enhanced: {output_path}")

        return output_path

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to the
        # original image so the OCR pipeline can continue.
        print(f"  [ENHANCE] Pillow error: {e}")
        return image_path  # Return original if enhancement fails


def enhance_image_cv(image: np.ndarray) -> np.ndarray:
    """
    Enhance an image for OCR using OpenCV.

    The image is converted to grayscale, denoised with non-local means,
    contrast-equalized with CLAHE, sharpened with a 3x3 kernel and finally
    converted back to a 3-channel BGR image.

    Args:
        image: OpenCV image (BGR)

    Returns:
        Enhanced image (BGR); the input image is returned unchanged if any
        step raises.
    """
    try:
        # 3x3 sharpening kernel: boosts the centre pixel against its
        # four direct neighbours.
        sharpen_kernel = np.array(
            [
                [0, -1, 0],
                [-1, 5, -1],
                [0, -1, 0],
            ],
            dtype=np.float32,
        )

        # All intermediate steps operate on a single-channel image.
        mono = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Non-local means denoising (h=10, template window 7, search window 21).
        mono = cv2.fastNlMeansDenoising(mono, None, 10, 7, 21)

        # Adaptive histogram equalization tuned for documents.
        equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        mono = equalizer.apply(mono)

        # ddepth=-1 keeps the output depth equal to the input depth.
        mono = cv2.filter2D(mono, -1, sharpen_kernel)

        # Callers expect a BGR image back.
        return cv2.cvtColor(mono, cv2.COLOR_GRAY2BGR)

    except Exception as e:
        print(f"  [ENHANCE] OpenCV error: {e}")
        return image  # Return original if enhancement fails


def crop_by_ocr_bounds(image, ocr_results, padding=0.03):
    """
    Crop an image to the area covered by the OCR bounding boxes.

    Args:
        image: Image array indexed as ``[row, column, ...]``; only its first
            two dimensions (height, width) are used.
        ocr_results: Iterable of mappings with a ``'box'`` entry holding at
            least four ``(x, y)`` points. Points may be lists, tuples or
            numpy arrays. Entries without a usable box are ignored.
        padding: Extra margin added on every side, as a fraction of the
            image width (horizontally) and height (vertically).

    Returns:
        The cropped view of ``image``, or ``image`` itself when there are no
        usable points or the resulting region would be empty.
    """
    if not ocr_results:
        return image

    h, w = image.shape[:2]
    all_x = []
    all_y = []

    for r in ocr_results:
        box = r.get('box')
        try:
            if box is None or len(box) < 4:
                continue
        except TypeError:
            # Box is not a sized container (e.g. a bare number): skip it.
            continue

        for point in box:
            # OCR engines commonly hand back numpy arrays, not only lists.
            if not isinstance(point, (list, tuple, np.ndarray)):
                continue
            try:
                if len(point) < 2:
                    continue
                # Convert both coordinates before storing either one so a
                # half-valid point never contributes only its x value.
                px, py = float(point[0]), float(point[1])
            except (TypeError, ValueError):
                continue
            # NaN/inf would poison min()/max() and break int() below.
            if np.isfinite(px) and np.isfinite(py):
                all_x.append(px)
                all_y.append(py)

    if not all_x:
        return image

    pad_x = w * padding
    pad_y = h * padding
    x1 = int(max(0, min(all_x) - pad_x))
    y1 = int(max(0, min(all_y) - pad_y))
    # Clamp the far edges at 0 as well: a negative stop index would wrap
    # around in slicing and silently crop the wrong region.
    x2 = int(max(0, min(w, max(all_x) + pad_x)))
    y2 = int(max(0, min(h, max(all_y) + pad_y)))

    # Boxes lying entirely outside the image would give an empty crop;
    # keep the full image instead.
    if x2 <= x1 or y2 <= y1:
        return image

    return image[y1:y2, x1:x2]


def normalize_ktp_image(image_path, output_path=None, ocr_results=None):
    """
    Normalize a KTP image to the standard card size.

    Steps:
    1. Crop to the OCR bounds (only when ``ocr_results`` is given)
    2. Ensure landscape orientation (portrait images are rotated 90 degrees
       clockwise)
    3. Resize to ``KTP_WIDTH`` x ``KTP_HEIGHT`` (the aspect ratio is not
       preserved)
    4. Write the result with JPEG quality 95

    Args:
        image_path: Path to the input image.
        output_path: Optional destination path. Defaults to
            ``<input path without extension>_normalized.jpg``.
        ocr_results: Optional OCR results passed to ``crop_by_ocr_bounds``.

    Returns:
        Tuple ``(output_path, success, message)``. On failure ``output_path``
        is ``None``, ``success`` is ``False`` and ``message`` describes the
        problem; this function does not raise.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            return None, False, "Gagal membaca gambar"

        h, w = image.shape[:2]
        print(f"  [IMAGE] Original: {w}x{h}")

        # Crop
        if ocr_results:
            cropped = crop_by_ocr_bounds(image, ocr_results)
            # An empty crop (OCR boxes outside the image) would make the
            # resize below fail; keep the full image in that case.
            if cropped is not None and cropped.size > 0:
                image = cropped
                h, w = image.shape[:2]
                print(f"  [IMAGE] Cropped: {w}x{h}")
            else:
                print("  [IMAGE] Crop skipped: empty OCR bounds")

        # Landscape
        if h > w:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        # Resize
        resized = cv2.resize(image, (KTP_WIDTH, KTP_HEIGHT),
                             interpolation=cv2.INTER_LANCZOS4)

        # Save
        if output_path is None:
            output_path = f"{os.path.splitext(image_path)[0]}_normalized.jpg"

        # cv2.imwrite signals failure through its return value rather than
        # an exception, so it must be checked before reporting success.
        written = cv2.imwrite(output_path, resized,
                              [cv2.IMWRITE_JPEG_QUALITY, 95])
        if not written:
            return None, False, f"Gagal menyimpan gambar: {output_path}"
        print(f"  [IMAGE] Saved: {output_path}")

        return output_path, True, f"Normalized to {KTP_WIDTH}x{KTP_HEIGHT}"

    except Exception as e:
        # Boundary handler: log the full traceback and report the failure
        # through the return tuple instead of raising.
        import traceback
        traceback.print_exc()
        return None, False, f"Error: {str(e)}"