Files
local-ocr/image_processor.py
2025-12-31 01:38:01 +08:00

175 lines
5.0 KiB
Python

"""
KTP Image Processor - Enhanced Version
Crop, resize, dan enhanced preprocessing untuk OCR yang lebih akurat
Standar e-KTP: 85.6mm x 53.98mm = 1011x638 px @300dpi
Improvements based on Context7 documentation:
- Pillow ImageEnhance for contrast/sharpness
- OpenCV CLAHE for adaptive histogram equalization
- Denoising for cleaner text detection
"""
import cv2
import numpy as np
import os
from PIL import Image, ImageEnhance, ImageFilter
# Target raster size in pixels for a normalized e-KTP card:
# 85.6 mm x 53.98 mm rendered at 300 dpi.
KTP_WIDTH, KTP_HEIGHT = 1011, 638
def enhance_image_pil(image_path: str, output_path: str = None) -> str:
    """
    Enhance an image for OCR using Pillow.

    Applied in order: contrast boost (factor 1.3), sharpness boost
    (factor 1.2), then Pillow's DETAIL filter for text clarity.

    Args:
        image_path: Path to the input image.
        output_path: Optional path for the enhanced image. Defaults to
            "<image_path without extension>_enhanced.jpg".

    Returns:
        Path to the enhanced image, or ``image_path`` unchanged when any
        step fails (best effort: the error is printed, never raised).
    """
    try:
        # The context manager closes the underlying file handle once the
        # pixels have been processed and written out.
        with Image.open(image_path) as source:
            img = source

            # Palette / alpha modes (P, RGBA, LA, ...) cannot be filtered
            # and/or written as JPEG, which previously made enhancement
            # fall back to the original file. Modes that JPEG can store
            # are left untouched.
            if img.mode not in ("L", "RGB", "CMYK"):
                img = img.convert("RGB")

            # Contrast enhancement (factor 1.3 from Context7)
            img = ImageEnhance.Contrast(img).enhance(1.3)

            # Sharpness enhancement
            img = ImageEnhance.Sharpness(img).enhance(1.2)

            # Apply detail filter for text clarity
            img = img.filter(ImageFilter.DETAIL)

            if output_path is None:
                base, _ = os.path.splitext(image_path)
                output_path = f"{base}_enhanced.jpg"

            img.save(output_path, quality=95)

        print(f" [ENHANCE] Pillow enhanced: {output_path}")
        return output_path
    except Exception as e:
        # Deliberate best effort: OCR can still run on the original image.
        print(f" [ENHANCE] Pillow error: {e}")
        return image_path
def enhance_image_cv(image: np.ndarray) -> np.ndarray:
    """
    Enhance an image for OCR using OpenCV.

    Pipeline: grayscale conversion -> non-local-means denoising ->
    CLAHE (adaptive histogram equalization) -> 3x3 sharpening filter ->
    conversion back to a 3-channel image.

    Args:
        image: OpenCV image (BGR).

    Returns:
        Enhanced image (BGR layout, grayscale content), or the input
        ``image`` unchanged if any step raises.
    """
    try:
        # Build the reusable operators first; they do not depend on the input.
        sharpen_kernel = np.array(
            [[0, -1, 0],
             [-1, 5, -1],
             [0, -1, 0]],
            dtype=np.float32,
        )
        equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Positional arguments: dst=None, h=10, templateWindowSize=7,
        # searchWindowSize=21.
        smoothed = cv2.fastNlMeansDenoising(grayscale, None, 10, 7, 21)

        equalized = equalizer.apply(smoothed)

        # ddepth=-1 keeps the output depth equal to the input depth.
        crisp = cv2.filter2D(equalized, -1, sharpen_kernel)

        return cv2.cvtColor(crisp, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        print(f" [ENHANCE] OpenCV error: {e}")
        # Fall back to the untouched input so the caller can continue.
        return image
def crop_by_ocr_bounds(image, ocr_results, padding=0.03):
    """
    Crop ``image`` to the padded bounding rectangle of all OCR boxes.

    Args:
        image: Array indexed as ``image[y, x]`` (e.g. an OpenCV image).
        ocr_results: Sequence of dict-like OCR results. Each may carry a
            ``'box'`` entry holding at least four ``(x, y)`` points given
            as lists, tuples or NumPy arrays.
        padding: Margin added on every side, as a fraction of the image
            width (horizontally) and of the image height (vertically).

    Returns:
        The cropped region of ``image``, or ``image`` itself when there
        are no usable points or the clamped rectangle would be empty.
    """
    if not ocr_results:
        return image

    h, w = image.shape[:2]
    xs = []
    ys = []

    for result in ocr_results:
        box = result.get('box')
        if box is None or len(box) < 4:
            continue

        for point in box:
            if not isinstance(point, (list, tuple, np.ndarray)) or len(point) < 2:
                continue
            try:
                # Convert both coordinates before storing either, so a
                # half-parsed point is never recorded.
                x, y = float(point[0]), float(point[1])
            except (TypeError, ValueError):
                continue
            # NaN / inf would poison min()/max() and break int() below.
            if not (np.isfinite(x) and np.isfinite(y)):
                continue
            xs.append(x)
            ys.append(y)

    if not xs:
        return image

    pad_x = w * padding
    pad_y = h * padding
    x1 = int(max(0, min(xs) - pad_x))
    y1 = int(max(0, min(ys) - pad_y))
    x2 = int(min(w, max(xs) + pad_x))
    y2 = int(min(h, max(ys) + pad_y))

    # Boxes lying entirely outside the image produce an inverted or
    # negative rectangle; slicing with it would yield an empty (or, with
    # negative indices, a wrong) crop. Keep the full image instead.
    if x2 <= x1 or y2 <= y1:
        return image

    return image[y1:y2, x1:x2]
def normalize_ktp_image(image_path, output_path=None, ocr_results=None):
    """
    Normalize a KTP image to the standard card size.

    Steps:
        1. Crop to the OCR bounds (only when ``ocr_results`` is given).
        2. Ensure landscape: portrait images are rotated 90 degrees
           clockwise.
        3. Resize to ``KTP_WIDTH`` x ``KTP_HEIGHT`` (the aspect ratio is
           not preserved).
        4. Write the result with ``cv2.imwrite`` (JPEG quality 95).

    Args:
        image_path: Path to the input image.
        output_path: Optional destination path. Defaults to
            "<image_path without extension>_normalized.jpg".
        ocr_results: Optional OCR results passed to
            ``crop_by_ocr_bounds``.

    Returns:
        Tuple ``(output_path, success, message)``. ``output_path`` is
        ``None`` whenever ``success`` is ``False``. Errors are reported
        through the tuple and never raised.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            return None, False, "Gagal membaca gambar"

        h, w = image.shape[:2]
        print(f" [IMAGE] Original: {w}x{h}")

        # Crop
        if ocr_results:
            cropped = crop_by_ocr_bounds(image, ocr_results)
            # An empty crop would make cv2.resize fail; keep the full
            # frame in that case.
            if cropped is not None and cropped.size > 0:
                image = cropped
            h, w = image.shape[:2]
            print(f" [IMAGE] Cropped: {w}x{h}")

        # Landscape
        if h > w:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        # Resize
        resized = cv2.resize(
            image,
            (KTP_WIDTH, KTP_HEIGHT),
            interpolation=cv2.INTER_LANCZOS4,
        )

        # Save
        if output_path is None:
            base, _ = os.path.splitext(image_path)
            output_path = f"{base}_normalized.jpg"

        # cv2.imwrite reports many failures (missing directory, no write
        # permission) by returning False instead of raising, so the
        # result must be checked before claiming success.
        written = cv2.imwrite(output_path, resized, [cv2.IMWRITE_JPEG_QUALITY, 95])
        if not written:
            return None, False, f"Gagal menyimpan gambar: {output_path}"

        print(f" [IMAGE] Saved: {output_path}")
        return output_path, True, f"Normalized to {KTP_WIDTH}x{KTP_HEIGHT}"
    except Exception as e:
        # Top-level boundary for this pipeline step: log the traceback and
        # report the failure through the return tuple.
        import traceback
        traceback.print_exc()
        return None, False, f"Error: {str(e)}"