"""
|
|
OCR Engine menggunakan PaddleOCR 3.x
|
|
Untuk membaca teks dari gambar dokumen Indonesia (KTP, KK)
|
|
"""
|
|
|
|
import cv2
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
class OCREngine:
    """OCR engine based on PaddleOCR 3.x for reading text from Indonesian
    document images (KTP, KK)."""

    def __init__(self):
        """Initialize PaddleOCR 3.x configured for Indonesian documents."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # detect page rotation (0/90/180/270 degrees)
            use_doc_unwarping=True,  # perspective correction (trapezoid -> rectangle)
            use_textline_orientation=True,  # per-text-line orientation
        )

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """
        Enhanced preprocessing for better OCR results.

        Pipeline (based on Context7 OpenCV documentation):
        - resize if too large (longest side capped at 1500 px for speed)
        - non-local-means denoising to reduce noise without blurring text
        - CLAHE for adaptive histogram equalization
        - kernel sharpening so text edges are crisper

        Args:
            image_path: Path to the image file.

        Returns:
            Preprocessed image as a BGR ndarray (PaddleOCR expects 3 channels).

        Raises:
            ValueError: If the image cannot be read.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Downscale large images (max 1500 px - optimized for speed).
        max_dim = 1500
        height, width = img.shape[:2]
        if max(height, width) > max_dim:
            scale = max_dim / max(height, width)
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # The enhancement steps below operate on a single channel.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Denoise without blurring text strokes.
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # CLAHE with a higher clipLimit for stronger document contrast.
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)

        # Sharpen with a standard 3x3 kernel.
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
        sharpened = cv2.filter2D(enhanced, -1, kernel)

        # Convert back to BGR: PaddleOCR expects a 3-channel image.
        return cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)

    @staticmethod
    def _parse_attr_result(res) -> list:
        """Parse one PaddleOCR 3.x result object exposing the
        rec_texts / rec_scores / dt_polys attributes.

        Returns a list of dicts with keys 'text', 'confidence', 'bbox',
        'y_center', 'x_center' (centers drive the reading-order sort).
        """
        texts = res.rec_texts if res.rec_texts else []
        scores = res.rec_scores if res.rec_scores else []
        polys = res.dt_polys if res.dt_polys else []

        items = []
        for i, text in enumerate(texts):
            # scores/polys may be shorter than texts; degrade gracefully.
            confidence = scores[i] if i < len(scores) else 0.0
            bbox = polys[i].tolist() if i < len(polys) and hasattr(polys[i], 'tolist') else []

            # Box center (mean of top-left and bottom-right corners),
            # used to sort results top-to-bottom, left-to-right.
            if bbox and len(bbox) >= 4:
                y_center = (bbox[0][1] + bbox[2][1]) / 2
                x_center = (bbox[0][0] + bbox[2][0]) / 2
            else:
                y_center = 0
                x_center = 0

            items.append({
                'text': text,
                'confidence': float(confidence),
                'bbox': bbox,
                'y_center': y_center,
                'x_center': x_center,
            })
        return items

    @staticmethod
    def _parse_mapping_result(res) -> list:
        """Fallback parser for dict-like results (no bbox information).

        Uses the item index for 'y_center' so the original order survives
        the position sort in extract_text().
        """
        items = []
        try:
            texts = res.get('rec_texts', res.get('texts', []))
            scores = res.get('rec_scores', res.get('scores', []))

            for i, text in enumerate(texts):
                confidence = scores[i] if i < len(scores) else 0.0
                items.append({
                    'text': text,
                    'confidence': float(confidence),
                    'bbox': [],
                    'y_center': i * 10,  # simple ordering fallback
                    'x_center': 0,
                })
        except Exception:
            # Best effort: an unexpected result shape yields no entries.
            pass
        return items

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """
        Extract text from an image using the PaddleOCR 3.x API.

        Args:
            image_path: Path to the image file.
            preprocess: Whether to run preprocess_image() first.

        Returns:
            List of dicts with keys 'text', 'confidence', 'bbox',
            'y_center', 'x_center', sorted top-to-bottom then
            left-to-right. Empty list on OCR failure.
        """
        try:
            # BUGFIX: the `preprocess` flag was previously accepted but
            # never used; now it routes the image through preprocessing.
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path

            # Run OCR with the new (3.x) predict API.
            result = self.ocr.predict(input=ocr_input)

            if not result:
                return []

            extracted = []
            for res in result:
                if hasattr(res, 'rec_texts') and hasattr(res, 'rec_scores') and hasattr(res, 'dt_polys'):
                    extracted.extend(self._parse_attr_result(res))
                # Fallback: try dict-like access.
                elif hasattr(res, '__getitem__'):
                    extracted.extend(self._parse_mapping_result(res))

            # Sort by Y position (top to bottom), then X (left to right).
            extracted.sort(key=lambda item: (item['y_center'], item['x_center']))
            return extracted

        except Exception as e:
            # Best effort: report the failure and return no results
            # rather than crashing the caller.
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str) -> str:
        """Return all recognized text as one newline-separated string."""
        results = self.extract_text(image_path)
        return '\n'.join([r['text'] for r in results])
|
|
|
|
|
|
# Lazily-created, process-wide OCR engine (singleton).
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the shared OCREngine, building it on first use."""
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine
    _ocr_engine = OCREngine()
    return _ocr_engine
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Test OCR
|
|
import sys
|
|
if len(sys.argv) > 1:
|
|
engine = get_ocr_engine()
|
|
results = engine.extract_text(sys.argv[1])
|
|
for r in results:
|
|
print(f"[{r['confidence']:.2f}] {r['text']}")
|