Files
local-ocr/ocr_engine.py
2025-12-31 01:38:01 +08:00

165 lines
6.2 KiB
Python

"""
OCR Engine menggunakan PaddleOCR 3.x
Untuk membaca teks dari gambar dokumen Indonesia (KTP, KK)
"""
from paddleocr import PaddleOCR
import cv2
import numpy as np
from PIL import Image
class OCREngine:
    """PaddleOCR 3.x wrapper for reading text from Indonesian documents (KTP, KK).

    The engine owns one ``PaddleOCR`` pipeline (``self.ocr``) and exposes
    helpers to optionally clean up an image, run recognition, and return the
    recognised lines ordered top-to-bottom, left-to-right.
    """

    def __init__(self):
        """Build the PaddleOCR 3.x pipeline used by every call on this engine."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # detect page rotation (0/90/180/270 degrees)
            use_doc_unwarping=True,  # correct perspective distortion (trapezoid -> rectangle)
            use_textline_orientation=True,  # orientation of each individual text line
        )

    def preprocess_image(self, image_path: str, max_dim: int = 1500) -> np.ndarray:
        """Load an image and enhance it for OCR.

        Steps, in order: downscale when the longest side exceeds ``max_dim``,
        convert to grayscale, non-local-means denoise, CLAHE contrast
        equalisation, 3x3 sharpening, and conversion back to 3-channel BGR.

        Args:
            image_path: Path of the image file to read.
            max_dim: Maximum allowed size in pixels of the longest image side;
                larger images are scaled down, smaller ones are left as-is.

        Returns:
            The enhanced image as a BGR array (all three channels carry the
            same grayscale data).

        Raises:
            ValueError: If ``max_dim`` is not positive or the image cannot be
                read by OpenCV.
        """
        if max_dim <= 0:
            raise ValueError(f"max_dim must be positive, got {max_dim}")

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Downscale only; INTER_AREA is the interpolation used for shrinking.
        height, width = img.shape[:2]
        longest_side = max(height, width)
        if longest_side > max_dim:
            scale = max_dim / longest_side
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # The enhancement steps below operate on a single channel.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Positional arguments: dst=None, h=10, templateWindowSize=7, searchWindowSize=21.
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # Adaptive histogram equalisation to raise local contrast.
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)

        # Sharpening kernel whose weights sum to 1, so overall brightness is kept.
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
        sharpened = cv2.filter2D(enhanced, -1, kernel)

        # Back to three channels for the OCR pipeline.
        return cv2.cvtColor(sharpened, cv2.COLOR_GRAY2BGR)

    @staticmethod
    def _result_field(res, *names):
        """Return the first non-None field among ``names`` from one OCR result.

        Works for dict-like results (``res.get(name)``) as well as objects that
        expose the fields as plain attributes.
        """
        getter = getattr(res, "get", None)
        for name in names:
            value = getter(name) if callable(getter) else None
            if value is None:
                value = getattr(res, name, None)
            if value is not None:
                return value
        return None

    @staticmethod
    def _as_list(value) -> list:
        """Turn None / list / tuple / ndarray into a list without truth-testing it."""
        # ``if value:`` raises ValueError for NumPy arrays with more than one
        # element, so emptiness is never checked through truthiness here.
        if value is None:
            return []
        return list(value)

    @classmethod
    def _parse_result(cls, res) -> list:
        """Convert one OCR result object into a list of line dictionaries."""
        texts = cls._as_list(cls._result_field(res, "rec_texts", "texts"))
        scores = cls._as_list(cls._result_field(res, "rec_scores", "scores"))
        # Prefer rec_polys: dt_polys lists every detected box, which need not
        # line up index-by-index with the recognised texts.
        polys = cls._as_list(cls._result_field(res, "rec_polys", "dt_polys"))

        lines = []
        for i, text in enumerate(texts):
            confidence = float(scores[i]) if i < len(scores) else 0.0

            poly = polys[i] if i < len(polys) else None
            points = np.asarray(poly) if poly is not None else None
            bbox = points.tolist() if points is not None else []

            has_quad = (
                points is not None
                and points.ndim == 2
                and points.shape[0] >= 4
                and points.shape[1] >= 2
            )
            if has_quad:
                # Midpoint between the first and third vertex (opposite corners).
                y_center = (bbox[0][1] + bbox[2][1]) / 2
                x_center = (bbox[0][0] + bbox[2][0]) / 2
            else:
                # No usable geometry: fall back to the order the lines came in.
                y_center = i * 10
                x_center = 0

            lines.append({
                'text': text,
                'confidence': confidence,
                'bbox': bbox,
                'y_center': y_center,
                'x_center': x_center,
            })
        return lines

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """Run OCR on an image and return the recognised lines in reading order.

        Args:
            image_path: Path of the image file.
            preprocess: When True, the image is first passed through
                ``preprocess_image`` and the resulting array is given to the
                OCR pipeline; bounding boxes then refer to that (possibly
                downscaled) image. When False, the path is passed directly.

        Returns:
            A list of dicts with keys ``'text'``, ``'confidence'``, ``'bbox'``,
            ``'y_center'`` and ``'x_center'``, sorted by ``y_center`` then
            ``x_center``. An empty list is returned when nothing is recognised
            or when any error occurs (the error is printed, not raised).
        """
        try:
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path

            # PaddleOCR 3.x entry point.
            result = self.ocr.predict(input=ocr_input)
            if not result:
                return []

            extracted = []
            for res in result:
                try:
                    extracted.extend(self._parse_result(res))
                except (TypeError, ValueError, IndexError) as exc:
                    # A malformed result is reported and skipped so it cannot
                    # discard the lines parsed from the other results.
                    print(f"Error OCR: {exc}")

            # Top-to-bottom, then left-to-right.
            extracted.sort(key=lambda item: (item['y_center'], item['x_center']))
            return extracted
        except Exception as e:
            # Best-effort boundary: callers get an empty list instead of an exception.
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str, preprocess: bool = False) -> str:
        """Return all recognised text of an image as one newline-joined string.

        Args:
            image_path: Path of the image file.
            preprocess: Forwarded to ``extract_text``.

        Returns:
            The recognised lines in reading order joined with ``'\\n'``; an
            empty string when nothing was recognised.
        """
        results = self.extract_text(image_path, preprocess=preprocess)
        return '\n'.join(r['text'] for r in results)
# Module-level cache for the one shared OCREngine (created on first request)
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the shared OCREngine, constructing it the first time it is requested."""
    global _ocr_engine
    engine = _ocr_engine
    if engine is None:
        # First call: build the engine and remember it for later calls.
        engine = OCREngine()
        _ocr_engine = engine
    return engine
if __name__ == "__main__":
    # Manual smoke test: OCR the image whose path is given on the command line
    # and print every recognised line with its confidence.
    import sys

    if len(sys.argv) < 2:
        # Previously a missing argument made the script exit silently;
        # SystemExit with a message prints it to stderr and exits non-zero.
        raise SystemExit(f"Usage: python {sys.argv[0]} <image_path>")

    engine = get_ocr_engine()
    for r in engine.extract_text(sys.argv[1]):
        print(f"[{r['confidence']:.2f}] {r['text']}")