OCR dengan ZONA
This commit is contained in:
153
ocr_engine.py
Normal file
153
ocr_engine.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
OCR engine built on PaddleOCR 3.x.
Reads text from images of Indonesian documents (KTP, KK).
|
||||
"""
|
||||
|
||||
from paddleocr import PaddleOCR
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class OCREngine:
    """Thin wrapper around a PaddleOCR 3.x pipeline.

    The engine runs OCR on an image and flattens the pipeline output into a
    list of plain dictionaries sorted top-to-bottom, left-to-right.
    """

    def __init__(self):
        """Create the PaddleOCR 3.x pipeline with document corrections enabled."""
        self.ocr = PaddleOCR(
            use_doc_orientation_classify=True,  # Detect page rotation (0°/90°/180°/270°)
            use_doc_unwarping=True,  # Perspective correction (trapezoid -> rectangle)
            use_textline_orientation=True,  # Per-text-line orientation
        )

    def preprocess_image(self, image_path: str, max_dim: int = 2000) -> np.ndarray:
        """Load an image and prepare it for OCR.

        Steps: downscale so the longest side is at most ``max_dim`` pixels,
        convert to grayscale, boost contrast with CLAHE, then convert back to
        a 3-channel BGR image.

        Args:
            image_path: Path to the image file.
            max_dim: Maximum allowed size (in pixels) of the longest side.

        Returns:
            The contrast-enhanced image as a BGR array.

        Raises:
            ValueError: If OpenCV cannot read the image.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Tidak dapat membaca gambar: {image_path}")

        # Only ever shrink; small images are left at their native size.
        height, width = img.shape[:2]
        longest = max(height, width)
        if longest > max_dim:
            scale = max_dim / longest
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # CLAHE works on a single channel, hence the grayscale round trip.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        return cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)

    @staticmethod
    def _field(res, *names):
        """Return the first non-None field of ``res`` among ``names``.

        Each name is looked up as an attribute first and then through
        mapping-style ``get``, so both attribute-style and dict-style result
        objects are supported. ``None`` checks (not truthiness) are used so
        array-valued fields do not raise. Returns ``[]`` if nothing is found.
        """
        for name in names:
            value = getattr(res, name, None)
            if value is None and hasattr(res, 'get'):
                value = res.get(name)
            if value is not None:
                return value
        return []

    @classmethod
    def _parse_result(cls, res) -> list:
        """Convert one pipeline result object into a list of line dictionaries."""
        texts = cls._field(res, 'rec_texts', 'texts')
        scores = cls._field(res, 'rec_scores', 'scores')
        # Prefer 'rec_polys': per the PaddleOCR 3.x result documentation it is
        # filtered together with rec_texts/rec_scores, whereas 'dt_polys'
        # holds every detection and may not be index-aligned with the texts.
        polys = cls._field(res, 'rec_polys', 'dt_polys')

        lines = []
        for i, text in enumerate(texts):
            confidence = scores[i] if i < len(scores) else 0.0

            poly = polys[i] if i < len(polys) else None
            if poly is None:
                bbox = []
            elif hasattr(poly, 'tolist'):
                bbox = poly.tolist()
            elif isinstance(poly, (list, tuple)):
                bbox = list(poly)
            else:
                bbox = []

            if len(bbox) >= 4:
                # Midpoint of the diagonal between the 1st and 3rd vertices.
                y_center = (bbox[0][1] + bbox[2][1]) / 2
                x_center = (bbox[0][0] + bbox[2][0]) / 2
            else:
                # No geometry available: keep recognition order when sorting.
                y_center = i * 10
                x_center = 0

            lines.append({
                'text': text,
                'confidence': float(confidence),
                'bbox': bbox,
                'y_center': y_center,
                'x_center': x_center,
            })
        return lines

    def extract_text(self, image_path: str, preprocess: bool = False) -> list:
        """Run OCR on an image via the PaddleOCR 3.x ``predict`` API.

        Args:
            image_path: Path to the image file.
            preprocess: When True, the image is first passed through
                ``preprocess_image`` and the resulting array is given to the
                pipeline. Box coordinates then refer to the preprocessed
                (possibly downscaled) image, not the file on disk.

        Returns:
            A list of dicts with keys 'text', 'confidence', 'bbox',
            'y_center' and 'x_center', sorted by (y_center, x_center).
            An empty list is returned when nothing is recognised or when any
            error occurs (the error is printed, not raised).
        """
        try:
            ocr_input = self.preprocess_image(image_path) if preprocess else image_path
            result = self.ocr.predict(input=ocr_input)

            if not result:
                return []

            extracted = []
            for res in result:
                extracted.extend(self._parse_result(res))

            # Top-to-bottom, then left-to-right.
            extracted.sort(key=lambda item: (item['y_center'], item['x_center']))
            return extracted

        except Exception as e:
            # Best-effort API: report the failure and hand back an empty list.
            print(f"Error OCR: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_raw_text(self, image_path: str) -> str:
        """Return all recognised text of the image, one OCR line per text line."""
        results = self.extract_text(image_path)
        return '\n'.join(r['text'] for r in results)
|
||||
|
||||
|
||||
# Module-level OCREngine shared by all callers; built lazily on first request.
_ocr_engine = None


def get_ocr_engine() -> OCREngine:
    """Return the shared OCREngine, constructing it on the first call."""
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine
    _ocr_engine = OCREngine()
    return _ocr_engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: OCR the image given as the first command-line argument
    # and print each recognised line with its confidence.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        for item in get_ocr_engine().extract_text(cli_args[0]):
            print(f"[{item['confidence']:.2f}] {item['text']}")
|
||||
Reference in New Issue
Block a user