Fix table detection: run detect_tables on the preprocessed debug image when it exists

tien_nemo
Commit ef142bf18d76262349adc984f42bb69a188ce9fb ef142bf1 1 parent 57efd5e3
Showing 1 changed file with 8 additions and 6 deletions
app/Services/OCR/extrac_table.py
--- a/app/Services/OCR/extrac_table.py
View file @ef142bf
+++ b/app/Services/OCR/extrac_table.py
View file @ef142bf
@@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "
 PDF_NAME = 'nemo_new'
 # PDF path
-pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "2.pdf"
+pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"
 # Output folder
 output_folder = Path(BASE_DIR) / "public" / "image"
@@ -81,9 +81,9 @@ def bolden_text(rgb_img: np.ndarray,
     threshold = 0.02
     text_ratio = estimate_text_ratio(gray, block_size=256)
     print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if text_ratio < threshold else 'Đậm'}")
-
+    debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
     if text_ratio > threshold:
-        return rgb_img
+        return rgb_img, debug_path
     # Dilation
     text_mask = 255 - binary
@@ -104,12 +104,12 @@ def bolden_text(rgb_img: np.ndarray,
     # Trả về RGB cho PaddleOCR
     out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
-    debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
+
     cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR))
     print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
-    return out_rgb
+    return out_rgb,debug_path
-preprocessed = bolden_text(
+preprocessed,debug_file = bolden_text(
     image_np,
     kernel_size=3,     # tăng lên 3 nếu chữ vẫn mảnh
     iterations=1,      # tăng lên 2 nếu muốn đậm hơn
@@ -143,6 +143,8 @@ for res in results:
         })
 # ==== Detect table ====
+if debug_file and os.path.exists(debug_file):
+    image_path = debug_file
 table_info = detect_tables(image_path)
 for table in table_info: