Fix table detection

tien_nemo
Commit ef142bf18d76262349adc984f42bb69a188ce9fb ef142bf1 1 parent 57efd5e3
Showing 1 changed file with 8 additions and 6 deletions
app/Services/OCR/extrac_table.py
--- a/app/Services/OCR/extrac_table.py
View file @ef142bf
+++ b/app/Services/OCR/extrac_table.py
View file @ef142bf
@@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "
 PDF_NAME = 'nemo_new'
 
 # PDF path
- pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "2.pdf"
+ pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"
 # Output folder
 output_folder = Path(BASE_DIR) / "public" / "image"
 
@@ -81,9 +81,9 @@ def bolden_text(rgb_img: np.ndarray,
     threshold = 0.02
     text_ratio = estimate_text_ratio(gray, block_size=256)
     print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if text_ratio < threshold else 'Đậm'}")
- 
+     debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
     if text_ratio > threshold:
-         return rgb_img
+         return rgb_img, debug_path
 
     # Dilation
     text_mask = 255 - binary
@@ -104,12 +104,12 @@ def bolden_text(rgb_img: np.ndarray,
 
     # Trả về RGB cho PaddleOCR
     out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
-     debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
+ 
     cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR))
     print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
-     return out_rgb
+     return out_rgb,debug_path
 
- preprocessed = bolden_text(
+ preprocessed,debug_file = bolden_text(
     image_np,
     kernel_size=3,     # tăng lên 3 nếu chữ vẫn mảnh
     iterations=1,      # tăng lên 2 nếu muốn đậm hơn
@@ -143,6 +143,8 @@ for res in results:
         })
 
 # ==== Detect table ====
+ if debug_file and os.path.exists(debug_file):
+     image_path = debug_file
 table_info = detect_tables(image_path)
 
 for table in table_info: