Showing 1 changed file with 8 additions and 6 deletions
| ... | @@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", " | ... | @@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", " |
| 14 | PDF_NAME = 'nemo_new' | 14 | PDF_NAME = 'nemo_new' |
| 15 | 15 | ||
| 16 | # PDF path | 16 | # PDF path |
| 17 | -pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "2.pdf" | 17 | +pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf" |
| 18 | # Output folder | 18 | # Output folder |
| 19 | output_folder = Path(BASE_DIR) / "public" / "image" | 19 | output_folder = Path(BASE_DIR) / "public" / "image" |
| 20 | 20 | ||
| ... | @@ -81,9 +81,9 @@ def bolden_text(rgb_img: np.ndarray, | ... | @@ -81,9 +81,9 @@ def bolden_text(rgb_img: np.ndarray, |
| 81 | threshold = 0.02 | 81 | threshold = 0.02 |
| 82 | text_ratio = estimate_text_ratio(gray, block_size=256) | 82 | text_ratio = estimate_text_ratio(gray, block_size=256) |
| 83 | print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if text_ratio < threshold else 'Đậm'}") | 83 | print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if text_ratio < threshold else 'Đậm'}") |
| 84 | - | 84 | + debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg") |
| 85 | if text_ratio > threshold: | 85 | if text_ratio > threshold: |
| 86 | - return rgb_img | 86 | + return rgb_img, debug_path |
| 87 | 87 | ||
| 88 | # Dilation | 88 | # Dilation |
| 89 | text_mask = 255 - binary | 89 | text_mask = 255 - binary |
| ... | @@ -104,12 +104,12 @@ def bolden_text(rgb_img: np.ndarray, | ... | @@ -104,12 +104,12 @@ def bolden_text(rgb_img: np.ndarray, |
| 104 | 104 | ||
| 105 | # Trả về RGB cho PaddleOCR | 105 | # Trả về RGB cho PaddleOCR |
| 106 | out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB) | 106 | out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB) |
| 107 | - debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg") | 107 | + |
| 108 | cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)) | 108 | cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)) |
| 109 | print(f"[DEBUG] Preprocessed image saved to: {debug_path}") | 109 | print(f"[DEBUG] Preprocessed image saved to: {debug_path}") |
| 110 | - return out_rgb | 110 | + return out_rgb,debug_path |
| 111 | 111 | ||
| 112 | -preprocessed = bolden_text( | 112 | +preprocessed,debug_file = bolden_text( |
| 113 | image_np, | 113 | image_np, |
| 114 | kernel_size=3, # tăng lên 3 nếu chữ vẫn mảnh | 114 | kernel_size=3, # tăng lên 3 nếu chữ vẫn mảnh |
| 115 | iterations=1, # tăng lên 2 nếu muốn đậm hơn | 115 | iterations=1, # tăng lên 2 nếu muốn đậm hơn |
| ... | @@ -143,6 +143,8 @@ for res in results: | ... | @@ -143,6 +143,8 @@ for res in results: |
| 143 | }) | 143 | }) |
| 144 | 144 | ||
| 145 | # ==== Detect table ==== | 145 | # ==== Detect table ==== |
| 146 | +if debug_file and os.path.exists(debug_file): | ||
| 147 | + image_path = debug_file | ||
| 146 | table_info = detect_tables(image_path) | 148 | table_info = detect_tables(image_path) |
| 147 | 149 | ||
| 148 | for table in table_info: | 150 | for table in table_info: | ... | ... |
-
Please register or sign in to post a comment