test1

tien_nemo
Commit dc663513b1a4b35a4ac3d2cf2031a54bf7fa5e6c dc663513 1 parent cade3f2a
Showing 2 changed files with 156 additions and 0 deletions
app/Services/OCR/extrac_table.py
app/Services/OCR/table_detector.py
--- a/app/Services/OCR/extrac_table.py 0 → 100644
View file @dc66351
+++ b/app/Services/OCR/extrac_table.py 0 → 100644
View file @dc66351
+ from paddleocr import PaddleOCR
+ from pdf2image import convert_from_path
+ import os
+ import time
+ import numpy as np
+ import json
+ from pathlib import Path
+ import cv2
+ from table_detector import detect_tables
+ 
+ # ==== Config ====
+ # Project root: three directory levels above the folder containing this file.
+ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+ # Prefix for every generated file name.
+ # NOTE(review): hard-coded placeholder; presumably meant to become
+ # pdf_path.stem eventually -- confirm before changing, it alters output names.
+ PDF_NAME = 'aaaa'
+ 
+ # PDF path
+ pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "fax.pdf"
+ # Output folder
+ output_folder = Path(BASE_DIR) / "public" / "image"
+ 
+ # Fail fast with a clear message before the OCR model is loaded.
+ if not pdf_path.is_file():
+     raise FileNotFoundError(f"PDF file not found: {pdf_path}")
+ 
+ os.makedirs(output_folder, exist_ok=True)
+ 
+ # The Unix timestamp keeps the output names of successive runs distinct
+ # (at one-second resolution).
+ timestamp = int(time.time())
+ img_base_name = f"{PDF_NAME}_{timestamp}"
+ 
+ # ==== OCR Init ====
+ # Document orientation classification, unwarping and text-line orientation
+ # are all switched off.
+ ocr = PaddleOCR(
+     use_doc_orientation_classify=False,
+     use_doc_unwarping=False,
+     use_textline_orientation=False
+ )
+ 
+ # ==== PDF to Image ====
+ # Only the first page of the PDF is rendered.
+ pages = convert_from_path(pdf_path, first_page=1, last_page=1)
+ if not pages:
+     raise RuntimeError(f"No page could be rendered from PDF: {pdf_path}")
+ first_page = pages[0]
+ 
+ # Kept as a plain string: it is later handed to detect_tables(), which passes
+ # it to cv2.imread().
+ image_path = os.path.join(output_folder, f"{img_base_name}.jpg")
+ first_page.save(image_path, "JPEG")
+ 
+ # ==== Run OCR ====
+ image_np = np.array(first_page)
+ results = ocr.predict(image_np)
+ # ==== Convert polygon to bbox ====
+ def poly_to_bbox(poly):
+     """Return the axis-aligned bounding box of a polygon.
+ 
+     Args:
+         poly: Sequence of points; item 0 of each point is x, item 1 is y.
+ 
+     Returns:
+         ``[x_min, y_min, x_max, y_max]`` with every value converted by ``int()``.
+ 
+     Raises:
+         ValueError: If ``poly`` contains no points.
+     """
+     x_coords = [point[0] for point in poly]
+     y_coords = [point[1] for point in poly]
+     left, top = min(x_coords), min(y_coords)
+     right, bottom = max(x_coords), max(y_coords)
+     return [int(coord) for coord in (left, top, right, bottom)]
+ 
+ # ==== Build ocrData ====
+ # One record per recognised text line, across every OCR result.
+ ocr_data_list = []
+ for res in results:
+     ocr_data_list.extend(
+         {
+             "text": text,
+             "bbox": poly_to_bbox(poly),
+             "field": "",
+             "hideBorder": False,
+         }
+         for text, poly in zip(res['rec_texts'], res['rec_polys'])
+     )
+ 
+ # ==== Detect table ====
+ # Runs on the JPEG written to disk earlier, not on the in-memory page.
+ table_info = detect_tables(image_path)
+ 
+ # ==== Build JSON ====
+ final_json = {"ocr_data": ocr_data_list, "tables": table_info}
+ 
+ 
+ # ==== Save JSON ====
+ # Same "<PDF_NAME>_<timestamp>" prefix as the page image.
+ json_path = os.path.join(output_folder, f"{img_base_name}_with_table.json")
+ with open(json_path, "w", encoding="utf-8") as f:
+     # ensure_ascii=False writes non-ASCII text as-is instead of \u escapes.
+     f.write(json.dumps(final_json, ensure_ascii=False, indent=2))
+ 
+ print(f"Saved OCR + Table JSON to: {json_path}")
--- a/app/Services/OCR/table_detector.py 0 → 100644
View file @dc66351
+++ b/app/Services/OCR/table_detector.py 0 → 100644
View file @dc66351
+ import cv2
+ import numpy as np
+ import os
+ 
+ def _cluster_positions(values, tol):
+     """Keep one coordinate per group of nearby line positions.
+ 
+     Values are visited in ascending order; a value is kept only when it lies
+     more than ``tol`` pixels beyond the previously kept value.
+     """
+     kept = []
+     for value in sorted(values):
+         if not kept or value - kept[-1] > tol:
+             kept.append(value)
+     return kept
+ 
+ 
+ def detect_tables(image_path, *, save_debug=True):
+     """Detect a ruled table in an image from its long straight lines.
+ 
+     Canny edges are fed to a probabilistic Hough transform twice: once for
+     near-horizontal segments at least 60% of the image width long (row
+     borders) and once for near-vertical segments at least 50% of the image
+     height long (column borders). Line positions closer than 10 px are
+     merged. A table is reported only when at least three distinct horizontal
+     lines remain.
+ 
+     Args:
+         image_path: Path of the image file; read with ``cv2.imread``.
+         save_debug: When true (the default), write a copy of the image with
+             the detected row and table boxes drawn on it to
+             ``<image path without extension>_debug.jpg``.
+ 
+     Returns:
+         A list holding zero or one dict with the keys ``total_rows``,
+         ``total_cols``, ``table_box`` (``(x_min, y_min, x_max, y_max)``) and
+         ``rows_box`` (a list of ``{"row": (x_min, y_top, x_max, y_bottom)}``).
+         ``total_cols`` counts the gaps between vertical lines found anywhere
+         in the image; it is not restricted to the table box.
+ 
+     Raises:
+         FileNotFoundError: If the image cannot be read.
+     """
+     img = cv2.imread(image_path)
+     if img is None:
+         raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
+ 
+     img_h, img_w = img.shape[:2]
+     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+     blur = cv2.GaussianBlur(gray, (3, 3), 0)
+ 
+     # Edge detection
+     edges = cv2.Canny(blur, 50, 150, apertureSize=3)
+ 
+     # --- Horizontal lines ---
+     lines_h = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=120,
+                               minLineLength=int(img_w * 0.6), maxLineGap=20)
+     ys_candidates, line_segments = [], []
+     if lines_h is not None:
+         for line in lines_h:
+             # Plain ints keep NumPy scalars out of the drawing calls and
+             # out of the returned structure.
+             x1, y1, x2, y2 = (int(v) for v in line[0])
+             if abs(y1 - y2) <= 3:  # horizontal within a 3 px tolerance
+                 y_mid = int(round((y1 + y2) / 2))
+                 ys_candidates.append(y_mid)
+                 # Endpoint order is not relied upon: store (left, right)
+                 # explicitly so the table extent below is correct.
+                 line_segments.append((min(x1, x2), max(x1, x2), y_mid))
+ 
+     # Merge horizontal lines that sit on (almost) the same y.
+     ys = _cluster_positions(ys_candidates, 10)
+     total_rows = max(0, len(ys) - 1)
+ 
+     # --- Vertical lines ---
+     lines_v = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
+                               minLineLength=int(img_h * 0.5), maxLineGap=20)
+     xs = []
+     if lines_v is not None:
+         for line in lines_v:
+             x1, y1, x2, y2 = (int(v) for v in line[0])
+             if abs(x1 - x2) <= 3:  # vertical within a 3 px tolerance
+                 xs.append(int(round((x1 + x2) / 2)))
+ 
+     # Merge vertical lines that sit on (almost) the same x.
+     x_pos = _cluster_positions(xs, 10)
+     total_cols = max(0, len(x_pos) - 1)
+ 
+     tables = []
+     # Three horizontal lines are the minimum for a two-row table.
+     if len(ys) >= 3 and line_segments:
+         y_min, y_max = ys[0], ys[-1]
+         min_x = min(seg[0] for seg in line_segments)
+         max_x = max(seg[1] for seg in line_segments)
+ 
+         rows = []
+         # Each pair of consecutive horizontal lines bounds one row.
+         for top, bottom in zip(ys, ys[1:]):
+             rows.append({"row": (min_x, top, max_x, bottom)})
+             cv2.rectangle(img, (min_x, top), (max_x, bottom), (0, 255, 255), 2)
+ 
+         tables.append({
+             "total_rows": total_rows,
+             "total_cols": total_cols,
+             "table_box": (min_x, y_min, max_x, y_max),
+             "rows_box": rows,
+         })
+ 
+         cv2.rectangle(img, (min_x, y_min), (max_x, y_max), (255, 0, 0), 3)
+ 
+     if save_debug:
+         debug_path = os.path.splitext(image_path)[0] + "_debug.jpg"
+         cv2.imwrite(debug_path, img)
+ 
+     return tables