Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Satini_pvduc
/
ocrpdf
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Authored by
tien_nemo
2025-09-04 15:43:06 +0700
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
Commit
228013298caf59cb60db3d42f61f404815fd9533
22801329
1 parent
1d84b308
nếu chữ mảnh tăng đậm hơn
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
86 additions
and
4 deletions
app/Services/OCR/extrac_table.py
app/Services/OCR/extrac_table.py
View file @
2280132
...
...
@@ -7,17 +7,18 @@ import json
from
pathlib
import
Path
import
cv2
from
table_detector
import
detect_tables
from
PIL
import
Image
,
ImageEnhance
# ==== Config ====
# Project root: three directory levels above the folder that contains this file.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))

# PDF path
pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"

# Output folder
output_folder = Path(BASE_DIR) / "public" / "image"

# File name of the PDF without its extension, derived from pdf_path so the
# two values cannot drift apart.
PDF_NAME = pdf_path.stem
# Create the output directory (and any missing parents); an already
# existing directory is not an error.
output_folder.mkdir(parents=True, exist_ok=True)
...
...
@@ -39,8 +40,89 @@ pages[0].save(image_path, "JPEG")
# ==== Run OCR ====
# Convert the first rendered page to a NumPy array. OCR itself is run once,
# further down, on the preprocessed image; running it here as well would
# only produce a result that is overwritten before it is read.
image_np = np.array(pages[0])
def estimate_text_ratio(gray, block_size=256):
    """Return the median per-tile dark-pixel ratio of a 2-D grayscale image.

    The image is walked in tiles of at most ``block_size`` x ``block_size``
    pixels. Each tile is binarized with its own Otsu threshold, and the
    fraction of pixels at or below that threshold is recorded. The median
    of those fractions is returned so that a few unusual tiles do not
    dominate the estimate. Returns 0.0 when the image yields no tiles.
    """
    rows, cols = gray.shape
    tile_ratios = []

    for top in range(0, rows, block_size):
        band = gray[top:top + block_size]
        for left in range(0, cols, block_size):
            tile = band[:, left:left + block_size]
            if tile.size == 0:
                continue

            # Otsu picks the threshold per tile; pixels above it become 255.
            _, tile_bw = cv2.threshold(
                tile, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            # Invert so that the dark side of the threshold is non-zero.
            ink = 255 - tile_bw
            tile_ratios.append(np.count_nonzero(ink) / ink.size)

    # Median rather than mean, to stay robust against outlier tiles.
    return np.median(tile_ratios) if tile_ratios else 0.0
def bolden_text(rgb_img: np.ndarray,
                kernel_size: int = 3,
                iterations: int = 1,
                contrast: float = 1.5,
                sharpness: float = 1.2,
                thin_threshold: float = 0.02) -> np.ndarray:
    """Thicken the text strokes of an RGB image when they look thin.

    The image is converted to grayscale and its text density is measured
    with ``estimate_text_ratio``. If the density is above ``thin_threshold``
    the input is returned untouched. Otherwise the Otsu text mask is
    dilated, merged back into the grayscale image, contrast-scaled and
    sharpened.

    Args:
        rgb_img: 3-channel RGB image.
        kernel_size: side length of the square structuring element used for
            dilation (2 = light, 3 = stronger).
        iterations: number of dilation passes.
        contrast: multiplicative gain applied to the gray levels (>= 1.0);
            results saturate at 255.
        sharpness: unsharp-mask weight of the image (>= 1.0); 1.0 leaves
            the image unchanged.
        thin_threshold: text ratio at or below which the text is treated as
            thin and gets thickened.

    Returns:
        ``rgb_img`` itself when the text is judged bold enough, otherwise a
        new 3-channel image built from the processed grayscale data.
    """
    # RGB -> gray
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)

    # Decide once, so the printed label and the branch can never disagree.
    text_ratio = estimate_text_ratio(gray, block_size=256)
    already_bold = text_ratio > thin_threshold
    label = 'Đậm' if already_bold else 'Mảnh'
    print(f"text_ratio={text_ratio:.3f} -> {label}")
    if already_bold:
        return rgb_img

    # Otsu binarization, done only when the image is actually processed.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Text mask (text = 255), then dilate it to thicken the strokes.
    text_mask = 255 - binary
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    bold_mask = cv2.dilate(text_mask, kernel, iterations=iterations)

    # Overlay on the original gray image: keep the darker of the original
    # pixel and the inverted mask. Same as 255 - max(255 - gray, bold_mask).
    out_gray = np.minimum(gray, 255 - bold_mask)

    # Linear contrast scaling.
    out_gray = cv2.convertScaleAbs(out_gray, alpha=contrast, beta=0)

    # Unsharp mask. The blur weight is tied to `sharpness` so the two weights
    # always sum to 1 and overall brightness is preserved for any value.
    blur = cv2.GaussianBlur(out_gray, (0, 0), 0.8)
    out_gray = cv2.addWeighted(out_gray, sharpness, blur, 1.0 - sharpness, 0)

    # Back to 3 channels for the OCR engine.
    return cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
preprocessed = bolden_text(
    image_np,
    kernel_size=3,   # dilation kernel size; larger thickens strokes more
    iterations=1,    # raise to 2 for heavier strokes
    contrast=1.5,    # 1.0 = unchanged, 1.5-2.0 = stronger
    sharpness=1.2,   # > 1.0 sharpens
)

# Guarantee a 3-channel image before anything below treats it as RGB.
if preprocessed.ndim == 2:
    preprocessed = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2RGB)

# Save a debug copy of the OCR input. cv2.imwrite expects BGR channel order
# and reports failure through its return value, so check it before claiming
# success.
debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
if cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR)):
    print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
else:
    print(f"[DEBUG] Failed to save preprocessed image to: {debug_path}")

# Run OCR on the preprocessed image.
results = ocr.predict(preprocessed)
# ==== Convert polygon to bbox ====
def
poly_to_bbox
(
poly
):
xs
=
[
p
[
0
]
for
p
in
poly
]
...
...
Please
register
or
sign in
to post a comment