Skip to content

Commit 3f3fb13

Browse files
authored
feat: optimized box sorting (#587)
* optimized box sorting
* pr feedback
1 parent d2e9e6b commit 3f3fb13

File tree

1 file changed

+19
-16
lines changed
  • python/rapidocr/ch_ppocr_det

1 file changed

+19
-16
lines changed

python/rapidocr/ch_ppocr_det/main.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323

2424
from .utils import DBPostProcess, DetPreProcess, TextDetOutput
2525

26+
_BOX_SORT_Y_THRESHOLD = 10
27+
_BOX_SORT_LINE_SEPARATION_FACTOR = 1e6
28+
2629

2730
class TextDetector:
2831
def __init__(self, cfg: Dict[str, Any]):
@@ -85,19 +88,19 @@ def sorted_boxes(dt_boxes: np.ndarray) -> np.ndarray:
8588
return:
8689
sorted boxes(array) with shape [4, 2]
8790
"""
88-
num_boxes = dt_boxes.shape[0]
89-
sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
90-
_boxes = list(sorted_boxes)
91-
92-
for i in range(num_boxes - 1):
93-
for j in range(i, -1, -1):
94-
if (
95-
abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
96-
and _boxes[j + 1][0][0] < _boxes[j][0][0]
97-
):
98-
tmp = _boxes[j]
99-
_boxes[j] = _boxes[j + 1]
100-
_boxes[j + 1] = tmp
101-
else:
102-
break
103-
return np.array(_boxes)
91+
if len(dt_boxes) == 0:
92+
return dt_boxes
93+
94+
# Sort by y, then identify lines, then sort by (line, x)
95+
y_order = np.argsort(dt_boxes[:, 0, 1], kind="stable")
96+
sorted_y = dt_boxes[y_order, 0, 1]
97+
98+
line_ids = np.empty(len(dt_boxes), dtype=np.int32)
99+
line_ids[0] = 0
100+
np.cumsum(np.abs(np.diff(sorted_y)) >= _BOX_SORT_Y_THRESHOLD, out=line_ids[1:])
101+
102+
# Create composite sort key for final ordering
103+
# Shift line_ids by large factor, add x for tie-breaking
104+
sort_key = line_ids[y_order] * _BOX_SORT_LINE_SEPARATION_FACTOR + dt_boxes[y_order, 0, 0]
105+
final_order = np.argsort(sort_key, kind="stable")
106+
return dt_boxes[y_order[final_order]]

0 commit comments

Comments
 (0)