Explorar el Código

多列格子排序方式修改

luojiehua hace 3 años
padre
commit
9371f6f872
Se ha modificado 1 fichero con 81 adiciones y 62 borrados
  1. 81 62
      format_convert/utils.py

+ 81 - 62
format_convert/utils.py

@@ -608,7 +608,7 @@ class LineTable:
         list_crosspoints = []
         # print("lines num",len(list_line))
 
-        def getMaxPoints(list_x,margin=5):
+        def getMaxPoints(list_x,margin=5,reverse=False):
             clust_x = []
             for _x in list_x:
                 _find = False
@@ -619,7 +619,7 @@ class LineTable:
                         break
                 if not _find:
                     clust_x.append([_x])
-            clust_x.sort(key=lambda x:len(x),reverse=True)
+            clust_x.sort(key=lambda x:x,reverse=reverse)
             return clust_x[0][0],len(clust_x[0])
 
         for _i in range(len(list_line)):
@@ -656,80 +656,81 @@ class LineTable:
                 if not _find:
                     break
 
+            list_crosspoints = []
+
             for list_cp in cluster_crosspoints:
                 points = list_cp.get("points")
+
                 l_lines = []
                 for p in points:
                     l_lines.extend(p.get("p_lines"))
                 l_lines = list(set(l_lines))
                 l_lines.sort(key=lambda x:x[0])
 
-                min_x,_count = getMaxPoints([l[0] for l in l_lines])
-                if _count<2:
+                min_x,_count = getMaxPoints([l[0] for l in l_lines],reverse=False)
+                if _count<=2:
                     min_x = None
 
 
-                min_y,_count = getMaxPoints([l[1] for l in l_lines])
+                min_y,_count = getMaxPoints([l[1] for l in l_lines],reverse=False)
                 if _count<2:
                     min_y = None
 
 
-                max_x,_count = getMaxPoints([l[2] for l in l_lines])
-                if _count<2:
+                max_x,_count = getMaxPoints([l[2] for l in l_lines],reverse=True)
+                if _count<=2:
                     max_x = None
 
 
-                max_y,_count = getMaxPoints([l[3] for l in l_lines])
-                if _count<2:
+                max_y,_count = getMaxPoints([l[3] for l in l_lines],reverse=True)
+                if _count<=2:
                     max_y = None
                 if min_x and min_y and max_x and max_y:
 
                     points.sort(key=lambda x:x["point"][0])
-                    if abs(min_x-points[0]["point"][0])>10:
-                        list_line.append(LTLine(1,(min_x,min_y),(min_x,max_y)))
+                    if abs(min_x-points[0]["point"][0])>30:
+                        _line = LTLine(1,(min_x,min_y),(min_x,max_y))
+                        list_line.append(_line)
+                        l_lines.append(_line.bbox)
+                        print("add=====",_line.bbox)
 
 
-                    if abs(max_x-points[-1]["point"][0])>10:
-                        list_line.append(LTLine(1,(max_x,min_y),(max_x,max_y)))
+                    if abs(max_x-points[-1]["point"][0])>30:
+                        _line = LTLine(1,(max_x,min_y),(max_x,max_y))
+                        list_line.append(_line)
+                        l_lines.append(_line.bbox)
+                        print("add=====1",_line.bbox)
 
                     points.sort(key=lambda x:x["point"][1])
-                    if abs(min_y-points[0]["point"][1])>10:
-                        list_line.append(LTLine(1,(min_x,min_y),(max_x,min_y)))
-
-                    if abs(max_y-points[-1]["point"][1])>10:
-                        list_line.append(LTLine(1,(min_x,max_y),(max_x,max_y)))
-
-
-            list_crosspoints = []
-            for _i in range(len(list_line)):
-                for _j in range(len(list_line)):
-                    line1 = list_line[_i].__dict__.get("bbox")
-                    line2 = list_line[_j].__dict__.get("bbox")
-                    exists,point = self.cross_point(line1,line2)
-                    if exists:
-                        list_crosspoints.append(point)
-
-        # plt.figure()
-        # for _line in list_line:
-        #     x0,y0,x1,y1 = _line.__dict__.get("bbox")
-        #     plt.plot([x0,x1],[y0,y1])
-        # for _line in list_line:
-        #     x0,y0,x1,y1 = _line.bbox
-        #     plt.plot([x0,x1],[y0,y1])
-        # for point in list_crosspoints:
-        #     plt.scatter(point.get("point")[0],point.get("point")[1])
-        # plt.show()
-        from matplotlib import pyplot as plt
-        plt.figure()
-        for _line in list_line:
-            x0,y0,x1,y1 = _line.__dict__.get("bbox")
-            plt.plot([x0,x1],[y0,y1])
-        for _line in list_line:
-            x0,y0,x1,y1 = _line.bbox
-            plt.plot([x0,x1],[y0,y1])
-        for point in list_crosspoints:
-            plt.scatter(point.get("point")[0],point.get("point")[1])
-        plt.show()
+                    if abs(min_y-points[0]["point"][1])>30:
+                        _line = LTLine(1,(min_x,min_y),(max_x,min_y))
+                        list_line.append(_line)
+                        l_lines.append(_line.bbox)
+                        print("add=====2",_line.bbox)
+
+                    if abs(max_y-points[-1]["point"][1])>30:
+                        _line = LTLine(1,(min_x,max_y),(max_x,max_y))
+                        list_line.append(_line)
+                        l_lines.append(_line.bbox)
+                        print("add=====2",_line.bbox)
+
+
+
+                for _i in range(len(l_lines)):
+                    for _j in range(len(l_lines)):
+                        line1 = l_lines[_i]
+                        line2 = l_lines[_j]
+                        exists,point = self.cross_point(line1,line2)
+                        if exists:
+                            list_crosspoints.append(point)
+                from matplotlib import pyplot as plt
+                plt.figure()
+                for _line in l_lines:
+                    x0,y0,x1,y1 = _line
+                    plt.plot([x0,x1],[y0,y1])
+                for point in list_crosspoints:
+                    plt.scatter(point.get("point")[0],point.get("point")[1])
+                plt.show()
 
         # print(list_crosspoints)
         # print("points num",len(list_crosspoints))
@@ -1029,17 +1030,31 @@ class LineTable:
 
         clusters_rects = []
         # 根据y1聚类
-        list_rect.sort(key=lambda x:x.bbox[3])
-        for _rect in list_rect:
-            _y0 = _rect.bbox[3]
-            _find = False
-            for l_cr in clusters_rects:
-                if abs(l_cr[0].bbox[3]-_y0)<margin:
-                    _find = True
-                    l_cr.append(_rect)
-                    break
-            if not _find:
-                clusters_rects.append([_rect])
+        if sourceP_LB:
+            list_rect.sort(key=lambda x:x.bbox[3])
+            for _rect in list_rect:
+                _y0 = _rect.bbox[3]
+                _find = False
+                for l_cr in clusters_rects:
+                    if abs(l_cr[0].bbox[3]-_y0)<margin:
+                        _find = True
+                        l_cr.append(_rect)
+                        break
+                if not _find:
+                    clusters_rects.append([_rect])
+        else:
+            list_rect.sort(key=lambda x:x.bbox[1])
+            for _rect in list_rect:
+                _y0 = _rect.bbox[1]
+                _find = False
+                for l_cr in clusters_rects:
+                    if abs(l_cr[0].bbox[1]-_y0)<margin:
+                        _find = True
+                        l_cr.append(_rect)
+                        break
+                if not _find:
+                    clusters_rects.append([_rect])
+
 
 
 
@@ -1062,7 +1077,11 @@ class LineTable:
         list_y.sort(key=lambda x:x,reverse=sourceP_LB)
 
         print("clusters_rects", len(clusters_rects))
-        clusters_rects.sort(key=lambda x:x[0].bbox[3],reverse=sourceP_LB)
+        if sourceP_LB:
+            clusters_rects.sort(key=lambda x:x[0].bbox[3],reverse=sourceP_LB)
+        else:
+            clusters_rects.sort(key=lambda x:x[0].bbox[1],reverse=sourceP_LB)
+
         for l_cr in clusters_rects:
             l_cr.sort(key=lambda x:x.bbox[0])