Przeglądaj źródła

新增无边框表格识别模块botr

fangjiasheng 2 lat temu
rodzic
commit
a660012f46

+ 266 - 0
botr/extract_table.py

@@ -0,0 +1,266 @@
+import time
+import traceback
+import cv2
+from pdfminer.layout import LTLine
+# from botr.nsp.predict import nsp_predict
+from botr.rules.get_table_by_rules import get_table_by_rule
+from botr.utils import line_iou, get_table_iou
+from format_convert.convert_need_interface import from_yolo_interface
+from format_convert.utils import log, np2bytes
+
+
+def b_table_process(list_line, list_text_boxes, list_cell, table_location):
+    def merge_textbox(textbox_list, in_objs):
+        delete_obj = []
+        threshold = 5
+        textbox_list.sort(key=lambda x:x.bbox[0])
+        for k in range(len(textbox_list)):
+            tb1 = textbox_list[k]
+            if tb1 not in in_objs and tb1 not in delete_obj:
+                for m in range(k+1, len(textbox_list)):
+                    tb2 = textbox_list[m]
+                    if tb2 in in_objs:
+                        continue
+                    if abs(tb1.bbox[1]-tb2.bbox[1]) <= threshold \
+                            and abs(tb1.bbox[3]-tb2.bbox[3]) <= threshold:
+                        if tb1.bbox[0] <= tb2.bbox[0]:
+                            tb1.text = tb1.text + tb2.text
+                        else:
+                            tb1.text = tb2.text + tb1.text
+                        tb1.bbox[0] = min(tb1.bbox[0], tb2.bbox[0])
+                        tb1.bbox[2] = max(tb1.bbox[2], tb2.bbox[2])
+                        delete_obj.append(tb2)
+        for _obj in delete_obj:
+            if _obj in textbox_list:
+                textbox_list.remove(_obj)
+        return textbox_list
+    try:
+        if list_line:
+            from format_convert.convert_tree import TableLine
+            list_lines = []
+            for line in list_line:
+                list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
+
+            # 先拿出在表格区域里的TextBox
+            area_list_text_boxes = []
+            threshold = 7
+            for t_b in list_text_boxes:
+                bbox = t_b.bbox
+                if table_location[1] - threshold <= bbox[1] <= bbox[3] <= table_location[3] + threshold:
+                    area_list_text_boxes.append(t_b)
+
+            # 对TextBox进行分行,否则同样一行有些框偏上有些偏下,影响文本顺序
+            area_list_text_boxes.sort(key=lambda x: (x.bbox[1], x.bbox[0], x.bbox[3], x.bbox[2]))
+            current_y = area_list_text_boxes[0].bbox[1]
+            threshold = 2.
+            for t_b in area_list_text_boxes:
+                bbox = t_b.bbox
+                if current_y - threshold <= bbox[1] <= current_y + threshold:
+                    t_b.bbox[1] = current_y
+                else:
+                    current_y = bbox[1]
+            area_list_text_boxes.sort(key=lambda x: (x.bbox[1], x.bbox[0], x.bbox[3], x.bbox[2]))
+
+            # list_cell 转化为 LineTable形式
+            tables = []
+            obj_in_table = []
+            table_dict = {'bbox': table_location}
+            row_list = []
+            for row in list_cell:
+                col_list = []
+                for col in row:
+                    col_dict = {'bbox': (col[0][0], col[0][1], col[1][0], col[1][1]),
+                                'rowspan': 1, 'columnspan': 1, 'text': ''}
+                    for t_b in area_list_text_boxes:
+                        if t_b in obj_in_table:
+                            continue
+                        text = t_b.text
+                        bbox = t_b.bbox
+                        iou = get_table_iou(col[0][0], col[0][1], col[1][0], col[1][1],
+                                      bbox[0], bbox[1], bbox[2], bbox[3])
+                        if iou >= 0.3:
+                            col_dict['text'] += text
+                            obj_in_table.append(t_b)
+                    col_list.append(col_dict)
+                row_list.append(col_list)
+            table_dict['table'] = row_list
+            tables.append(table_dict)
+
+            # print('b_table_process tables', tables)
+
+            # 合并同一行textbox
+            # list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
+            return list_text_boxes, tables, obj_in_table
+        else:
+            return list_text_boxes, [], set()
+    except:
+        traceback.print_exc()
+        return [-8], [-8], [-8]
+
+
+def get_table(img, table_list, text_list, bbox_list, text_box_list, show=0):
+    log('start')
+    # 检测无边框表格
+    start_time_all = time.time()
+    start_time = time.time()
+    img_bytes = np2bytes(img)
+    b_table_list = from_yolo_interface(img_bytes)
+    log('yolo detect cost: ' + str(time.time()-start_time))
+    b_table_list = b_table_list[0]
+    if not b_table_list:
+        log('detect not b_table_list')
+        return [], [], []
+
+    if show:
+        for b_table in b_table_list:
+            # for line in b_table:
+            cv2.rectangle(img, (int(b_table[0]), int(b_table[1])), (int(b_table[2]), int(b_table[3])),
+                     (0, 0, 255))
+        cv2.imshow('b_table', img)
+        cv2.waitKey(0)
+
+    if show:
+        print('b_table_list', b_table_list)
+
+    # 排除otr结果
+    b_table_location_list = []
+    for b_table in b_table_list:
+        # print('b_table', b_table)
+        min_x, min_y = 1000000, 1000000
+        max_x, max_y = 0, 0
+        # for line in b_table:
+        if b_table[1] < min_y:
+            min_y = b_table[1]
+        if b_table[3] > max_y:
+            max_y = b_table[3]
+        if b_table[0] < min_x:
+            min_x = b_table[0]
+        if b_table[2] > max_x:
+            max_x = b_table[2]
+        b_loc = [min_x, min_y, max_x, max_y]
+        inter_flag = False
+        for table in table_list:
+            loc = table.get('bbox')
+            rows = table.get('table')
+            if line_iou([[0, loc[1]], [0, loc[3]]], [[0, b_loc[1]], [0, b_loc[3]]], axis=1) > 0.3:
+                if len(rows) <= 1:
+                    if loc[1] < b_loc[1] < loc[3] < b_loc[3]:
+                        b_loc[1] = loc[3]
+                    if b_loc[1] < loc[1] < b_loc[3] < loc[3]:
+                        b_loc[3] = loc[1]
+                    continue
+                inter_flag = True
+                # cv2.rectangle(img, [int(loc[0]), int(loc[1])], [int(loc[2]), int(loc[3])], (0, 0, 255))
+                # cv2.rectangle(img, [int(b_loc[0]), int(b_loc[1])], [int(b_loc[2]), int(b_loc[3])], (0, 0, 255))
+                # cv2.imshow('inter', img)
+                # cv2.waitKey(0)
+                break
+        if not inter_flag:
+            b_table_location_list.append(b_loc)
+    if not b_table_location_list:
+        log('not b_table_location_list')
+        return [], [], []
+
+    if show:
+        for b_loc in b_table_location_list:
+            cv2.rectangle(img, (int(b_loc[0]), int(b_loc[1])), (int(b_loc[2]), int(b_loc[3])),
+                          (0, 0, 255))
+        cv2.imshow('b_table no otr', img)
+        cv2.waitKey(0)
+
+    table_list = []
+    obj_in_table_list = []
+    # print('len(b_table_location_list)', len(b_table_location_list))
+    for b_loc in b_table_location_list:
+        area_text_list = []
+        area_bbox_list = []
+        threshold = 5
+        for i, bbox in enumerate(bbox_list):
+            if b_loc[1] - threshold <= bbox[0][1] <= bbox[2][1] <= b_loc[3] + threshold:
+                area_bbox_list.append(bbox)
+                area_text_list.append(text_list[i])
+
+        # 根据ocr bbox,规则生成表格线
+        start_time = time.time()
+        line_list, cell_list, table_location = get_table_by_rule(img, area_text_list, area_bbox_list, b_loc)
+        if not table_location:
+            log('get_table_by_rule not table_location')
+            continue
+        log('get_table_by_rule cost: ' + str(time.time()-start_time))
+
+        # 根据表格线生成单元格
+        start_time = time.time()
+        text_box_list, _table_list, _obj_in_table_list = b_table_process(line_list, text_box_list, cell_list, table_location)
+        table_list += _table_list
+        obj_in_table_list += _obj_in_table_list
+        log('b_table_process cost: ' + str(time.time()-start_time))
+
+        # if not table_list:
+        #     log('table_process not table_list')
+        #     return [], [], []
+
+        if not _table_list:
+            log('table_process not table_list')
+            continue
+
+        # 单元格合并,nsp模型
+        # 使用hanlp分词,判断上下句是否该合并 顺便拉数据统计
+        # 1. 上下句ab,ab相连得到c
+        # 2.1 c分词,若ab相连处合为一个词语,则ab相连
+        # 2.2 ab相连处不为一个词语,a, b分别分词
+        # 2.2.1 若b的第一个词,从其中分第一个字给a,然后
+        # near_col_list = []
+        # table = _table_list[0].get('table')
+        # col_cnt = len(table[0])
+        # for c_cnt in range(col_cnt):
+        #     for i in range(len(table)-1):
+        #         t = table[i][c_cnt].get('text')
+        #         next_t = table[i+1][c_cnt].get('text')
+        #         if t and next_t:
+        #             near_col_list.append([t, next_t])
+        #         elif t and next_t == '':
+        #             if i+2 <= len(table)-1:
+        #                 next_2_t = table[i+2][c_cnt].get('text')
+        #                 near_col_list.append([t, next_2_t])
+        #
+        # is_next_list = nsp_predict(near_col_list, has_label=False)
+        #
+        # next_index = 0
+        # for c_cnt in range(col_cnt):
+        #     # 先把一列里的需合并的打上标签
+        #     for i in range(len(table)-1):
+        #         t = table[i][c_cnt].get('text')
+        #         next_t = table[i+1][c_cnt].get('text')
+        #         if t and next_t:
+        #             table[i+1][c_cnt]['is_next'] = is_next_list[next_index]
+        #             next_index += 1
+        #         elif t and next_t == '':
+        #             if i+2 <= len(table)-1:
+        #                 table[i+1][c_cnt]['is_next'] = is_next_list[next_index]
+        #                 table[i+2][c_cnt]['is_next'] = is_next_list[next_index]
+        #                 next_index += 1
+        #
+        #     first_col = None
+        #     for i in range(len(table)):
+        #         if table[i][c_cnt].get('is_next'):
+        #             if first_col is None:
+        #                 first_col = table[i-1][c_cnt]
+        #             first_col['text'] += table[i][c_cnt].get('text')
+        #             first_col['rowspan'] += 1
+        #         else:
+        #             first_col = None
+        #
+        # # 删除标签为True的
+        # new_table = []
+        # for row in table:
+        #     new_row = []
+        #     for col in row:
+        #         if col.get('is_next'):
+        #             continue
+        #         new_row.append(col)
+        #     new_table.append(new_row)
+        #
+        # _table_list[0]['table'] = new_table
+
+    log('get_table finish ' + str(time.time() - start_time_all))
+    return text_box_list, table_list, obj_in_table_list

+ 36 - 0
botr/nsp/model.py

@@ -0,0 +1,36 @@
+from typing import Optional, List
+import torch
+import torchsnooper
+import torch.nn as nn
+from transformers import ElectraPreTrainedModel, ElectraModel
+from transformers.modeling_outputs import NextSentencePredictorOutput, BaseModelOutputWithPoolingAndCrossAttentions
+from transformers.models.bert.modeling_bert import BertOnlyNSPHead, BertPooler
+
+
+class ElectraNSPModel(ElectraPreTrainedModel):
+    def __init__(self, config):
+        super(ElectraNSPModel, self).__init__(config)
+        self.electra = ElectraModel(config)
+        self.cls = BertOnlyNSPHead(config)
+        self.pooler = BertPooler(config)
+        self.init_weights()
+
+    # @torchsnooper.snoop()
+    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
+        outputs = self.electra(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
+        last_output = outputs[0]
+        pooled_output = self.pooler(last_output)
+        seq_relationship_scores = self.cls(pooled_output)
+
+        next_sentence_loss = None
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+            next_sentence_loss = loss_fct(seq_relationship_scores.view(-1, 2), labels.view(-1))
+            # next_sentence_loss = loss_fct(seq_relationship_scores.view(-1, 2), labels)
+
+        return NextSentencePredictorOutput(
+            loss=next_sentence_loss,
+            logits=seq_relationship_scores,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )

+ 32 - 0
botr/nsp/model/config.json

@@ -0,0 +1,32 @@
+{
+  "_name_or_path": "/data2/fangjiasheng/borderless-table-detect/torch_version/next_sentence_prediction/../../torch_version/models/electra/",
+  "_num_labels": 2,
+  "architectures": [
+    "ElectraNSPModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "embedding_size": 128,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 256,
+  "initializer_range": 0.02,
+  "intermediate_size": 1024,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "electra",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "summary_activation": "gelu",
+  "summary_last_dropout": 0.1,
+  "summary_type": "first",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

BIN
botr/nsp/model/pytorch_model.bin


BIN
botr/nsp/model/scheduler.pt


+ 21128 - 0
botr/nsp/model/vocab.txt

@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+<S>
+<T>
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+ο
+п
+ก
+ง
+น
+ม
+ย
+ร
+อ
+า
+เ
+๑
+་
+ღ
+ᄀ
+ᄁ
+ᄂ
+ᄃ
+ᄅ
+ᄆ
+ᄇ
+ᄈ
+ᄉ
+ᄋ
+ᄌ
+ᄎ
+ᄏ
+ᄐ
+ᄑ
+ᄒ
+ᅡ
+ᅢ
+ᅣ
+ᅥ
+ᅦ
+ᅧ
+ᅨ
+ᅩ
+ᅪ
+ᅬ
+ᅭ
+ᅮ
+ᅯ
+ᅲ
+ᅳ
+ᅴ
+ᅵ
+ᆨ
+ᆫ
+ᆯ
+ᆷ
+ᆸ
+ᆺ
+ᆻ
+ᆼ
+ᗜ
+ᵃ
+ᵉ
+ᵍ
+ᵏ
+ᵐ
+ᵒ
+ᵘ
+‖
+„
+†
+•
+‥
+‧
+

+‰
+′
+″
+‹
+›
+※
+‿
+⁄
+ⁱ
+⁺
+ⁿ
+₁
+₂
+₃
+₄
+€
+℃
+№
+™
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+←
+↑
+→
+↓
+↔
+↗
+↘
+⇒
+∀
+−
+∕
+∙
+√
+∞
+∟
+∠
+∣
+∥
+∩
+∮
+∶
+∼
+∽
+≈
+≒
+≡
+≤
+≥
+≦
+≧
+≪
+≫
+⊙
+⋅
+⋈
+⋯
+⌒
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+⑴
+⑵
+⑶
+⑷
+⑸
+⒈
+⒉
+⒊
+⒋
+ⓒ
+ⓔ
+ⓘ
+─
+━
+│
+┃
+┅
+┆
+┊
+┌
+└
+├
+┣
+═
+║
+╚
+╞
+╠
+╭
+╮
+╯
+╰
+╱
+╳
+▂
+▃
+▅
+▇
+█
+▉
+▋
+▌
+▍
+▎
+■
+□
+▪
+▫
+▬
+▲
+△
+▶
+►
+▼
+▽
+◆
+◇
+○
+◎
+●
+◕
+◠
+◢
+◤
+☀
+★
+☆
+☕
+☞
+☺
+☼
+♀
+♂
+♠
+♡
+♣
+♥
+♦
+♪
+♫
+♬
+✈
+✔
+✕
+✖
+✦
+✨
+✪
+✰
+✿
+❀
+❤
+➜
+➤
+⦿
+、
+。
+〃
+々
+〇
+〈
+〉
+《
+》
+「
+」
+『
+』
+【
+】
+〓
+〔
+〕
+〖
+〗
+〜
+〝
+〞
+ぁ
+あ
+ぃ
+い
+う
+ぇ
+え
+お
+か
+き
+く
+け
+こ
+さ
+し
+す
+せ
+そ
+た
+ち
+っ
+つ
+て
+と
+な
+に
+ぬ
+ね
+の
+は
+ひ
+ふ
+へ
+ほ
+ま
+み
+む
+め
+も
+ゃ
+や
+ゅ
+ゆ
+ょ
+よ
+ら
+り
+る
+れ
+ろ
+わ
+を
+ん
+゜
+ゝ
+ァ
+ア
+ィ
+イ
+ゥ
+ウ
+ェ
+エ
+ォ
+オ
+カ
+キ
+ク
+ケ
+コ
+サ
+シ
+ス
+セ
+ソ
+タ
+チ
+ッ
+ツ
+テ
+ト
+ナ
+ニ
+ヌ
+ネ
+ノ
+ハ
+ヒ
+フ
+ヘ
+ホ
+マ
+ミ
+ム
+メ
+モ
+ャ
+ヤ
+ュ
+ユ
+ョ
+ヨ
+ラ
+リ
+ル
+レ
+ロ
+ワ
+ヲ
+ン
+ヶ
+・
+ー
+ヽ
+ㄅ
+ㄆ
+ㄇ
+ㄉ
+ㄋ
+ㄌ
+ㄍ
+ㄎ
+ㄏ
+ㄒ
+ㄚ
+ㄛ
+ㄞ
+ㄟ
+ㄢ
+ㄤ
+ㄥ
+ㄧ
+ㄨ
+ㆍ
+㈦
+㊣
+㎡
+㗎
+一
+丁
+七
+万
+丈
+三
+上
+下
+不
+与
+丐
+丑
+专
+且
+丕
+世
+丘
+丙
+业
+丛
+东
+丝
+丞
+丟
+両
+丢
+两
+严
+並
+丧
+丨
+个
+丫
+中
+丰
+串
+临
+丶
+丸
+丹
+为
+主
+丼
+丽
+举
+丿
+乂
+乃
+久
+么
+义
+之
+乌
+乍
+乎
+乏
+乐
+乒
+乓
+乔
+乖
+乗
+乘
+乙
+乜
+九
+乞
+也
+习
+乡
+书
+乩
+买
+乱
+乳
+乾
+亀
+亂
+了
+予
+争
+事
+二
+于
+亏
+云
+互
+五
+井
+亘
+亙
+亚
+些
+亜
+亞
+亟
+亡
+亢
+交
+亥
+亦
+产
+亨
+亩
+享
+京
+亭
+亮
+亲
+亳
+亵
+人
+亿
+什
+仁
+仃
+仄
+仅
+仆
+仇
+今
+介
+仍
+从
+仏
+仑
+仓
+仔
+仕
+他
+仗
+付
+仙
+仝
+仞
+仟
+代
+令
+以
+仨
+仪
+们
+仮
+仰
+仲
+件
+价
+任
+份
+仿
+企
+伉
+伊
+伍
+伎
+伏
+伐
+休
+伕
+众
+优
+伙
+会
+伝
+伞
+伟
+传
+伢
+伤
+伦
+伪
+伫
+伯
+估
+伴
+伶
+伸
+伺
+似
+伽
+佃
+但
+佇
+佈
+位
+低
+住
+佐
+佑
+体
+佔
+何
+佗
+佘
+余
+佚
+佛
+作
+佝
+佞
+佟
+你
+佢
+佣
+佤
+佥
+佩
+佬
+佯
+佰
+佳
+併
+佶
+佻
+佼
+使
+侃
+侄
+來
+侈
+例
+侍
+侏
+侑
+侖
+侗
+供
+依
+侠
+価
+侣
+侥
+侦
+侧
+侨
+侬
+侮
+侯
+侵
+侶
+侷
+便
+係
+促
+俄
+俊
+俎
+俏
+俐
+俑
+俗
+俘
+俚
+保
+俞
+俟
+俠
+信
+俨
+俩
+俪
+俬
+俭
+修
+俯
+俱
+俳
+俸
+俺
+俾
+倆
+倉
+個
+倌
+倍
+倏
+們
+倒
+倔
+倖
+倘
+候
+倚
+倜
+借
+倡
+値
+倦
+倩
+倪
+倫
+倬
+倭
+倶
+债
+值
+倾
+偃
+假
+偈
+偉
+偌
+偎
+偏
+偕
+做
+停
+健
+側
+偵
+偶
+偷
+偻
+偽
+偿
+傀
+傅
+傍
+傑
+傘
+備
+傚
+傢
+傣
+傥
+储
+傩
+催
+傭
+傲
+傳
+債
+傷
+傻
+傾
+僅
+働
+像
+僑
+僕
+僖
+僚
+僥
+僧
+僭
+僮
+僱
+僵
+價
+僻
+儀
+儂
+億
+儆
+儉
+儋
+儒
+儕
+儘
+償
+儡
+優
+儲
+儷
+儼
+儿
+兀
+允
+元
+兄
+充
+兆
+兇
+先
+光
+克
+兌
+免
+児
+兑
+兒
+兔
+兖
+党
+兜
+兢
+入
+內
+全
+兩
+八
+公
+六
+兮
+兰
+共
+兲
+关
+兴
+兵
+其
+具
+典
+兹
+养
+兼
+兽
+冀
+内
+円
+冇
+冈
+冉
+冊
+册
+再
+冏
+冒
+冕
+冗
+写
+军
+农
+冠
+冢
+冤
+冥
+冨
+冪
+冬
+冯
+冰
+冲
+决
+况
+冶
+冷
+冻
+冼
+冽
+冾
+净
+凄
+准
+凇
+凈
+凉
+凋
+凌
+凍
+减
+凑
+凛
+凜
+凝
+几
+凡
+凤
+処
+凪
+凭
+凯
+凰
+凱
+凳
+凶
+凸
+凹
+出
+击
+函
+凿
+刀
+刁
+刃
+分
+切
+刈
+刊
+刍
+刎
+刑
+划
+列
+刘
+则
+刚
+创
+初
+删
+判
+別
+刨
+利
+刪
+别
+刮
+到
+制
+刷
+券
+刹
+刺
+刻
+刽
+剁
+剂
+剃
+則
+剉
+削
+剋
+剌
+前
+剎
+剐
+剑
+剔
+剖
+剛
+剜
+剝
+剣
+剤
+剥
+剧
+剩
+剪
+副
+割
+創
+剷
+剽
+剿
+劃
+劇
+劈
+劉
+劊
+劍
+劏
+劑
+力
+劝
+办
+功
+加
+务
+劣
+动
+助
+努
+劫
+劭
+励
+劲
+劳
+労
+劵
+効
+劾
+势
+勁
+勃
+勇
+勉
+勋
+勐
+勒
+動
+勖
+勘
+務
+勛
+勝
+勞
+募
+勢
+勤
+勧
+勳
+勵
+勸
+勺
+勻
+勾
+勿
+匀
+包
+匆
+匈
+匍
+匐
+匕
+化
+北
+匙
+匝
+匠
+匡
+匣
+匪
+匮
+匯
+匱
+匹
+区
+医
+匾
+匿
+區
+十
+千
+卅
+升
+午
+卉
+半
+卍
+华
+协
+卑
+卒
+卓
+協
+单
+卖
+南
+単
+博
+卜
+卞
+卟
+占
+卡
+卢
+卤
+卦
+卧
+卫
+卮
+卯
+印
+危
+即
+却
+卵
+卷
+卸
+卻
+卿
+厂
+厄
+厅
+历
+厉
+压
+厌
+厕
+厘
+厚
+厝
+原
+厢
+厥
+厦
+厨
+厩
+厭
+厮
+厲
+厳
+去
+县
+叁
+参
+參
+又
+叉
+及
+友
+双
+反
+収
+发
+叔
+取
+受
+变
+叙
+叛
+叟
+叠
+叡
+叢
+口
+古
+句
+另
+叨
+叩
+只
+叫
+召
+叭
+叮
+可
+台
+叱
+史
+右
+叵
+叶
+号
+司
+叹
+叻
+叼
+叽
+吁
+吃
+各
+吆
+合
+吉
+吊
+吋
+同
+名
+后
+吏
+吐
+向
+吒
+吓
+吕
+吖
+吗
+君
+吝
+吞
+吟
+吠
+吡
+否
+吧
+吨
+吩
+含
+听
+吭
+吮
+启
+吱
+吳
+吴
+吵
+吶
+吸
+吹
+吻
+吼
+吽
+吾
+呀
+呂
+呃
+呆
+呈
+告
+呋
+呎
+呐
+呓
+呕
+呗
+员
+呛
+呜
+呢
+呤
+呦
+周
+呱
+呲
+味
+呵
+呷
+呸
+呻
+呼
+命
+咀
+咁
+咂
+咄
+咆
+咋
+和
+咎
+咏
+咐
+咒
+咔
+咕
+咖
+咗
+咘
+咙
+咚
+咛
+咣
+咤
+咦
+咧
+咨
+咩
+咪
+咫
+咬
+咭
+咯
+咱
+咲
+咳
+咸
+咻
+咽
+咿
+哀
+品
+哂
+哄
+哆
+哇
+哈
+哉
+哋
+哌
+响
+哎
+哏
+哐
+哑
+哒
+哔
+哗
+哟
+員
+哥
+哦
+哧
+哨
+哩
+哪
+哭
+哮
+哲
+哺
+哼
+哽
+唁
+唄
+唆
+唇
+唉
+唏
+唐
+唑
+唔
+唠
+唤
+唧
+唬
+售
+唯
+唰
+唱
+唳
+唷
+唸
+唾
+啃
+啄
+商
+啉
+啊
+問
+啓
+啕
+啖
+啜
+啞
+啟
+啡
+啤
+啥
+啦
+啧
+啪
+啫
+啬
+啮
+啰
+啱
+啲
+啵
+啶
+啷
+啸
+啻
+啼
+啾
+喀
+喂
+喃
+善
+喆
+喇
+喉
+喊
+喋
+喎
+喏
+喔
+喘
+喙
+喚
+喜
+喝
+喟
+喧
+喪
+喫
+喬
+單
+喰
+喱
+喲
+喳
+喵
+営
+喷
+喹
+喺
+喻
+喽
+嗅
+嗆
+嗇
+嗎
+嗑
+嗒
+嗓
+嗔
+嗖
+嗚
+嗜
+嗝
+嗟
+嗡
+嗣
+嗤
+嗦
+嗨
+嗪
+嗬
+嗯
+嗰
+嗲
+嗳
+嗶
+嗷
+嗽
+嘀
+嘅
+嘆
+嘈
+嘉
+嘌
+嘍
+嘎
+嘔
+嘖
+嘗
+嘘
+嘚
+嘛
+嘜
+嘞
+嘟
+嘢
+嘣
+嘤
+嘧
+嘩
+嘭
+嘮
+嘯
+嘰
+嘱
+嘲
+嘴
+嘶
+嘸
+嘹
+嘻
+嘿
+噁
+噌
+噎
+噓
+噔
+噗
+噙
+噜
+噠
+噢
+噤
+器
+噩
+噪
+噬
+噱
+噴
+噶
+噸
+噹
+噻
+噼
+嚀
+嚇
+嚎
+嚏
+嚐
+嚓
+嚕
+嚟
+嚣
+嚥
+嚨
+嚮
+嚴
+嚷
+嚼
+囂
+囉
+囊
+囍
+囑
+囔
+囗
+囚
+四
+囝
+回
+囟
+因
+囡
+团
+団
+囤
+囧
+囪
+囫
+园
+困
+囱
+囲
+図
+围
+囹
+固
+国
+图
+囿
+圃
+圄
+圆
+圈
+國
+圍
+圏
+園
+圓
+圖
+團
+圜
+土
+圣
+圧
+在
+圩
+圭
+地
+圳
+场
+圻
+圾
+址
+坂
+均
+坊
+坍
+坎
+坏
+坐
+坑
+块
+坚
+坛
+坝
+坞
+坟
+坠
+坡
+坤
+坦
+坨
+坪
+坯
+坳
+坵
+坷
+垂
+垃
+垄
+型
+垒
+垚
+垛
+垠
+垢
+垣
+垦
+垩
+垫
+垭
+垮
+垵
+埂
+埃
+埋
+城
+埔
+埕
+埗
+域
+埠
+埤
+埵
+執
+埸
+培
+基
+埼
+堀
+堂
+堃
+堅
+堆
+堇
+堑
+堕
+堙
+堡
+堤
+堪
+堯
+堰
+報
+場
+堵
+堺
+堿
+塊
+塌
+塑
+塔
+塗
+塘
+塚
+塞
+塢
+塩
+填
+塬
+塭
+塵
+塾
+墀
+境
+墅
+墉
+墊
+墒
+墓
+増
+墘
+墙
+墜
+增
+墟
+墨
+墩
+墮
+墳
+墻
+墾
+壁
+壅
+壆
+壇
+壊
+壑
+壓
+壕
+壘
+壞
+壟
+壢
+壤
+壩
+士
+壬
+壮
+壯
+声
+売
+壳
+壶
+壹
+壺
+壽
+处
+备
+変
+复
+夏
+夔
+夕
+外
+夙
+多
+夜
+够
+夠
+夢
+夥
+大
+天
+太
+夫
+夭
+央
+夯
+失
+头
+夷
+夸
+夹
+夺
+夾
+奂
+奄
+奇
+奈
+奉
+奋
+奎
+奏
+奐
+契
+奔
+奕
+奖
+套
+奘
+奚
+奠
+奢
+奥
+奧
+奪
+奬
+奮
+女
+奴
+奶
+奸
+她
+好
+如
+妃
+妄
+妆
+妇
+妈
+妊
+妍
+妒
+妓
+妖
+妘
+妙
+妝
+妞
+妣
+妤
+妥
+妨
+妩
+妪
+妮
+妲
+妳
+妹
+妻
+妾
+姆
+姉
+姊
+始
+姍
+姐
+姑
+姒
+姓
+委
+姗
+姚
+姜
+姝
+姣
+姥
+姦
+姨
+姪
+姫
+姬
+姹
+姻
+姿
+威
+娃
+娄
+娅
+娆
+娇
+娉
+娑
+娓
+娘
+娛
+娜
+娟
+娠
+娣
+娥
+娩
+娱
+娲
+娴
+娶
+娼
+婀
+婁
+婆
+婉
+婊
+婕
+婚
+婢
+婦
+婧
+婪
+婭
+婴
+婵
+婶
+婷
+婺
+婿
+媒
+媚
+媛
+媞
+媧
+媲
+媳
+媽
+媾
+嫁
+嫂
+嫉
+嫌
+嫑
+嫔
+嫖
+嫘
+嫚
+嫡
+嫣
+嫦
+嫩
+嫲
+嫵
+嫻
+嬅
+嬉
+嬌
+嬗
+嬛
+嬢
+嬤
+嬪
+嬰
+嬴
+嬷
+嬸
+嬿
+孀
+孃
+子
+孑
+孔
+孕
+孖
+字
+存
+孙
+孚
+孛
+孜
+孝
+孟
+孢
+季
+孤
+学
+孩
+孪
+孫
+孬
+孰
+孱
+孳
+孵
+學
+孺
+孽
+孿
+宁
+它
+宅
+宇
+守
+安
+宋
+完
+宏
+宓
+宕
+宗
+官
+宙
+定
+宛
+宜
+宝
+实
+実
+宠
+审
+客
+宣
+室
+宥
+宦
+宪
+宫
+宮
+宰
+害
+宴
+宵
+家
+宸
+容
+宽
+宾
+宿
+寂
+寄
+寅
+密
+寇
+富
+寐
+寒
+寓
+寛
+寝
+寞
+察
+寡
+寢
+寥
+實
+寧
+寨
+審
+寫
+寬
+寮
+寰
+寵
+寶
+寸
+对
+寺
+寻
+导
+対
+寿
+封
+専
+射
+将
+將
+專
+尉
+尊
+尋
+對
+導
+小
+少
+尔
+尕
+尖
+尘
+尚
+尝
+尤
+尧
+尬
+就
+尴
+尷
+尸
+尹
+尺
+尻
+尼
+尽
+尾
+尿
+局
+屁
+层
+屄
+居
+屆
+屈
+屉
+届
+屋
+屌
+屍
+屎
+屏
+屐
+屑
+展
+屜
+属
+屠
+屡
+屢
+層
+履
+屬
+屯
+山
+屹
+屿
+岀
+岁
+岂
+岌
+岐
+岑
+岔
+岖
+岗
+岘
+岙
+岚
+岛
+岡
+岩
+岫
+岬
+岭
+岱
+岳
+岷
+岸
+峇
+峋
+峒
+峙
+峡
+峤
+峥
+峦
+峨
+峪
+峭
+峯
+峰
+峴
+島
+峻
+峽
+崁
+崂
+崆
+崇
+崎
+崑
+崔
+崖
+崗
+崙
+崛
+崧
+崩
+崭
+崴
+崽
+嵇
+嵊
+嵋
+嵌
+嵐
+嵘
+嵩
+嵬
+嵯
+嶂
+嶄
+嶇
+嶋
+嶙
+嶺
+嶼
+嶽
+巅
+巍
+巒
+巔
+巖
+川
+州
+巡
+巢
+工
+左
+巧
+巨
+巩
+巫
+差
+己
+已
+巳
+巴
+巷
+巻
+巽
+巾
+巿
+币
+市
+布
+帅
+帆
+师
+希
+帐
+帑
+帕
+帖
+帘
+帚
+帛
+帜
+帝
+帥
+带
+帧
+師
+席
+帮
+帯
+帰
+帳
+帶
+帷
+常
+帼
+帽
+幀
+幂
+幄
+幅
+幌
+幔
+幕
+幟
+幡
+幢
+幣
+幫
+干
+平
+年
+并
+幸
+幹
+幺
+幻
+幼
+幽
+幾
+广
+庁
+広
+庄
+庆
+庇
+床
+序
+庐
+库
+应
+底
+庖
+店
+庙
+庚
+府
+庞
+废
+庠
+度
+座
+庫
+庭
+庵
+庶
+康
+庸
+庹
+庾
+廁
+廂
+廃
+廈
+廉
+廊
+廓
+廖
+廚
+廝
+廟
+廠
+廢
+廣
+廬
+廳
+延
+廷
+建
+廿
+开
+弁
+异
+弃
+弄
+弈
+弊
+弋
+式
+弑
+弒
+弓
+弔
+引
+弗
+弘
+弛
+弟
+张
+弥
+弦
+弧
+弩
+弭
+弯
+弱
+張
+強
+弹
+强
+弼
+弾
+彅
+彆
+彈
+彌
+彎
+归
+当
+录
+彗
+彙
+彝
+形
+彤
+彥
+彦
+彧
+彩
+彪
+彫
+彬
+彭
+彰
+影
+彷
+役
+彻
+彼
+彿
+往
+征
+径
+待
+徇
+很
+徉
+徊
+律
+後
+徐
+徑
+徒
+従
+徕
+得
+徘
+徙
+徜
+從
+徠
+御
+徨
+復
+循
+徬
+微
+徳
+徴
+徵
+德
+徹
+徼
+徽
+心
+必
+忆
+忌
+忍
+忏
+忐
+忑
+忒
+忖
+志
+忘
+忙
+応
+忠
+忡
+忤
+忧
+忪
+快
+忱
+念
+忻
+忽
+忿
+怀
+态
+怂
+怅
+怆
+怎
+怏
+怒
+怔
+怕
+怖
+怙
+怜
+思
+怠
+怡
+急
+怦
+性
+怨
+怪
+怯
+怵
+总
+怼
+恁
+恃
+恆
+恋
+恍
+恐
+恒
+恕
+恙
+恚
+恢
+恣
+恤
+恥
+恨
+恩
+恪
+恫
+恬
+恭
+息
+恰
+恳
+恵
+恶
+恸
+恺
+恻
+恼
+恿
+悄
+悅
+悉
+悌
+悍
+悔
+悖
+悚
+悟
+悠
+患
+悦
+您
+悩
+悪
+悬
+悯
+悱
+悲
+悴
+悵
+悶
+悸
+悻
+悼
+悽
+情
+惆
+惇
+惊
+惋
+惑
+惕
+惘
+惚
+惜
+惟
+惠
+惡
+惦
+惧
+惨
+惩
+惫
+惬
+惭
+惮
+惯
+惰
+惱
+想
+惴
+惶
+惹
+惺
+愁
+愆
+愈
+愉
+愍
+意
+愕
+愚
+愛
+愜
+感
+愣
+愤
+愧
+愫
+愷
+愿
+慄
+慈
+態
+慌
+慎
+慑
+慕
+慘
+慚
+慟
+慢
+慣
+慧
+慨
+慫
+慮
+慰
+慳
+慵
+慶
+慷
+慾
+憂
+憊
+憋
+憎
+憐
+憑
+憔
+憚
+憤
+憧
+憨
+憩
+憫
+憬
+憲
+憶
+憾
+懂
+懇
+懈
+應
+懊
+懋
+懑
+懒
+懦
+懲
+懵
+懶
+懷
+懸
+懺
+懼
+懾
+懿
+戀
+戈
+戊
+戌
+戍
+戎
+戏
+成
+我
+戒
+戕
+或
+战
+戚
+戛
+戟
+戡
+戦
+截
+戬
+戮
+戰
+戲
+戳
+戴
+戶
+户
+戸
+戻
+戾
+房
+所
+扁
+扇
+扈
+扉
+手
+才
+扎
+扑
+扒
+打
+扔
+払
+托
+扛
+扣
+扦
+执
+扩
+扪
+扫
+扬
+扭
+扮
+扯
+扰
+扱
+扳
+扶
+批
+扼
+找
+承
+技
+抄
+抉
+把
+抑
+抒
+抓
+投
+抖
+抗
+折
+抚
+抛
+抜
+択
+抟
+抠
+抡
+抢
+护
+报
+抨
+披
+抬
+抱
+抵
+抹
+押
+抽
+抿
+拂
+拄
+担
+拆
+拇
+拈
+拉
+拋
+拌
+拍
+拎
+拐
+拒
+拓
+拔
+拖
+拗
+拘
+拙
+拚
+招
+拜
+拟
+拡
+拢
+拣
+拥
+拦
+拧
+拨
+择
+括
+拭
+拮
+拯
+拱
+拳
+拴
+拷
+拼
+拽
+拾
+拿
+持
+挂
+指
+挈
+按
+挎
+挑
+挖
+挙
+挚
+挛
+挝
+挞
+挟
+挠
+挡
+挣
+挤
+挥
+挨
+挪
+挫
+振
+挲
+挹
+挺
+挽
+挾
+捂
+捅
+捆
+捉
+捋
+捌
+捍
+捎
+捏
+捐
+捕
+捞
+损
+捡
+换
+捣
+捧
+捨
+捩
+据
+捱
+捲
+捶
+捷
+捺
+捻
+掀
+掂
+掃
+掇
+授
+掉
+掌
+掏
+掐
+排
+掖
+掘
+掙
+掛
+掠
+採
+探
+掣
+接
+控
+推
+掩
+措
+掬
+掰
+掲
+掳
+掴
+掷
+掸
+掺
+揀
+揃
+揄
+揆
+揉
+揍
+描
+提
+插
+揖
+揚
+換
+握
+揣
+揩
+揪
+揭
+揮
+援
+揶
+揸
+揹
+揽
+搀
+搁
+搂
+搅
+損
+搏
+搐
+搓
+搔
+搖
+搗
+搜
+搞
+搡
+搪
+搬
+搭
+搵
+搶
+携
+搽
+摀
+摁
+摄
+摆
+摇
+摈
+摊
+摒
+摔
+摘
+摞
+摟
+摧
+摩
+摯
+摳
+摸
+摹
+摺
+摻
+撂
+撃
+撅
+撇
+撈
+撐
+撑
+撒
+撓
+撕
+撚
+撞
+撤
+撥
+撩
+撫
+撬
+播
+撮
+撰
+撲
+撵
+撷
+撸
+撻
+撼
+撿
+擀
+擁
+擂
+擄
+擅
+擇
+擊
+擋
+操
+擎
+擒
+擔
+擘
+據
+擞
+擠
+擡
+擢
+擦
+擬
+擰
+擱
+擲
+擴
+擷
+擺
+擼
+擾
+攀
+攏
+攒
+攔
+攘
+攙
+攜
+攝
+攞
+攢
+攣
+攤
+攥
+攪
+攫
+攬
+支
+收
+攸
+改
+攻
+放
+政
+故
+效
+敌
+敍
+敎
+敏
+救
+敕
+敖
+敗
+敘
+教
+敛
+敝
+敞
+敢
+散
+敦
+敬
+数
+敲
+整
+敵
+敷
+數
+斂
+斃
+文
+斋
+斌
+斎
+斐
+斑
+斓
+斗
+料
+斛
+斜
+斟
+斡
+斤
+斥
+斧
+斩
+斫
+斬
+断
+斯
+新
+斷
+方
+於
+施
+旁
+旃
+旅
+旋
+旌
+旎
+族
+旖
+旗
+无
+既
+日
+旦
+旧
+旨
+早
+旬
+旭
+旮
+旱
+时
+旷
+旺
+旻
+昀
+昂
+昆
+昇
+昉
+昊
+昌
+明
+昏
+易
+昔
+昕
+昙
+星
+映
+春
+昧
+昨
+昭
+是
+昱
+昴
+昵
+昶
+昼
+显
+晁
+時
+晃
+晉
+晋
+晌
+晏
+晒
+晓
+晔
+晕
+晖
+晗
+晚
+晝
+晞
+晟
+晤
+晦
+晨
+晩
+普
+景
+晰
+晴
+晶
+晷
+智
+晾
+暂
+暄
+暇
+暈
+暉
+暌
+暐
+暑
+暖
+暗
+暝
+暢
+暧
+暨
+暫
+暮
+暱
+暴
+暸
+暹
+曄
+曆
+曇
+曉
+曖
+曙
+曜
+曝
+曠
+曦
+曬
+曰
+曲
+曳
+更
+書
+曹
+曼
+曾
+替
+最
+會
+月
+有
+朋
+服
+朐
+朔
+朕
+朗
+望
+朝
+期
+朦
+朧
+木
+未
+末
+本
+札
+朮
+术
+朱
+朴
+朵
+机
+朽
+杀
+杂
+权
+杆
+杈
+杉
+李
+杏
+材
+村
+杓
+杖
+杜
+杞
+束
+杠
+条
+来
+杨
+杭
+杯
+杰
+東
+杳
+杵
+杷
+杼
+松
+板
+极
+构
+枇
+枉
+枋
+析
+枕
+林
+枚
+果
+枝
+枢
+枣
+枪
+枫
+枭
+枯
+枰
+枱
+枳
+架
+枷
+枸
+柄
+柏
+某
+柑
+柒
+染
+柔
+柘
+柚
+柜
+柞
+柠
+柢
+查
+柩
+柬
+柯
+柱
+柳
+柴
+柵
+査
+柿
+栀
+栃
+栄
+栅
+标
+栈
+栉
+栋
+栎
+栏
+树
+栓
+栖
+栗
+校
+栩
+株
+样
+核
+根
+格
+栽
+栾
+桀
+桁
+桂
+桃
+桅
+框
+案
+桉
+桌
+桎
+桐
+桑
+桓
+桔
+桜
+桠
+桡
+桢
+档
+桥
+桦
+桧
+桨
+桩
+桶
+桿
+梁
+梅
+梆
+梏
+梓
+梗
+條
+梟
+梢
+梦
+梧
+梨
+梭
+梯
+械
+梳
+梵
+梶
+检
+棂
+棄
+棉
+棋
+棍
+棒
+棕
+棗
+棘
+棚
+棟
+棠
+棣
+棧
+森
+棱
+棲
+棵
+棹
+棺
+椁
+椅
+椋
+植
+椎
+椒
+検
+椪
+椭
+椰
+椹
+椽
+椿
+楂
+楊
+楓
+楔
+楚
+楝
+楞
+楠
+楣
+楨
+楫
+業
+楮
+極
+楷
+楸
+楹
+楼
+楽
+概
+榄
+榆
+榈
+榉
+榔
+榕
+榖
+榛
+榜
+榨
+榫
+榭
+榮
+榱
+榴
+榷
+榻
+槁
+槃
+構
+槌
+槍
+槎
+槐
+槓
+様
+槛
+槟
+槤
+槭
+槲
+槳
+槻
+槽
+槿
+樁
+樂
+樊
+樑
+樓
+標
+樞
+樟
+模
+樣
+権
+横
+樫
+樯
+樱
+樵
+樸
+樹
+樺
+樽
+樾
+橄
+橇
+橋
+橐
+橘
+橙
+機
+橡
+橢
+橫
+橱
+橹
+橼
+檀
+檄
+檎
+檐
+檔
+檗
+檜
+檢
+檬
+檯
+檳
+檸
+檻
+櫃
+櫚
+櫛
+櫥
+櫸
+櫻
+欄
+權
+欒
+欖
+欠
+次
+欢
+欣
+欧
+欲
+欸
+欺
+欽
+款
+歆
+歇
+歉
+歌
+歎
+歐
+歓
+歙
+歛
+歡
+止
+正
+此
+步
+武
+歧
+歩
+歪
+歯
+歲
+歳
+歴
+歷
+歸
+歹
+死
+歼
+殁
+殃
+殆
+殇
+殉
+殊
+残
+殒
+殓
+殖
+殘
+殞
+殡
+殤
+殭
+殯
+殲
+殴
+段
+殷
+殺
+殼
+殿
+毀
+毁
+毂
+毅
+毆
+毋
+母
+毎
+每
+毒
+毓
+比
+毕
+毗
+毘
+毙
+毛
+毡
+毫
+毯
+毽
+氈
+氏
+氐
+民
+氓
+气
+氖
+気
+氙
+氛
+氟
+氡
+氢
+氣
+氤
+氦
+氧
+氨
+氪
+氫
+氮
+氯
+氰
+氲
+水
+氷
+永
+氹
+氾
+汀
+汁
+求
+汆
+汇
+汉
+汎
+汐
+汕
+汗
+汙
+汛
+汝
+汞
+江
+池
+污
+汤
+汨
+汩
+汪
+汰
+汲
+汴
+汶
+汹
+決
+汽
+汾
+沁
+沂
+沃
+沅
+沈
+沉
+沌
+沏
+沐
+沒
+沓
+沖
+沙
+沛
+沟
+没
+沢
+沣
+沥
+沦
+沧
+沪
+沫
+沭
+沮
+沱
+河
+沸
+油
+治
+沼
+沽
+沾
+沿
+況
+泄
+泉
+泊
+泌
+泓
+法
+泗
+泛
+泞
+泠
+泡
+波
+泣
+泥
+注
+泪
+泫
+泮
+泯
+泰
+泱
+泳
+泵
+泷
+泸
+泻
+泼
+泽
+泾
+洁
+洄
+洋
+洒
+洗
+洙
+洛
+洞
+津
+洩
+洪
+洮
+洱
+洲
+洵
+洶
+洸
+洹
+活
+洼
+洽
+派
+流
+浃
+浄
+浅
+浆
+浇
+浊
+测
+济
+浏
+浑
+浒
+浓
+浔
+浙
+浚
+浜
+浣
+浦
+浩
+浪
+浬
+浮
+浯
+浴
+海
+浸
+涂
+涅
+涇
+消
+涉
+涌
+涎
+涓
+涔
+涕
+涙
+涛
+涝
+涞
+涟
+涠
+涡
+涣
+涤
+润
+涧
+涨
+涩
+涪
+涮
+涯
+液
+涵
+涸
+涼
+涿
+淀
+淄
+淅
+淆
+淇
+淋
+淌
+淑
+淒
+淖
+淘
+淙
+淚
+淞
+淡
+淤
+淦
+淨
+淩
+淪
+淫
+淬
+淮
+深
+淳
+淵
+混
+淹
+淺
+添
+淼
+清
+済
+渉
+渊
+渋
+渍
+渎
+渐
+渔
+渗
+渙
+渚
+減
+渝
+渠
+渡
+渣
+渤
+渥
+渦
+温
+測
+渭
+港
+渲
+渴
+游
+渺
+渾
+湃
+湄
+湊
+湍
+湖
+湘
+湛
+湟
+湧
+湫
+湮
+湯
+湳
+湾
+湿
+満
+溃
+溅
+溉
+溏
+源
+準
+溜
+溝
+溟
+溢
+溥
+溧
+溪
+溫
+溯
+溱
+溴
+溶
+溺
+溼
+滁
+滂
+滄
+滅
+滇
+滋
+滌
+滑
+滓
+滔
+滕
+滙
+滚
+滝
+滞
+滟
+满
+滢
+滤
+滥
+滦
+滨
+滩
+滬
+滯
+滲
+滴
+滷
+滸
+滾
+滿
+漁
+漂
+漆
+漉
+漏
+漓
+演
+漕
+漠
+漢
+漣
+漩
+漪
+漫
+漬
+漯
+漱
+漲
+漳
+漸
+漾
+漿
+潆
+潇
+潋
+潍
+潑
+潔
+潘
+潛
+潜
+潞
+潟
+潢
+潤
+潦
+潧
+潭
+潮
+潰
+潴
+潸
+潺
+潼
+澀
+澄
+澆
+澈
+澍
+澎
+澗
+澜
+澡
+澤
+澧
+澱
+澳
+澹
+激
+濁
+濂
+濃
+濑
+濒
+濕
+濘
+濛
+濟
+濠
+濡
+濤
+濫
+濬
+濮
+濯
+濱
+濺
+濾
+瀅
+瀆
+瀉
+瀋
+瀏
+瀑
+瀕
+瀘
+瀚
+瀛
+瀝
+瀞
+瀟
+瀧
+瀨
+瀬
+瀰
+瀾
+灌
+灏
+灑
+灘
+灝
+灞
+灣
+火
+灬
+灭
+灯
+灰
+灵
+灶
+灸
+灼
+災
+灾
+灿
+炀
+炁
+炅
+炉
+炊
+炎
+炒
+炔
+炕
+炖
+炙
+炜
+炫
+炬
+炭
+炮
+炯
+炳
+炷
+炸
+点
+為
+炼
+炽
+烁
+烂
+烃
+烈
+烊
+烏
+烘
+烙
+烛
+烟
+烤
+烦
+烧
+烨
+烩
+烫
+烬
+热
+烯
+烷
+烹
+烽
+焉
+焊
+焕
+焖
+焗
+焘
+焙
+焚
+焜
+無
+焦
+焯
+焰
+焱
+然
+焼
+煅
+煉
+煊
+煌
+煎
+煒
+煖
+煙
+煜
+煞
+煤
+煥
+煦
+照
+煨
+煩
+煮
+煲
+煸
+煽
+熄
+熊
+熏
+熒
+熔
+熙
+熟
+熠
+熨
+熬
+熱
+熵
+熹
+熾
+燁
+燃
+燄
+燈
+燉
+燊
+燎
+燒
+燔
+燕
+燙
+燜
+營
+燥
+燦
+燧
+燭
+燮
+燴
+燻
+燼
+燿
+爆
+爍
+爐
+爛
+爪
+爬
+爭
+爰
+爱
+爲
+爵
+父
+爷
+爸
+爹
+爺
+爻
+爽
+爾
+牆
+片
+版
+牌
+牍
+牒
+牙
+牛
+牝
+牟
+牠
+牡
+牢
+牦
+牧
+物
+牯
+牲
+牴
+牵
+特
+牺
+牽
+犀
+犁
+犄
+犊
+犍
+犒
+犢
+犧
+犬
+犯
+状
+犷
+犸
+犹
+狀
+狂
+狄
+狈
+狎
+狐
+狒
+狗
+狙
+狞
+狠
+狡
+狩
+独
+狭
+狮
+狰
+狱
+狸
+狹
+狼
+狽
+猎
+猕
+猖
+猗
+猙
+猛
+猜
+猝
+猥
+猩
+猪
+猫
+猬
+献
+猴
+猶
+猷
+猾
+猿
+獄
+獅
+獎
+獐
+獒
+獗
+獠
+獣
+獨
+獭
+獰
+獲
+獵
+獷
+獸
+獺
+獻
+獼
+獾
+玄
+率
+玉
+王
+玑
+玖
+玛
+玟
+玠
+玥
+玩
+玫
+玮
+环
+现
+玲
+玳
+玷
+玺
+玻
+珀
+珂
+珅
+珈
+珉
+珊
+珍
+珏
+珐
+珑
+珙
+珞
+珠
+珣
+珥
+珩
+珪
+班
+珮
+珲
+珺
+現
+球
+琅
+理
+琇
+琉
+琊
+琍
+琏
+琐
+琛
+琢
+琥
+琦
+琨
+琪
+琬
+琮
+琰
+琲
+琳
+琴
+琵
+琶
+琺
+琼
+瑀
+瑁
+瑄
+瑋
+瑕
+瑗
+瑙
+瑚
+瑛
+瑜
+瑞
+瑟
+瑠
+瑣
+瑤
+瑩
+瑪
+瑯
+瑰
+瑶
+瑾
+璀
+璁
+璃
+璇
+璉
+璋
+璎
+璐
+璜
+璞
+璟
+璧
+璨
+環
+璽
+璿
+瓊
+瓏
+瓒
+瓜
+瓢
+瓣
+瓤
+瓦
+瓮
+瓯
+瓴
+瓶
+瓷
+甄
+甌
+甕
+甘
+甙
+甚
+甜
+生
+產
+産
+甥
+甦
+用
+甩
+甫
+甬
+甭
+甯
+田
+由
+甲
+申
+电
+男
+甸
+町
+画
+甾
+畀
+畅
+界
+畏
+畑
+畔
+留
+畜
+畝
+畢
+略
+畦
+番
+畫
+異
+畲
+畳
+畴
+當
+畸
+畹
+畿
+疆
+疇
+疊
+疏
+疑
+疔
+疖
+疗
+疙
+疚
+疝
+疟
+疡
+疣
+疤
+疥
+疫
+疮
+疯
+疱
+疲
+疳
+疵
+疸
+疹
+疼
+疽
+疾
+痂
+病
+症
+痈
+痉
+痊
+痍
+痒
+痔
+痕
+痘
+痙
+痛
+痞
+痠
+痢
+痣
+痤
+痧
+痨
+痪
+痫
+痰
+痱
+痴
+痹
+痺
+痼
+痿
+瘀
+瘁
+瘋
+瘍
+瘓
+瘘
+瘙
+瘟
+瘠
+瘡
+瘢
+瘤
+瘦
+瘧
+瘩
+瘪
+瘫
+瘴
+瘸
+瘾
+療
+癇
+癌
+癒
+癖
+癜
+癞
+癡
+癢
+癣
+癥
+癫
+癬
+癮
+癱
+癲
+癸
+発
+登
+發
+白
+百
+皂
+的
+皆
+皇
+皈
+皋
+皎
+皑
+皓
+皖
+皙
+皚
+皮
+皰
+皱
+皴
+皺
+皿
+盂
+盃
+盅
+盆
+盈
+益
+盎
+盏
+盐
+监
+盒
+盔
+盖
+盗
+盘
+盛
+盜
+盞
+盟
+盡
+監
+盤
+盥
+盧
+盪
+目
+盯
+盱
+盲
+直
+相
+盹
+盼
+盾
+省
+眈
+眉
+看
+県
+眙
+眞
+真
+眠
+眦
+眨
+眩
+眯
+眶
+眷
+眸
+眺
+眼
+眾
+着
+睁
+睇
+睏
+睐
+睑
+睛
+睜
+睞
+睡
+睢
+督
+睥
+睦
+睨
+睪
+睫
+睬
+睹
+睽
+睾
+睿
+瞄
+瞅
+瞇
+瞋
+瞌
+瞎
+瞑
+瞒
+瞓
+瞞
+瞟
+瞠
+瞥
+瞧
+瞩
+瞪
+瞬
+瞭
+瞰
+瞳
+瞻
+瞼
+瞿
+矇
+矍
+矗
+矚
+矛
+矜
+矢
+矣
+知
+矩
+矫
+短
+矮
+矯
+石
+矶
+矽
+矾
+矿
+码
+砂
+砌
+砍
+砒
+研
+砖
+砗
+砚
+砝
+砣
+砥
+砧
+砭
+砰
+砲
+破
+砷
+砸
+砺
+砼
+砾
+础
+硅
+硐
+硒
+硕
+硝
+硫
+硬
+确
+硯
+硼
+碁
+碇
+碉
+碌
+碍
+碎
+碑
+碓
+碗
+碘
+碚
+碛
+碟
+碣
+碧
+碩
+碰
+碱
+碳
+碴
+確
+碼
+碾
+磁
+磅
+磊
+磋
+磐
+磕
+磚
+磡
+磨
+磬
+磯
+磲
+磷
+磺
+礁
+礎
+礙
+礡
+礦
+礪
+礫
+礴
+示
+礼
+社
+祀
+祁
+祂
+祇
+祈
+祉
+祎
+祐
+祕
+祖
+祗
+祚
+祛
+祜
+祝
+神
+祟
+祠
+祢
+祥
+票
+祭
+祯
+祷
+祸
+祺
+祿
+禀
+禁
+禄
+禅
+禍
+禎
+福
+禛
+禦
+禧
+禪
+禮
+禱
+禹
+禺
+离
+禽
+禾
+禿
+秀
+私
+秃
+秆
+秉
+秋
+种
+科
+秒
+秘
+租
+秣
+秤
+秦
+秧
+秩
+秭
+积
+称
+秸
+移
+秽
+稀
+稅
+程
+稍
+税
+稔
+稗
+稚
+稜
+稞
+稟
+稠
+稣
+種
+稱
+稲
+稳
+稷
+稹
+稻
+稼
+稽
+稿
+穀
+穂
+穆
+穌
+積
+穎
+穗
+穢
+穩
+穫
+穴
+究
+穷
+穹
+空
+穿
+突
+窃
+窄
+窈
+窍
+窑
+窒
+窓
+窕
+窖
+窗
+窘
+窜
+窝
+窟
+窠
+窥
+窦
+窨
+窩
+窪
+窮
+窯
+窺
+窿
+竄
+竅
+竇
+竊
+立
+竖
+站
+竜
+竞
+竟
+章
+竣
+童
+竭
+端
+競
+竹
+竺
+竽
+竿
+笃
+笆
+笈
+笋
+笏
+笑
+笔
+笙
+笛
+笞
+笠
+符
+笨
+第
+笹
+笺
+笼
+筆
+等
+筊
+筋
+筍
+筏
+筐
+筑
+筒
+答
+策
+筛
+筝
+筠
+筱
+筲
+筵
+筷
+筹
+签
+简
+箇
+箋
+箍
+箏
+箐
+箔
+箕
+算
+箝
+管
+箩
+箫
+箭
+箱
+箴
+箸
+節
+篁
+範
+篆
+篇
+築
+篑
+篓
+篙
+篝
+篠
+篡
+篤
+篩
+篪
+篮
+篱
+篷
+簇
+簌
+簍
+簡
+簦
+簧
+簪
+簫
+簷
+簸
+簽
+簾
+簿
+籁
+籃
+籌
+籍
+籐
+籟
+籠
+籤
+籬
+籮
+籲
+米
+类
+籼
+籽
+粄
+粉
+粑
+粒
+粕
+粗
+粘
+粟
+粤
+粥
+粧
+粪
+粮
+粱
+粲
+粳
+粵
+粹
+粼
+粽
+精
+粿
+糅
+糊
+糍
+糕
+糖
+糗
+糙
+糜
+糞
+糟
+糠
+糧
+糬
+糯
+糰
+糸
+系
+糾
+紀
+紂
+約
+紅
+紉
+紊
+紋
+納
+紐
+紓
+純
+紗
+紘
+紙
+級
+紛
+紜
+素
+紡
+索
+紧
+紫
+紮
+累
+細
+紳
+紹
+紺
+終
+絃
+組
+絆
+経
+結
+絕
+絞
+絡
+絢
+給
+絨
+絮
+統
+絲
+絳
+絵
+絶
+絹
+綁
+綏
+綑
+經
+継
+続
+綜
+綠
+綢
+綦
+綫
+綬
+維
+綱
+網
+綴
+綵
+綸
+綺
+綻
+綽
+綾
+綿
+緊
+緋
+総
+緑
+緒
+緘
+線
+緝
+緞
+締
+緣
+編
+緩
+緬
+緯
+練
+緹
+緻
+縁
+縄
+縈
+縛
+縝
+縣
+縫
+縮
+縱
+縴
+縷
+總
+績
+繁
+繃
+繆
+繇
+繋
+織
+繕
+繚
+繞
+繡
+繩
+繪
+繫
+繭
+繳
+繹
+繼
+繽
+纂
+續
+纍
+纏
+纓
+纔
+纖
+纜
+纠
+红
+纣
+纤
+约
+级
+纨
+纪
+纫
+纬
+纭
+纯
+纰
+纱
+纲
+纳
+纵
+纶
+纷
+纸
+纹
+纺
+纽
+纾
+线
+绀
+练
+组
+绅
+细
+织
+终
+绊
+绍
+绎
+经
+绑
+绒
+结
+绔
+绕
+绘
+给
+绚
+绛
+络
+绝
+绞
+统
+绡
+绢
+绣
+绥
+绦
+继
+绩
+绪
+绫
+续
+绮
+绯
+绰
+绳
+维
+绵
+绶
+绷
+绸
+绻
+综
+绽
+绾
+绿
+缀
+缄
+缅
+缆
+缇
+缈
+缉
+缎
+缓
+缔
+缕
+编
+缘
+缙
+缚
+缜
+缝
+缠
+缢
+缤
+缥
+缨
+缩
+缪
+缭
+缮
+缰
+缱
+缴
+缸
+缺
+缽
+罂
+罄
+罌
+罐
+网
+罔
+罕
+罗
+罚
+罡
+罢
+罩
+罪
+置
+罰
+署
+罵
+罷
+罹
+羁
+羅
+羈
+羊
+羌
+美
+羔
+羚
+羞
+羟
+羡
+羣
+群
+羥
+羧
+羨
+義
+羯
+羲
+羸
+羹
+羽
+羿
+翁
+翅
+翊
+翌
+翎
+習
+翔
+翘
+翟
+翠
+翡
+翦
+翩
+翰
+翱
+翳
+翹
+翻
+翼
+耀
+老
+考
+耄
+者
+耆
+耋
+而
+耍
+耐
+耒
+耕
+耗
+耘
+耙
+耦
+耨
+耳
+耶
+耷
+耸
+耻
+耽
+耿
+聂
+聆
+聊
+聋
+职
+聒
+联
+聖
+聘
+聚
+聞
+聪
+聯
+聰
+聲
+聳
+聴
+聶
+職
+聽
+聾
+聿
+肃
+肄
+肅
+肆
+肇
+肉
+肋
+肌
+肏
+肓
+肖
+肘
+肚
+肛
+肝
+肠
+股
+肢
+肤
+肥
+肩
+肪
+肮
+肯
+肱
+育
+肴
+肺
+肽
+肾
+肿
+胀
+胁
+胃
+胄
+胆
+背
+胍
+胎
+胖
+胚
+胛
+胜
+胝
+胞
+胡
+胤
+胥
+胧
+胫
+胭
+胯
+胰
+胱
+胳
+胴
+胶
+胸
+胺
+能
+脂
+脅
+脆
+脇
+脈
+脉
+脊
+脍
+脏
+脐
+脑
+脓
+脖
+脘
+脚
+脛
+脣
+脩
+脫
+脯
+脱
+脲
+脳
+脸
+脹
+脾
+腆
+腈
+腊
+腋
+腌
+腎
+腐
+腑
+腓
+腔
+腕
+腥
+腦
+腩
+腫
+腭
+腮
+腰
+腱
+腳
+腴
+腸
+腹
+腺
+腻
+腼
+腾
+腿
+膀
+膈
+膊
+膏
+膑
+膘
+膚
+膛
+膜
+膝
+膠
+膦
+膨
+膩
+膳
+膺
+膻
+膽
+膾
+膿
+臀
+臂
+臃
+臆
+臉
+臊
+臍
+臓
+臘
+臟
+臣
+臥
+臧
+臨
+自
+臬
+臭
+至
+致
+臺
+臻
+臼
+臾
+舀
+舂
+舅
+舆
+與
+興
+舉
+舊
+舌
+舍
+舎
+舐
+舒
+舔
+舖
+舗
+舛
+舜
+舞
+舟
+航
+舫
+般
+舰
+舱
+舵
+舶
+舷
+舸
+船
+舺
+舾
+艇
+艋
+艘
+艙
+艦
+艮
+良
+艰
+艱
+色
+艳
+艷
+艹
+艺
+艾
+节
+芃
+芈
+芊
+芋
+芍
+芎
+芒
+芙
+芜
+芝
+芡
+芥
+芦
+芩
+芪
+芫
+芬
+芭
+芮
+芯
+花
+芳
+芷
+芸
+芹
+芻
+芽
+芾
+苁
+苄
+苇
+苋
+苍
+苏
+苑
+苒
+苓
+苔
+苕
+苗
+苛
+苜
+苞
+苟
+苡
+苣
+若
+苦
+苫
+苯
+英
+苷
+苹
+苻
+茁
+茂
+范
+茄
+茅
+茉
+茎
+茏
+茗
+茜
+茧
+茨
+茫
+茬
+茭
+茯
+茱
+茲
+茴
+茵
+茶
+茸
+茹
+茼
+荀
+荃
+荆
+草
+荊
+荏
+荐
+荒
+荔
+荖
+荘
+荚
+荞
+荟
+荠
+荡
+荣
+荤
+荥
+荧
+荨
+荪
+荫
+药
+荳
+荷
+荸
+荻
+荼
+荽
+莅
+莆
+莉
+莊
+莎
+莒
+莓
+莖
+莘
+莞
+莠
+莢
+莧
+莪
+莫
+莱
+莲
+莴
+获
+莹
+莺
+莽
+莿
+菀
+菁
+菅
+菇
+菈
+菊
+菌
+菏
+菓
+菖
+菘
+菜
+菟
+菠
+菡
+菩
+華
+菱
+菲
+菸
+菽
+萁
+萃
+萄
+萊
+萋
+萌
+萍
+萎
+萘
+萝
+萤
+营
+萦
+萧
+萨
+萩
+萬
+萱
+萵
+萸
+萼
+落
+葆
+葉
+著
+葚
+葛
+葡
+董
+葦
+葩
+葫
+葬
+葭
+葯
+葱
+葳
+葵
+葷
+葺
+蒂
+蒋
+蒐
+蒔
+蒙
+蒜
+蒞
+蒟
+蒡
+蒨
+蒲
+蒸
+蒹
+蒻
+蒼
+蒿
+蓁
+蓄
+蓆
+蓉
+蓋
+蓑
+蓓
+蓖
+蓝
+蓟
+蓦
+蓬
+蓮
+蓼
+蓿
+蔑
+蔓
+蔔
+蔗
+蔘
+蔚
+蔡
+蔣
+蔥
+蔫
+蔬
+蔭
+蔵
+蔷
+蔺
+蔻
+蔼
+蔽
+蕁
+蕃
+蕈
+蕉
+蕊
+蕎
+蕙
+蕤
+蕨
+蕩
+蕪
+蕭
+蕲
+蕴
+蕻
+蕾
+薄
+薅
+薇
+薈
+薊
+薏
+薑
+薔
+薙
+薛
+薦
+薨
+薩
+薪
+薬
+薯
+薰
+薹
+藉
+藍
+藏
+藐
+藓
+藕
+藜
+藝
+藤
+藥
+藩
+藹
+藻
+藿
+蘆
+蘇
+蘊
+蘋
+蘑
+蘚
+蘭
+蘸
+蘼
+蘿
+虎
+虏
+虐
+虑
+虔
+處
+虚
+虛
+虜
+虞
+號
+虢
+虧
+虫
+虬
+虱
+虹
+虻
+虽
+虾
+蚀
+蚁
+蚂
+蚊
+蚌
+蚓
+蚕
+蚜
+蚝
+蚣
+蚤
+蚩
+蚪
+蚯
+蚱
+蚵
+蛀
+蛆
+蛇
+蛊
+蛋
+蛎
+蛐
+蛔
+蛙
+蛛
+蛟
+蛤
+蛭
+蛮
+蛰
+蛳
+蛹
+蛻
+蛾
+蜀
+蜂
+蜃
+蜆
+蜇
+蜈
+蜊
+蜍
+蜒
+蜓
+蜕
+蜗
+蜘
+蜚
+蜜
+蜡
+蜢
+蜥
+蜱
+蜴
+蜷
+蜻
+蜿
+蝇
+蝈
+蝉
+蝌
+蝎
+蝕
+蝗
+蝙
+蝟
+蝠
+蝦
+蝨
+蝴
+蝶
+蝸
+蝼
+螂
+螃
+融
+螞
+螢
+螨
+螯
+螳
+螺
+蟀
+蟄
+蟆
+蟋
+蟎
+蟑
+蟒
+蟠
+蟬
+蟲
+蟹
+蟻
+蟾
+蠅
+蠍
+蠔
+蠕
+蠛
+蠟
+蠡
+蠢
+蠣
+蠱
+蠶
+蠹
+蠻
+血
+衄
+衅
+衆
+行
+衍
+術
+衔
+街
+衙
+衛
+衝
+衞
+衡
+衢
+衣
+补
+表
+衩
+衫
+衬
+衮
+衰
+衲
+衷
+衹
+衾
+衿
+袁
+袂
+袄
+袅
+袈
+袋
+袍
+袒
+袖
+袜
+袞
+袤
+袪
+被
+袭
+袱
+裁
+裂
+装
+裆
+裊
+裏
+裔
+裕
+裘
+裙
+補
+裝
+裟
+裡
+裤
+裨
+裱
+裳
+裴
+裸
+裹
+製
+裾
+褂
+複
+褐
+褒
+褓
+褔
+褚
+褥
+褪
+褫
+褲
+褶
+褻
+襁
+襄
+襟
+襠
+襪
+襬
+襯
+襲
+西
+要
+覃
+覆
+覇
+見
+規
+覓
+視
+覚
+覦
+覧
+親
+覬
+観
+覷
+覺
+覽
+觀
+见
+观
+规
+觅
+视
+览
+觉
+觊
+觎
+觐
+觑
+角
+觞
+解
+觥
+触
+觸
+言
+訂
+計
+訊
+討
+訓
+訕
+訖
+託
+記
+訛
+訝
+訟
+訣
+訥
+訪
+設
+許
+訳
+訴
+訶
+診
+註
+証
+詆
+詐
+詔
+評
+詛
+詞
+詠
+詡
+詢
+詣
+試
+詩
+詫
+詬
+詭
+詮
+詰
+話
+該
+詳
+詹
+詼
+誅
+誇
+誉
+誌
+認
+誓
+誕
+誘
+語
+誠
+誡
+誣
+誤
+誥
+誦
+誨
+說
+説
+読
+誰
+課
+誹
+誼
+調
+諄
+談
+請
+諏
+諒
+論
+諗
+諜
+諡
+諦
+諧
+諫
+諭
+諮
+諱
+諳
+諷
+諸
+諺
+諾
+謀
+謁
+謂
+謄
+謊
+謎
+謐
+謔
+謗
+謙
+講
+謝
+謠
+謨
+謬
+謹
+謾
+譁
+證
+譎
+譏
+識
+譙
+譚
+譜
+警
+譬
+譯
+議
+譲
+譴
+護
+譽
+讀
+變
+讓
+讚
+讞
+计
+订
+认
+讥
+讧
+讨
+让
+讪
+讫
+训
+议
+讯
+记
+讲
+讳
+讴
+讶
+讷
+许
+讹
+论
+讼
+讽
+设
+访
+诀
+证
+诃
+评
+诅
+识
+诈
+诉
+诊
+诋
+词
+诏
+译
+试
+诗
+诘
+诙
+诚
+诛
+话
+诞
+诟
+诠
+诡
+询
+诣
+诤
+该
+详
+诧
+诩
+诫
+诬
+语
+误
+诰
+诱
+诲
+说
+诵
+诶
+请
+诸
+诺
+读
+诽
+课
+诿
+谀
+谁
+调
+谄
+谅
+谆
+谈
+谊
+谋
+谌
+谍
+谎
+谏
+谐
+谑
+谒
+谓
+谔
+谕
+谗
+谘
+谙
+谚
+谛
+谜
+谟
+谢
+谣
+谤
+谥
+谦
+谧
+谨
+谩
+谪
+谬
+谭
+谯
+谱
+谲
+谴
+谶
+谷
+豁
+豆
+豇
+豈
+豉
+豊
+豌
+豎
+豐
+豔
+豚
+象
+豢
+豪
+豫
+豬
+豹
+豺
+貂
+貅
+貌
+貓
+貔
+貘
+貝
+貞
+負
+財
+貢
+貧
+貨
+販
+貪
+貫
+責
+貯
+貰
+貳
+貴
+貶
+買
+貸
+費
+貼
+貽
+貿
+賀
+賁
+賂
+賃
+賄
+資
+賈
+賊
+賑
+賓
+賜
+賞
+賠
+賡
+賢
+賣
+賤
+賦
+質
+賬
+賭
+賴
+賺
+購
+賽
+贅
+贈
+贊
+贍
+贏
+贓
+贖
+贛
+贝
+贞
+负
+贡
+财
+责
+贤
+败
+账
+货
+质
+贩
+贪
+贫
+贬
+购
+贮
+贯
+贰
+贱
+贲
+贴
+贵
+贷
+贸
+费
+贺
+贻
+贼
+贾
+贿
+赁
+赂
+赃
+资
+赅
+赈
+赊
+赋
+赌
+赎
+赏
+赐
+赓
+赔
+赖
+赘
+赚
+赛
+赝
+赞
+赠
+赡
+赢
+赣
+赤
+赦
+赧
+赫
+赭
+走
+赳
+赴
+赵
+赶
+起
+趁
+超
+越
+趋
+趕
+趙
+趟
+趣
+趨
+足
+趴
+趵
+趸
+趺
+趾
+跃
+跄
+跆
+跋
+跌
+跎
+跑
+跖
+跚
+跛
+距
+跟
+跡
+跤
+跨
+跩
+跪
+路
+跳
+践
+跷
+跹
+跺
+跻
+踉
+踊
+踌
+踏
+踐
+踝
+踞
+踟
+踢
+踩
+踪
+踮
+踱
+踴
+踵
+踹
+蹂
+蹄
+蹇
+蹈
+蹉
+蹊
+蹋
+蹑
+蹒
+蹙
+蹟
+蹣
+蹤
+蹦
+蹩
+蹬
+蹭
+蹲
+蹴
+蹶
+蹺
+蹼
+蹿
+躁
+躇
+躉
+躊
+躋
+躍
+躏
+躪
+身
+躬
+躯
+躲
+躺
+軀
+車
+軋
+軌
+軍
+軒
+軟
+転
+軸
+軼
+軽
+軾
+較
+載
+輒
+輓
+輔
+輕
+輛
+輝
+輟
+輩
+輪
+輯
+輸
+輻
+輾
+輿
+轄
+轅
+轆
+轉
+轍
+轎
+轟
+车
+轧
+轨
+轩
+转
+轭
+轮
+软
+轰
+轲
+轴
+轶
+轻
+轼
+载
+轿
+较
+辄
+辅
+辆
+辇
+辈
+辉
+辊
+辍
+辐
+辑
+输
+辕
+辖
+辗
+辘
+辙
+辛
+辜
+辞
+辟
+辣
+辦
+辨
+辩
+辫
+辭
+辮
+辯
+辰
+辱
+農
+边
+辺
+辻
+込
+辽
+达
+迁
+迂
+迄
+迅
+过
+迈
+迎
+运
+近
+返
+还
+这
+进
+远
+违
+连
+迟
+迢
+迤
+迥
+迦
+迩
+迪
+迫
+迭
+述
+迴
+迷
+迸
+迹
+迺
+追
+退
+送
+适
+逃
+逅
+逆
+选
+逊
+逍
+透
+逐
+递
+途
+逕
+逗
+這
+通
+逛
+逝
+逞
+速
+造
+逢
+連
+逮
+週
+進
+逵
+逶
+逸
+逻
+逼
+逾
+遁
+遂
+遅
+遇
+遊
+運
+遍
+過
+遏
+遐
+遑
+遒
+道
+達
+違
+遗
+遙
+遛
+遜
+遞
+遠
+遢
+遣
+遥
+遨
+適
+遭
+遮
+遲
+遴
+遵
+遶
+遷
+選
+遺
+遼
+遽
+避
+邀
+邁
+邂
+邃
+還
+邇
+邈
+邊
+邋
+邏
+邑
+邓
+邕
+邛
+邝
+邢
+那
+邦
+邨
+邪
+邬
+邮
+邯
+邰
+邱
+邳
+邵
+邸
+邹
+邺
+邻
+郁
+郅
+郊
+郎
+郑
+郜
+郝
+郡
+郢
+郤
+郦
+郧
+部
+郫
+郭
+郴
+郵
+郷
+郸
+都
+鄂
+鄉
+鄒
+鄔
+鄙
+鄞
+鄢
+鄧
+鄭
+鄰
+鄱
+鄲
+鄺
+酉
+酊
+酋
+酌
+配
+酐
+酒
+酗
+酚
+酝
+酢
+酣
+酥
+酩
+酪
+酬
+酮
+酯
+酰
+酱
+酵
+酶
+酷
+酸
+酿
+醃
+醇
+醉
+醋
+醍
+醐
+醒
+醚
+醛
+醜
+醞
+醣
+醪
+醫
+醬
+醮
+醯
+醴
+醺
+釀
+釁
+采
+釉
+释
+釋
+里
+重
+野
+量
+釐
+金
+釗
+釘
+釜
+針
+釣
+釦
+釧
+釵
+鈀
+鈉
+鈍
+鈎
+鈔
+鈕
+鈞
+鈣
+鈦
+鈪
+鈴
+鈺
+鈾
+鉀
+鉄
+鉅
+鉉
+鉑
+鉗
+鉚
+鉛
+鉤
+鉴
+鉻
+銀
+銃
+銅
+銑
+銓
+銖
+銘
+銜
+銬
+銭
+銮
+銳
+銷
+銹
+鋁
+鋅
+鋒
+鋤
+鋪
+鋰
+鋸
+鋼
+錄
+錐
+錘
+錚
+錠
+錢
+錦
+錨
+錫
+錮
+錯
+録
+錳
+錶
+鍊
+鍋
+鍍
+鍛
+鍥
+鍰
+鍵
+鍺
+鍾
+鎂
+鎊
+鎌
+鎏
+鎔
+鎖
+鎗
+鎚
+鎧
+鎬
+鎮
+鎳
+鏈
+鏖
+鏗
+鏘
+鏞
+鏟
+鏡
+鏢
+鏤
+鏽
+鐘
+鐮
+鐲
+鐳
+鐵
+鐸
+鐺
+鑄
+鑊
+鑑
+鑒
+鑣
+鑫
+鑰
+鑲
+鑼
+鑽
+鑾
+鑿
+针
+钉
+钊
+钎
+钏
+钒
+钓
+钗
+钙
+钛
+钜
+钝
+钞
+钟
+钠
+钡
+钢
+钣
+钤
+钥
+钦
+钧
+钨
+钩
+钮
+钯
+钰
+钱
+钳
+钴
+钵
+钺
+钻
+钼
+钾
+钿
+铀
+铁
+铂
+铃
+铄
+铅
+铆
+铉
+铎
+铐
+铛
+铜
+铝
+铠
+铡
+铢
+铣
+铤
+铨
+铩
+铬
+铭
+铮
+铰
+铲
+铵
+银
+铸
+铺
+链
+铿
+销
+锁
+锂
+锄
+锅
+锆
+锈
+锉
+锋
+锌
+锏
+锐
+锑
+错
+锚
+锟
+锡
+锢
+锣
+锤
+锥
+锦
+锭
+键
+锯
+锰
+锲
+锵
+锹
+锺
+锻
+镀
+镁
+镂
+镇
+镉
+镌
+镍
+镐
+镑
+镕
+镖
+镗
+镛
+镜
+镣
+镭
+镯
+镰
+镳
+镶
+長
+长
+門
+閃
+閉
+開
+閎
+閏
+閑
+閒
+間
+閔
+閘
+閡
+関
+閣
+閥
+閨
+閩
+閱
+閲
+閹
+閻
+閾
+闆
+闇
+闊
+闌
+闍
+闔
+闕
+闖
+闘
+關
+闡
+闢
+门
+闪
+闫
+闭
+问
+闯
+闰
+闲
+间
+闵
+闷
+闸
+闹
+闺
+闻
+闽
+闾
+阀
+阁
+阂
+阅
+阆
+阇
+阈
+阉
+阎
+阐
+阑
+阔
+阕
+阖
+阙
+阚
+阜
+队
+阡
+阪
+阮
+阱
+防
+阳
+阴
+阵
+阶
+阻
+阿
+陀
+陂
+附
+际
+陆
+陇
+陈
+陋
+陌
+降
+限
+陕
+陛
+陝
+陞
+陟
+陡
+院
+陣
+除
+陨
+险
+陪
+陰
+陲
+陳
+陵
+陶
+陷
+陸
+険
+陽
+隅
+隆
+隈
+隊
+隋
+隍
+階
+随
+隐
+隔
+隕
+隘
+隙
+際
+障
+隠
+隣
+隧
+隨
+險
+隱
+隴
+隶
+隸
+隻
+隼
+隽
+难
+雀
+雁
+雄
+雅
+集
+雇
+雉
+雋
+雌
+雍
+雎
+雏
+雑
+雒
+雕
+雖
+雙
+雛
+雜
+雞
+離
+難
+雨
+雪
+雯
+雰
+雲
+雳
+零
+雷
+雹
+電
+雾
+需
+霁
+霄
+霆
+震
+霈
+霉
+霊
+霍
+霎
+霏
+霑
+霓
+霖
+霜
+霞
+霧
+霭
+霰
+露
+霸
+霹
+霽
+霾
+靂
+靄
+靈
+青
+靓
+靖
+静
+靚
+靛
+靜
+非
+靠
+靡
+面
+靥
+靦
+革
+靳
+靴
+靶
+靼
+鞅
+鞋
+鞍
+鞏
+鞑
+鞘
+鞠
+鞣
+鞦
+鞭
+韆
+韋
+韌
+韓
+韜
+韦
+韧
+韩
+韬
+韭
+音
+韵
+韶
+韻
+響
+頁
+頂
+頃
+項
+順
+須
+頌
+預
+頑
+頒
+頓
+頗
+領
+頜
+頡
+頤
+頫
+頭
+頰
+頷
+頸
+頹
+頻
+頼
+顆
+題
+額
+顎
+顏
+顔
+願
+顛
+類
+顧
+顫
+顯
+顱
+顴
+页
+顶
+顷
+项
+顺
+须
+顼
+顽
+顾
+顿
+颁
+颂
+预
+颅
+领
+颇
+颈
+颉
+颊
+颌
+颍
+颐
+频
+颓
+颔
+颖
+颗
+题
+颚
+颛
+颜
+额
+颞
+颠
+颡
+颢
+颤
+颦
+颧
+風
+颯
+颱
+颳
+颶
+颼
+飄
+飆
+风
+飒
+飓
+飕
+飘
+飙
+飚
+飛
+飞
+食
+飢
+飨
+飩
+飪
+飯
+飲
+飼
+飽
+飾
+餃
+餅
+餉
+養
+餌
+餐
+餒
+餓
+餘
+餚
+餛
+餞
+餡
+館
+餮
+餵
+餾
+饅
+饈
+饋
+饌
+饍
+饑
+饒
+饕
+饗
+饞
+饥
+饨
+饪
+饬
+饭
+饮
+饯
+饰
+饱
+饲
+饴
+饵
+饶
+饷
+饺
+饼
+饽
+饿
+馀
+馁
+馄
+馅
+馆
+馈
+馋
+馍
+馏
+馒
+馔
+首
+馗
+香
+馥
+馨
+馬
+馭
+馮
+馳
+馴
+駁
+駄
+駅
+駆
+駐
+駒
+駕
+駛
+駝
+駭
+駱
+駿
+騁
+騎
+騏
+験
+騙
+騨
+騰
+騷
+驀
+驅
+驊
+驍
+驒
+驕
+驗
+驚
+驛
+驟
+驢
+驥
+马
+驭
+驮
+驯
+驰
+驱
+驳
+驴
+驶
+驷
+驸
+驹
+驻
+驼
+驾
+驿
+骁
+骂
+骄
+骅
+骆
+骇
+骈
+骊
+骋
+验
+骏
+骐
+骑
+骗
+骚
+骛
+骜
+骞
+骠
+骡
+骤
+骥
+骧
+骨
+骯
+骰
+骶
+骷
+骸
+骼
+髂
+髅
+髋
+髏
+髒
+髓
+體
+髖
+高
+髦
+髪
+髮
+髯
+髻
+鬃
+鬆
+鬍
+鬓
+鬚
+鬟
+鬢
+鬣
+鬥
+鬧
+鬱
+鬼
+魁
+魂
+魄
+魅
+魇
+魍
+魏
+魔
+魘
+魚
+魯
+魷
+鮑
+鮨
+鮪
+鮭
+鮮
+鯉
+鯊
+鯖
+鯛
+鯨
+鯰
+鯽
+鰍
+鰓
+鰭
+鰲
+鰻
+鰾
+鱈
+鱉
+鱔
+鱗
+鱷
+鱸
+鱼
+鱿
+鲁
+鲈
+鲍
+鲑
+鲛
+鲜
+鲟
+鲢
+鲤
+鲨
+鲫
+鲱
+鲲
+鲶
+鲷
+鲸
+鳃
+鳄
+鳅
+鳌
+鳍
+鳕
+鳖
+鳗
+鳝
+鳞
+鳥
+鳩
+鳳
+鳴
+鳶
+鴉
+鴕
+鴛
+鴦
+鴨
+鴻
+鴿
+鵑
+鵜
+鵝
+鵡
+鵬
+鵰
+鵲
+鶘
+鶩
+鶯
+鶴
+鷗
+鷲
+鷹
+鷺
+鸚
+鸞
+鸟
+鸠
+鸡
+鸢
+鸣
+鸥
+鸦
+鸨
+鸪
+鸭
+鸯
+鸳
+鸵
+鸽
+鸾
+鸿
+鹂
+鹃
+鹄
+鹅
+鹈
+鹉
+鹊
+鹌
+鹏
+鹑
+鹕
+鹘
+鹜
+鹞
+鹤
+鹦
+鹧
+鹫
+鹭
+鹰
+鹳
+鹵
+鹹
+鹼
+鹽
+鹿
+麂
+麋
+麒
+麓
+麗
+麝
+麟
+麥
+麦
+麩
+麴
+麵
+麸
+麺
+麻
+麼
+麽
+麾
+黃
+黄
+黍
+黎
+黏
+黑
+黒
+黔
+默
+黛
+黜
+黝
+點
+黠
+黨
+黯
+黴
+鼋
+鼎
+鼐
+鼓
+鼠
+鼬
+鼹
+鼻
+鼾
+齁
+齊
+齋
+齐
+齒
+齡
+齢
+齣
+齦
+齿
+龄
+龅
+龈
+龊
+龋
+龌
+龍
+龐
+龔
+龕
+龙
+龚
+龛
+龜
+龟
+︰
+︱
+︶
+︿
+﹁
+﹂
+﹍
+﹏
+﹐
+﹑
+﹒
+﹔
+﹕
+﹖
+﹗
+﹙
+﹚
+﹝
+﹞
+﹡
+﹣
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+。
+「
+」
+、
+・
+ッ
+ー
+イ
+ク
+シ
+ス
+ト
+ノ
+フ
+ラ
+ル
+ン
+゙
+゚
+ ̄
+¥
+👍
+🔥
+😂
+😎
+...
+yam
+10
+2017
+12
+11
+2016
+20
+30
+15
+06
+lofter
+##s
+2015
+by
+16
+14
+18
+13
+24
+17
+2014
+21
+##0
+22
+19
+25
+23
+com
+100
+00
+05
+2013
+##a
+03
+09
+08
+28
+##2
+50
+01
+04
+##1
+27
+02
+2012
+##3
+26
+##e
+07
+##8
+##5
+##6
+##4
+##9
+##7
+29
+2011
+40
+##t
+2010
+##o
+##d
+##i
+2009
+##n
+app
+www
+the
+##m
+31
+##c
+##l
+##y
+##r
+##g
+2008
+60
+http
+200
+qq
+##p
+80
+##f
+google
+pixnet
+90
+cookies
+tripadvisor
+500
+##er
+##k
+35
+##h
+facebook
+2007
+2000
+70
+##b
+of
+##x
+##u
+45
+300
+iphone
+32
+1000
+2006
+48
+ip
+36
+in
+38
+3d
+##w
+##ing
+55
+ctrip
+##on
+##v
+33
+##の
+to
+34
+400
+id
+2005
+it
+37
+windows
+llc
+top
+99
+42
+39
+000
+led
+at
+##an
+41
+51
+52
+46
+49
+43
+53
+44
+##z
+android
+58
+and
+59
+2004
+56
+vr
+##か
+5000
+2003
+47
+blogthis
+twitter
+54
+##le
+150
+ok
+2018
+57
+75
+cn
+no
+ios
+##in
+##mm
+##00
+800
+on
+te
+3000
+65
+2001
+360
+95
+ig
+lv
+120
+##ng
+##を
+##us
+##に
+pc
+てす
+──
+600
+##te
+85
+2002
+88
+##ed
+html
+ncc
+wifi
+email
+64
+blog
+is
+##10
+##て
+mail
+online
+##al
+dvd
+##ic
+studio
+##は
+##℃
+##ia
+##と
+line
+vip
+72
+##q
+98
+##ce
+##en
+for
+##is
+##ra
+##es
+##j
+usb
+net
+cp
+1999
+asia
+4g
+##cm
+diy
+new
+3c
+##お
+ta
+66
+language
+vs
+apple
+tw
+86
+web
+##ne
+ipad
+62
+you
+##re
+101
+68
+##tion
+ps
+de
+bt
+pony
+atm
+##2017
+1998
+67
+##ch
+ceo
+##or
+go
+##na
+av
+pro
+cafe
+96
+pinterest
+97
+63
+pixstyleme3c
+##ta
+more
+said
+##2016
+1997
+mp3
+700
+##ll
+nba
+jun
+##20
+92
+tv
+1995
+pm
+61
+76
+nbsp
+250
+##ie
+linux
+##ma
+cd
+110
+hd
+##17
+78
+##ion
+77
+6000
+am
+##th
+##st
+94
+##se
+##et
+69
+180
+gdp
+my
+105
+81
+abc
+89
+flash
+79
+one
+93
+1990
+1996
+##ck
+gps
+##も
+##ly
+web885
+106
+2020
+91
+##ge
+4000
+1500
+xd
+boss
+isbn
+1994
+org
+##ry
+me
+love
+##11
+0fork
+73
+##12
+3g
+##ter
+##ar
+71
+82
+##la
+hotel
+130
+1970
+pk
+83
+87
+140
+ie
+##os
+##30
+##el
+74
+##50
+seo
+cpu
+##ml
+p2p
+84
+may
+##る
+sun
+tue
+internet
+cc
+posted
+youtube
+##at
+##ン
+##man
+ii
+##ル
+##15
+abs
+nt
+pdf
+yahoo
+ago
+1980
+##it
+news
+mac
+104
+##てす
+##me
+##り
+java
+1992
+spa
+##de
+##nt
+hk
+all
+plus
+la
+1993
+##mb
+##16
+##ve
+west
+##da
+160
+air
+##い
+##ps
+から
+##to
+1989
+logo
+htc
+php
+https
+fi
+momo
+##son
+sat
+##ke
+##80
+ebd
+suv
+wi
+day
+apk
+##88
+##um
+mv
+galaxy
+wiki
+or
+brake
+##ス
+1200
+する
+this
+1991
+mon
+##こ
+❤2017
+po
+##ない
+javascript
+life
+home
+june
+##ss
+system
+900
+##ー
+##0
+pp
+1988
+world
+fb
+4k
+br
+##as
+ic
+ai
+leonardo
+safari
+##60
+live
+free
+xx
+wed
+win7
+kiehl
+##co
+lg
+o2o
+##go
+us
+235
+1949
+mm
+しい
+vfm
+kanye
+##90
+##2015
+##id
+jr
+##ey
+123
+rss
+##sa
+##ro
+##am
+##no
+thu
+fri
+350
+##sh
+##ki
+103
+comments
+name
+##のて
+##pe
+##ine
+max
+1987
+8000
+uber
+##mi
+##ton
+wordpress
+office
+1986
+1985
+##ment
+107
+bd
+win10
+##ld
+##li
+gmail
+bb
+dior
+##rs
+##ri
+##rd
+##ます
+up
+cad
+##®
+dr
+して
+read
+##21
+をお
+##io
+##99
+url
+1984
+pvc
+paypal
+show
+policy
+##40
+##ty
+##18
+with
+##★
+##01
+txt
+102
+##ba
+dna
+from
+post
+mini
+ar
+taiwan
+john
+##ga
+privacy
+agoda
+##13
+##ny
+word
+##24
+##22
+##by
+##ur
+##hz
+1982
+##ang
+265
+cookie
+netscape
+108
+##ka
+##~
+##ad
+house
+share
+note
+ibm
+code
+hello
+nike
+sim
+survey
+##016
+1979
+1950
+wikia
+##32
+##017
+5g
+cbc
+##tor
+##kg
+1983
+##rt
+##14
+campaign
+store
+2500
+os
+##ct
+##ts
+##°
+170
+api
+##ns
+365
+excel
+##な
+##ao
+##ら
+##し
+~~
+##nd
+university
+163
+には
+518
+##70
+##ya
+##il
+##25
+pierre
+ipo
+0020
+897
+##23
+hotels
+##ian
+のお
+125
+years
+6606
+##ers
+##26
+high
+##day
+time
+##ay
+bug
+##line
+##く
+##す
+##be
+xp
+talk2yam
+yamservice
+10000
+coco
+##dy
+sony
+##ies
+1978
+microsoft
+david
+people
+##ha
+1960
+instagram
+intel
+その
+##ot
+iso
+1981
+##va
+115
+##mo
+##land
+xxx
+man
+co
+ltxsw
+##ation
+baby
+220
+##pa
+##ol
+1945
+7000
+tag
+450
+##ue
+msn
+##31
+oppo
+##ト
+##ca
+control
+##om
+st
+chrome
+##ure
+##ん
+be
+##き
+lol
+##19
+した
+##bo
+240
+lady
+##100
+##way
+##から
+4600
+##ko
+##do
+##un
+4s
+corporation
+168
+##ni
+herme
+##28
+cp
+978
+##up
+##06
+ui
+##ds
+ppt
+admin
+three
+します
+bbc
+re
+128
+##48
+ca
+##015
+##35
+hp
+##ee
+tpp
+##た
+##ive
+××
+root
+##cc
+##ました
+##ble
+##ity
+adobe
+park
+114
+et
+oled
+city
+##ex
+##ler
+##ap
+china
+##book
+20000
+view
+##ice
+global
+##km
+your
+hong
+##mg
+out
+##ms
+ng
+ebay
+##29
+menu
+ubuntu
+##cy
+rom
+##view
+open
+ktv
+do
+server
+##lo
+if
+english
+##ね
+##5
+##oo
+1600
+##02
+step1
+kong
+club
+135
+july
+inc
+1976
+mr
+hi
+##net
+touch
+##ls
+##ii
+michael
+lcd
+##05
+##33
+phone
+james
+step2
+1300
+ios9
+##box
+dc
+##2
+##ley
+samsung
+111
+280
+pokemon
+css
+##ent
+##les
+いいえ
+##1
+s8
+atom
+play
+bmw
+##said
+sa
+etf
+ctrl
+♥yoyo♥
+##55
+2025
+##2014
+##66
+adidas
+amazon
+1958
+##ber
+##ner
+visa
+##77
+##der
+1800
+connectivity
+##hi
+firefox
+109
+118
+hr
+so
+style
+mark
+pop
+ol
+skip
+1975
+as
+##27
+##ir
+##61
+190
+mba
+##う
+##ai
+le
+##ver
+1900
+cafe2017
+lte
+super
+113
+129
+##ron
+amd
+like
+##☆
+are
+##ster
+we
+##sk
+paul
+data
+international
+##ft
+longchamp
+ssd
+good
+##ート
+##ti
+reply
+##my
+↓↓↓
+apr
+star
+##ker
+source
+136
+js
+112
+get
+force
+photo
+##one
+126
+##2013
+##ow
+link
+bbs
+1972
+goods
+##lin
+python
+119
+##ip
+game
+##ics
+##ません
+blue
+##●
+520
+##45
+page
+itunes
+##03
+1955
+260
+1968
+gt
+gif
+618
+##ff
+##47
+group
+くたさい
+about
+bar
+ganji
+##nce
+music
+lee
+not
+1977
+1971
+1973
+##per
+an
+faq
+comment
+##って
+days
+##ock
+116
+##bs
+1974
+1969
+v1
+player
+1956
+xbox
+sql
+fm
+f1
+139
+##ah
+210
+##lv
+##mp
+##000
+melody
+1957
+##3
+550
+17life
+199
+1966
+xml
+market
+##au
+##71
+999
+##04
+what
+gl
+##95
+##age
+tips
+##68
+book
+##ting
+mysql
+can
+1959
+230
+##ung
+wonderland
+watch
+10℃
+##ction
+9000
+mar
+mobile
+1946
+1962
+article
+##db
+part
+▲top
+party
+って
+1967
+1964
+1948
+##07
+##ore
+##op
+この
+dj
+##78
+##38
+010
+main
+225
+1965
+##ong
+art
+320
+ad
+134
+020
+##73
+117
+pm2
+japan
+228
+##08
+ts
+1963
+##ica
+der
+sm
+##36
+2019
+##wa
+ct
+##7
+##や
+##64
+1937
+homemesh
+search
+##85
+##れは
+##tv
+##di
+macbook
+##9
+##くたさい
+service
+##♥
+type
+った
+750
+##ier
+##si
+##75
+##います
+##ok
+best
+##ット
+goris
+lock
+##った
+cf
+3m
+big
+##ut
+ftp
+carol
+##vi
+10
+1961
+happy
+sd
+##ac
+122
+anti
+pe
+cnn
+iii
+1920
+138
+##ラ
+1940
+esp
+jan
+tags
+##98
+##51
+august
+vol
+##86
+154
+##™
+##fs
+##れ
+##sion
+design
+ac
+##ム
+press
+jordan
+ppp
+that
+key
+check
+##6
+##tt
+##㎡
+1080p
+##lt
+power
+##42
+1952
+##bc
+vivi
+##ック
+he
+133
+121
+jpg
+##rry
+201
+175
+3500
+1947
+nb
+##ted
+##rn
+しています
+1954
+usd
+##t00
+master
+##ンク
+001
+model
+##58
+al
+##09
+1953
+##34
+ram
+goo
+ても
+##ui
+127
+1930
+red
+##ary
+rpg
+item
+##pm
+##41
+270
+##za
+project
+##2012
+hot
+td
+blogabstract
+##ger
+##62
+650
+##44
+gr2
+##します
+##m
+black
+electronic
+nfc
+year
+asus
+また
+html5
+cindy
+##hd
+m3
+132
+esc
+##od
+booking
+##53
+fed
+tvb
+##81
+##ina
+mit
+165
+##いる
+chan
+192
+distribution
+next
+になる
+peter
+bios
+steam
+cm
+1941
+にも
+pk10
+##ix
+##65
+##91
+dec
+nasa
+##ana
+icecat
+00z
+b1
+will
+##46
+li
+se
+##ji
+##み
+##ard
+oct
+##ain
+jp
+##ze
+##bi
+cio
+##56
+smart
+h5
+##39
+##port
+curve
+vpn
+##nm
+##dia
+utc
+##あり
+12345678910
+##52
+rmvb
+chanel
+a4
+miss
+##and
+##im
+media
+who
+##63
+she
+girl
+5s
+124
+vera
+##して
+class
+vivo
+king
+##フ
+##ei
+national
+ab
+1951
+5cm
+888
+145
+ipod
+ap
+1100
+5mm
+211
+ms
+2756
+##69
+mp4
+msci
+##po
+##89
+131
+mg
+index
+380
+##bit
+##out
+##zz
+##97
+##67
+158
+apec
+##8
+photoshop
+opec
+¥799
+ては
+##96
+##tes
+##ast
+2g
+○○
+##ール
+¥2899
+##ling
+##よ
+##ory
+1938
+##ical
+kitty
+content
+##43
+step3
+##cn
+win8
+155
+vc
+1400
+iphone7
+robert
+##した
+tcl
+137
+beauty
+##87
+en
+dollars
+##ys
+##oc
+step
+pay
+yy
+a1
+##2011
+##lly
+##ks
+##♪
+1939
+188
+download
+1944
+sep
+exe
+ph
+います
+school
+gb
+center
+pr
+street
+##board
+uv
+##37
+##lan
+winrar
+##que
+##ua
+##com
+1942
+1936
+480
+gpu
+##4
+ettoday
+fu
+tom
+##54
+##ren
+##via
+149
+##72
+b2b
+144
+##79
+##tch
+rose
+arm
+mb
+##49
+##ial
+##nn
+nvidia
+step4
+mvp
+00㎡
+york
+156
+##イ
+how
+cpi
+591
+2765
+gov
+kg
+joe
+##xx
+mandy
+pa
+##ser
+copyright
+fashion
+1935
+don
+##け
+ecu
+##ist
+##art
+erp
+wap
+have
+##lm
+talk
+##ek
+##ning
+##if
+ch
+##ite
+video
+1943
+cs
+san
+iot
+look
+##84
+##2010
+##ku
+october
+##ux
+trump
+##hs
+##ide
+box
+141
+first
+##ins
+april
+##ight
+##83
+185
+angel
+protected
+aa
+151
+162
+x1
+m2
+##fe
+##×
+##ho
+size
+143
+min
+ofo
+fun
+gomaji
+ex
+hdmi
+food
+dns
+march
+chris
+kevin
+##のか
+##lla
+##pp
+##ec
+ag
+ems
+6s
+720p
+##rm
+##ham
+off
+##92
+asp
+team
+fandom
+ed
+299
+▌♥
+##ell
+info
+されています
+##82
+sina
+4066
+161
+##able
+##ctor
+330
+399
+315
+dll
+rights
+ltd
+idc
+jul
+3kg
+1927
+142
+ma
+surface
+##76
+##ク
+~~~
+304
+mall
+eps
+146
+green
+##59
+map
+space
+donald
+v2
+sodu
+##light
+1931
+148
+1700
+まて
+310
+reserved
+htm
+##han
+##57
+2d
+178
+mod
+##ise
+##tions
+152
+ti
+##shi
+doc
+1933
+icp
+055
+wang
+##ram
+shopping
+aug
+##pi
+##well
+now
+wam
+b2
+からお
+##hu
+236
+1928
+##gb
+266
+f2
+##93
+153
+mix
+##ef
+##uan
+bwl
+##plus
+##res
+core
+##ess
+tea
+5℃
+hktvmall
+nhk
+##ate
+list
+##ese
+301
+feb
+4m
+inn
+ての
+nov
+159
+12345
+daniel
+##ci
+pass
+##bet
+##nk
+coffee
+202
+ssl
+airbnb
+##ute
+fbi
+woshipm
+skype
+ea
+cg
+sp
+##fc
+##www
+yes
+edge
+alt
+007
+##94
+fpga
+##ght
+##gs
+iso9001
+さい
+##ile
+##wood
+##uo
+image
+lin
+icon
+american
+##em
+1932
+set
+says
+##king
+##tive
+blogger
+##74
+なと
+256
+147
+##ox
+##zy
+##red
+##ium
+##lf
+nokia
+claire
+##リ
+##ding
+november
+lohas
+##500
+##tic
+##マ
+##cs
+##ある
+##che
+##ire
+##gy
+##ult
+db
+january
+win
+##カ
+166
+road
+ptt
+##ま
+##つ
+198
+##fa
+##mer
+anna
+pchome
+はい
+udn
+ef
+420
+##time
+##tte
+2030
+##ア
+g20
+white
+かかります
+1929
+308
+garden
+eleven
+di
+##おります
+chen
+309b
+777
+172
+young
+cosplay
+ちてない
+4500
+bat
+##123
+##tra
+##ては
+kindle
+npc
+steve
+etc
+##ern
+##|
+call
+xperia
+ces
+travel
+sk
+s7
+##ous
+1934
+##int
+みいたたけます
+183
+edu
+file
+cho
+qr
+##car
+##our
+186
+##ant
+##d
+eric
+1914
+rends
+##jo
+##する
+mastercard
+##2000
+kb
+##min
+290
+##ino
+vista
+##ris
+##ud
+jack
+2400
+##set
+169
+pos
+1912
+##her
+##ou
+taipei
+しく
+205
+beta
+##ませんか
+232
+##fi
+express
+255
+body
+##ill
+aphojoy
+user
+december
+meiki
+##ick
+tweet
+richard
+##av
+##ᆫ
+iphone6
+##dd
+ちてすか
+views
+##mark
+321
+pd
+##00
+times
+##▲
+level
+##ash
+10g
+point
+5l
+##ome
+208
+koreanmall
+##ak
+george
+q2
+206
+wma
+tcp
+##200
+スタッフ
+full
+mlb
+##lle
+##watch
+tm
+run
+179
+911
+smith
+business
+##und
+1919
+color
+##tal
+222
+171
+##less
+moon
+4399
+##rl
+update
+pcb
+shop
+499
+157
+little
+なし
+end
+##mhz
+van
+dsp
+easy
+660
+##house
+##key
+history
+##o
+oh
+##001
+##hy
+##web
+oem
+let
+was
+##2009
+##gg
+review
+##wan
+182
+##°c
+203
+uc
+title
+##val
+united
+233
+2021
+##ons
+doi
+trivago
+overdope
+sbs
+##ance
+##ち
+grand
+special
+573032185
+imf
+216
+wx17house
+##so
+##ーム
+audi
+##he
+london
+william
+##rp
+##ake
+science
+beach
+cfa
+amp
+ps4
+880
+##800
+##link
+##hp
+crm
+ferragamo
+bell
+make
+##eng
+195
+under
+zh
+photos
+2300
+##style
+##ント
+via
+176
+da
+##gi
+company
+i7
+##ray
+thomas
+370
+ufo
+i5
+##max
+plc
+ben
+back
+research
+8g
+173
+mike
+##pc
+##ッフ
+september
+189
+##ace
+vps
+february
+167
+pantos
+wp
+lisa
+1921
+★★
+jquery
+night
+long
+offer
+##berg
+##news
+1911
+##いて
+ray
+fks
+wto
+せます
+over
+164
+340
+##all
+##rus
+1924
+##888
+##works
+blogtitle
+loftpermalink
+##→
+187
+martin
+test
+ling
+km
+##め
+15000
+fda
+v3
+##ja
+##ロ
+wedding
+かある
+outlet
+family
+##ea
+をこ
+##top
+story
+##ness
+salvatore
+##lu
+204
+swift
+215
+room
+している
+oracle
+##ul
+1925
+sam
+b2c
+week
+pi
+rock
+##のは
+##a
+##けと
+##ean
+##300
+##gle
+cctv
+after
+chinese
+##back
+powered
+x2
+##tan
+1918
+##nes
+##イン
+canon
+only
+181
+##zi
+##las
+say
+##oe
+184
+##sd
+221
+##bot
+##world
+##zo
+sky
+made
+top100
+just
+1926
+pmi
+802
+234
+gap
+##vr
+177
+les
+174
+▲topoct
+ball
+vogue
+vi
+ing
+ofweek
+cos
+##list
+##ort
+▲topmay
+##なら
+##lon
+として
+last
+##tc
+##of
+##bus
+##gen
+real
+eva
+##コ
+a3
+nas
+##lie
+##ria
+##coin
+##bt
+▲topapr
+his
+212
+cat
+nata
+vive
+health
+⋯⋯
+drive
+sir
+▲topmar
+du
+cup
+##カー
+##ook
+##よう
+##sy
+alex
+msg
+tour
+しました
+3ce
+##word
+193
+ebooks
+r8
+block
+318
+##より
+2200
+nice
+pvp
+207
+months
+1905
+rewards
+##ther
+1917
+0800
+##xi
+##チ
+##sc
+micro
+850
+gg
+blogfp
+op
+1922
+daily
+m1
+264
+true
+##bb
+ml
+##tar
+##のお
+##ky
+anthony
+196
+253
+##yo
+state
+218
+##ara
+##aa
+##rc
+##tz
+##ston
+より
+gear
+##eo
+##ade
+ge
+see
+1923
+##win
+##ura
+ss
+heart
+##den
+##ita
+down
+##sm
+el
+png
+2100
+610
+rakuten
+whatsapp
+bay
+dream
+add
+##use
+680
+311
+pad
+gucci
+mpv
+##ode
+##fo
+island
+▲topjun
+##▼
+223
+jason
+214
+chicago
+##❤
+しの
+##hone
+io
+##れる
+##ことか
+sogo
+be2
+##ology
+990
+cloud
+vcd
+##con
+2~3
+##ford
+##joy
+##kb
+##こさいます
+##rade
+but
+##ach
+docker
+##ful
+rfid
+ul
+##ase
+hit
+ford
+##star
+580
+##○
+11
+a2
+sdk
+reading
+edited
+##are
+cmos
+##mc
+238
+siri
+light
+##ella
+##ため
+bloomberg
+##read
+pizza
+##ison
+jimmy
+##vm
+college
+node
+journal
+ba
+18k
+##play
+245
+##cer
+20
+magic
+##yu
+191
+jump
+288
+tt
+##ings
+asr
+##lia
+3200
+step5
+network
+##cd
+mc
+いします
+1234
+pixstyleme
+273
+##600
+2800
+money
+★★★★★
+1280
+12
+430
+bl
+みの
+act
+##tus
+tokyo
+##rial
+##life
+emba
+##ae
+saas
+tcs
+##rk
+##wang
+summer
+##sp
+ko
+##ving
+390
+premium
+##その
+netflix
+##ヒ
+uk
+mt
+##lton
+right
+frank
+two
+209
+える
+##ple
+##cal
+021
+##んな
+##sen
+##ville
+hold
+nexus
+dd
+##ius
+てお
+##mah
+##なく
+tila
+zero
+820
+ce
+##tin
+resort
+##ws
+charles
+old
+p10
+5d
+report
+##360
+##ru
+##には
+bus
+vans
+lt
+##est
+pv
+##レ
+links
+rebecca
+##ツ
+##dm
+azure
+##365
+きな
+limited
+bit
+4gb
+##mon
+1910
+moto
+##eam
+213
+1913
+var
+eos
+なとの
+226
+blogspot
+された
+699
+e3
+dos
+dm
+fc
+##ments
+##ik
+##kw
+boy
+##bin
+##ata
+960
+er
+##せ
+219
+##vin
+##tu
+##ula
+194
+##∥
+station
+##ろ
+##ature
+835
+files
+zara
+hdr
+top10
+nature
+950
+magazine
+s6
+marriott
+##シ
+avira
+case
+##っと
+tab
+##ran
+tony
+##home
+oculus
+im
+##ral
+jean
+saint
+cry
+307
+rosie
+##force
+##ini
+ice
+##bert
+のある
+##nder
+##mber
+pet
+2600
+##◆
+plurk
+▲topdec
+##sis
+00kg
+▲topnov
+720
+##ence
+tim
+##ω
+##nc
+##ても
+##name
+log
+ips
+great
+ikea
+malaysia
+unix
+##イト
+3600
+##ncy
+##nie
+12000
+akb48
+##ye
+##oid
+404
+##chi
+##いた
+oa
+xuehai
+##1000
+##orm
+##rf
+275
+さん
+##ware
+##リー
+980
+ho
+##pro
+text
+##era
+560
+bob
+227
+##ub
+##2008
+8891
+scp
+avi
+##zen
+2022
+mi
+wu
+museum
+qvod
+apache
+lake
+jcb
+▲topaug
+★★★
+ni
+##hr
+hill
+302
+ne
+weibo
+490
+ruby
+##ーシ
+##ヶ
+##row
+4d
+▲topjul
+iv
+##ish
+github
+306
+mate
+312
+##スト
+##lot
+##ane
+andrew
+のハイト
+##tina
+t1
+rf
+ed2k
+##vel
+##900
+way
+final
+りの
+ns
+5a
+705
+197
+##メ
+sweet
+bytes
+##ene
+▲topjan
+231
+##cker
+##2007
+##px
+100g
+topapp
+229
+helpapp
+rs
+low
+14k
+g4g
+care
+630
+ldquo
+あり
+##fork
+leave
+rm
+edition
+##gan
+##zon
+##qq
+▲topsep
+##google
+##ism
+gold
+224
+explorer
+##zer
+toyota
+category
+select
+visual
+##labels
+restaurant
+##md
+posts
+s1
+##ico
+もっと
+angelababy
+123456
+217
+sports
+s3
+mbc
+1915
+してくたさい
+shell
+x86
+candy
+##new
+kbs
+face
+xl
+470
+##here
+4a
+swissinfo
+v8
+▲topfeb
+dram
+##ual
+##vice
+3a
+##wer
+sport
+q1
+ios10
+public
+int
+card
+##c
+ep
+au
+rt
+##れた
+1080
+bill
+##mll
+kim
+30
+460
+wan
+##uk
+##ミ
+x3
+298
+0t
+scott
+##ming
+239
+e5
+##3d
+h7n9
+worldcat
+brown
+##あります
+##vo
+##led
+##580
+##ax
+249
+410
+##ert
+paris
+##~6
+polo
+925
+##lr
+599
+##ナ
+capital
+##hing
+bank
+cv
+1g
+##chat
+##s
+##たい
+adc
+##ule
+2m
+##e
+digital
+hotmail
+268
+##pad
+870
+bbq
+quot
+##ring
+before
+wali
+##まて
+mcu
+2k
+2b
+という
+costco
+316
+north
+333
+switch
+##city
+##p
+philips
+##mann
+management
+panasonic
+##cl
+##vd
+##ping
+##rge
+alice
+##lk
+##ましょう
+css3
+##ney
+vision
+alpha
+##ular
+##400
+##tter
+lz
+にお
+##ありません
+mode
+gre
+1916
+pci
+##tm
+237
+1~2
+##yan
+##そ
+について
+##let
+##キ
+work
+war
+coach
+ah
+mary
+##ᅵ
+huang
+##pt
+a8
+pt
+follow
+##berry
+1895
+##ew
+a5
+ghost
+##ション
+##wn
+##og
+south
+##code
+girls
+##rid
+action
+villa
+git
+r11
+table
+games
+##cket
+error
+##anonymoussaid
+##ag
+here
+##ame
+##gc
+qa
+##■
+##lis
+gmp
+##gin
+vmalife
+##cher
+yu
+wedding
+##tis
+demo
+dragon
+530
+soho
+social
+bye
+##rant
+river
+orz
+acer
+325
+##↑
+##ース
+##ats
+261
+del
+##ven
+440
+ups
+##ように
+##ター
+305
+value
+macd
+yougou
+##dn
+661
+##ano
+ll
+##urt
+##rent
+continue
+script
+##wen
+##ect
+paper
+263
+319
+shift
+##chel
+##フト
+##cat
+258
+x5
+fox
+243
+##さん
+car
+aaa
+##blog
+loading
+##yn
+##tp
+kuso
+799
+si
+sns
+イカせるテンマ
+ヒンクテンマ3
+rmb
+vdc
+forest
+central
+prime
+help
+ultra
+##rmb
+##ような
+241
+square
+688
+##しい
+のないフロクに
+##field
+##reen
+##ors
+##ju
+c1
+start
+510
+##air
+##map
+cdn
+##wo
+cba
+stephen
+m8
+100km
+##get
+opera
+##base
+##ood
+vsa
+com™
+##aw
+##ail
+251
+なのて
+count
+t2
+##ᅡ
+##een
+2700
+hop
+##gp
+vsc
+tree
+##eg
+##ose
+816
+285
+##ories
+##shop
+alphago
+v4
+1909
+simon
+##ᆼ
+fluke62max
+zip
+スホンサー
+##sta
+louis
+cr
+bas
+##~10
+bc
+##yer
+hadoop
+##ube
+##wi
+1906
+0755
+hola
+##low
+place
+centre
+5v
+d3
+##fer
+252
+##750
+##media
+281
+540
+0l
+exchange
+262
+series
+##ハー
+##san
+eb
+##bank
+##k
+q3
+##nge
+##mail
+take
+##lp
+259
+1888
+client
+east
+cache
+event
+vincent
+##ールを
+きを
+##nse
+sui
+855
+adchoice
+##и
+##stry
+##なたの
+246
+##zone
+ga
+apps
+sea
+##ab
+248
+cisco
+##タ
+##rner
+kymco
+##care
+dha
+##pu
+##yi
+minkoff
+royal
+p1
+への
+annie
+269
+collection
+kpi
+playstation
+257
+になります
+866
+bh
+##bar
+queen
+505
+radio
+1904
+andy
+armani
+##xy
+manager
+iherb
+##ery
+##share
+spring
+raid
+johnson
+1908
+##ob
+volvo
+hall
+##ball
+v6
+our
+taylor
+##hk
+bi
+242
+##cp
+kate
+bo
+water
+technology
+##rie
+サイトは
+277
+##ona
+##sl
+hpv
+303
+gtx
+hip
+rdquo
+jayz
+stone
+##lex
+##rum
+namespace
+##やり
+620
+##ale
+##atic
+des
+##erson
+##ql
+##ves
+##type
+enter
+##この
+##てきます
+d2
+##168
+##mix
+##bian
+との
+a9
+jj
+ky
+##lc
+access
+movie
+##hc
+リストに
+tower
+##ration
+##mit
+ます
+##nch
+ua
+tel
+prefix
+##o2
+1907
+##point
+1901
+ott
+~10
+##http
+##ury
+baidu
+##ink
+member
+##logy
+bigbang
+nownews
+##js
+##shot
+##tb
+##こと
+247
+eba
+##tics
+##lus
+ける
+v5
+spark
+##ama
+there
+##ions
+god
+##lls
+##down
+hiv
+##ress
+burberry
+day2
+##kv
+◆◆
+jeff
+related
+film
+edit
+joseph
+283
+##ark
+cx
+32gb
+order
+g9
+30000
+##ans
+##tty
+s5
+##bee
+かあります
+thread
+xr
+buy
+sh
+005
+land
+spotify
+mx
+##ari
+276
+##verse
+×email
+sf
+why
+##ことて
+244
+7headlines
+nego
+sunny
+dom
+exo
+401
+666
+positioning
+fit
+rgb
+##tton
+278
+kiss
+alexa
+adam
+lp
+みリストを
+##g
+mp
+##ties
+##llow
+amy
+##du
+np
+002
+institute
+271
+##rth
+##lar
+2345
+590
+##des
+sidebar
+15
+imax
+site
+##cky
+##kit
+##ime
+##009
+season
+323
+##fun
+##ンター
+##ひ
+gogoro
+a7
+pu
+lily
+fire
+twd600
+##ッセーシを
+いて
+##vis
+30ml
+##cture
+##をお
+information
+##オ
+close
+friday
+##くれる
+yi
+nick
+てすか
+##tta
+##tel
+6500
+##lock
+cbd
+economy
+254
+かお
+267
+tinker
+double
+375
+8gb
+voice
+##app
+oops
+channel
+today
+985
+##right
+raw
+xyz
+##+
+jim
+edm
+##cent
+7500
+supreme
+814
+ds
+##its
+##asia
+dropbox
+##てすか
+##tti
+books
+272
+100ml
+##tle
+##ller
+##ken
+##more
+##boy
+sex
+309
+##dom
+t3
+##ider
+##なります
+##unch
+1903
+810
+feel
+5500
+##かった
+##put
+により
+s2
+mo
+##gh
+men
+ka
+amoled
+div
+##tr
+##n1
+port
+howard
+##tags
+ken
+dnf
+##nus
+adsense
+##а
+ide
+##へ
+buff
+thunder
+##town
+##ique
+has
+##body
+auto
+pin
+##erry
+tee
+てした
+295
+number
+##the
+##013
+object
+psp
+cool
+udnbkk
+16gb
+##mic
+miui
+##tro
+most
+r2
+##alk
+##nity
+1880
+±0
+##いました
+428
+s4
+law
+version
+##oa
+n1
+sgs
+docomo
+##tf
+##ack
+henry
+fc2
+##ded
+##sco
+##014
+##rite
+286
+0mm
+linkedin
+##ada
+##now
+wii
+##ndy
+ucbug
+##◎
+sputniknews
+legalminer
+##ika
+##xp
+2gb
+##bu
+q10
+oo
+b6
+come
+##rman
+cheese
+ming
+maker
+##gm
+nikon
+##fig
+ppi
+kelly
+##ります
+jchere
+てきます
+ted
+md
+003
+fgo
+tech
+##tto
+dan
+soc
+##gl
+##len
+hair
+earth
+640
+521
+img
+##pper
+##a1
+##てきる
+##ロク
+acca
+##ition
+##ference
+suite
+##ig
+outlook
+##mond
+##cation
+398
+##pr
+279
+101vip
+358
+##999
+282
+64gb
+3800
+345
+airport
+##over
+284
+##おり
+jones
+##ith
+lab
+##su
+##いるのて
+co2
+town
+piece
+##llo
+no1
+vmware
+24h
+##qi
+focus
+reader
+##admin
+##ora
+tb
+false
+##log
+1898
+know
+lan
+838
+##ces
+f4
+##ume
+motel
+stop
+##oper
+na
+flickr
+netcomponents
+##af
+##─
+pose
+williams
+local
+##ound
+##cg
+##site
+##iko
+いお
+274
+5m
+gsm
+con
+##ath
+1902
+friends
+##hip
+cell
+317
+##rey
+780
+cream
+##cks
+012
+##dp
+facebooktwitterpinterestgoogle
+sso
+324
+shtml
+song
+swiss
+##mw
+##キンク
+lumia
+xdd
+string
+tiffany
+522
+marc
+られた
+insee
+russell
+sc
+dell
+##ations
+ok
+camera
+289
+##vs
+##flow
+##late
+classic
+287
+##nter
+stay
+g1
+mtv
+512
+##ever
+##lab
+##nger
+qe
+sata
+ryan
+d1
+50ml
+cms
+##cing
+su
+292
+3300
+editor
+296
+##nap
+security
+sunday
+association
+##ens
+##700
+##bra
+acg
+##かり
+sofascore
+とは
+mkv
+##ign
+jonathan
+gary
+build
+labels
+##oto
+tesla
+moba
+qi
+gohappy
+general
+ajax
+1024
+##かる
+サイト
+society
+##test
+##urs
+wps
+fedora
+##ich
+mozilla
+328
+##480
+##dr
+usa
+urn
+##lina
+##r
+grace
+##die
+##try
+##ader
+1250
+##なり
+elle
+570
+##chen
+##ᆯ
+price
+##ten
+uhz
+##ough
+eq
+##hen
+states
+push
+session
+balance
+wow
+506
+##cus
+##py
+when
+##ward
+##ep
+34e
+wong
+library
+prada
+##サイト
+##cle
+running
+##ree
+313
+ck
+date
+q4
+##ctive
+##ool
+##>
+mk
+##ira
+##163
+388
+die
+secret
+rq
+dota
+buffet
+は1ヶ
+e6
+##ez
+pan
+368
+ha
+##card
+##cha
+2a
+##さ
+alan
+day3
+eye
+f3
+##end
+france
+keep
+adi
+rna
+tvbs
+##ala
+solo
+nova
+##え
+##tail
+##ょう
+support
+##ries
+##なる
+##ved
+base
+copy
+iis
+fps
+##ways
+hero
+hgih
+profile
+fish
+mu
+ssh
+entertainment
+chang
+##wd
+click
+cake
+##ond
+pre
+##tom
+kic
+pixel
+##ov
+##fl
+product
+6a
+##pd
+dear
+##gate
+es
+yumi
+audio
+##²
+##sky
+echo
+bin
+where
+##ture
+329
+##ape
+find
+sap
+isis
+##なと
+nand
+##101
+##load
+##ream
+band
+a6
+525
+never
+##post
+festival
+50cm
+##we
+555
+guide
+314
+zenfone
+##ike
+335
+gd
+forum
+jessica
+strong
+alexander
+##ould
+software
+allen
+##ious
+program
+360°
+else
+lohasthree
+##gar
+することかてきます
+please
+##れます
+rc
+##ggle
+##ric
+bim
+50000
+##own
+eclipse
+355
+brian
+3ds
+##side
+061
+361
+##other
+##ける
+##tech
+##ator
+485
+engine
+##ged
+##t
+plaza
+##fit
+cia
+ngo
+westbrook
+shi
+tbs
+50mm
+##みませんか
+sci
+291
+reuters
+##ily
+contextlink
+##hn
+af
+##cil
+bridge
+very
+##cel
+1890
+cambridge
+##ize
+15g
+##aid
+##data
+790
+frm
+##head
+award
+butler
+##sun
+meta
+##mar
+america
+ps3
+puma
+pmid
+##すか
+lc
+670
+kitchen
+##lic
+オーフン5
+きなしソフトサーヒス
+そして
+day1
+future
+★★★★
+##text
+##page
+##rris
+pm1
+##ket
+fans
+##っています
+1001
+christian
+bot
+kids
+trackback
+##hai
+c3
+display
+##hl
+n2
+1896
+idea
+さんも
+##sent
+airmail
+##ug
+##men
+pwm
+けます
+028
+##lution
+369
+852
+awards
+schemas
+354
+asics
+wikipedia
+font
+##tional
+##vy
+c2
+293
+##れている
+##dget
+##ein
+っている
+contact
+pepper
+スキル
+339
+##~5
+294
+##uel
+##ument
+730
+##hang
+みてす
+q5
+##sue
+rain
+##ndi
+wei
+swatch
+##cept
+わせ
+331
+popular
+##ste
+##tag
+p2
+501
+trc
+1899
+##west
+##live
+justin
+honda
+ping
+messenger
+##rap
+v9
+543
+##とは
+unity
+appqq
+はすへて
+025
+leo
+##tone
+##テ
+##ass
+uniqlo
+##010
+502
+her
+jane
+memory
+moneydj
+##tical
+human
+12306
+していると
+##m2
+coc
+miacare
+##mn
+tmt
+##core
+vim
+kk
+##may
+fan
+target
+use
+too
+338
+435
+2050
+867
+737
+fast
+##2c
+services
+##ope
+omega
+energy
+##わ
+pinkoi
+1a
+##なから
+##rain
+jackson
+##ement
+##シャンルの
+374
+366
+そんな
+p9
+rd
+##ᆨ
+1111
+##tier
+##vic
+zone
+##│
+385
+690
+dl
+isofix
+cpa
+m4
+322
+kimi
+めて
+davis
+##lay
+lulu
+##uck
+050
+weeks
+qs
+##hop
+920
+##n
+ae
+##ear
+~5
+eia
+405
+##fly
+korea
+jpeg
+boost
+##ship
+small
+##リア
+1860
+eur
+297
+425
+valley
+##iel
+simple
+##ude
+rn
+k2
+##ena
+されます
+non
+patrick
+しているから
+##ナー
+feed
+5757
+30g
+process
+well
+qqmei
+##thing
+they
+aws
+lu
+pink
+##ters
+##kin
+または
+board
+##vertisement
+wine
+##ien
+unicode
+##dge
+r1
+359
+##tant
+いを
+##twitter
+##3c
+cool1
+される
+##れて
+##l
+isp
+##012
+standard
+45㎡2
+402
+##150
+matt
+##fu
+326
+##iner
+googlemsn
+pixnetfacebookyahoo
+##ラン
+x7
+886
+##uce
+メーカー
+sao
+##ev
+##きました
+##file
+9678
+403
+xddd
+shirt
+6l
+##rio
+##hat
+3mm
+givenchy
+ya
+bang
+##lio
+monday
+crystal
+ロクイン
+##abc
+336
+head
+890
+ubuntuforumwikilinuxpastechat
+##vc
+##~20
+##rity
+cnc
+7866
+ipv6
+null
+1897
+##ost
+yang
+imsean
+tiger
+##fet
+##ンス
+352
+##=
+dji
+327
+ji
+maria
+##come
+##んて
+foundation
+3100
+##beth
+##なった
+1m
+601
+active
+##aft
+##don
+3p
+sr
+349
+emma
+##khz
+living
+415
+353
+1889
+341
+709
+457
+sas
+x6
+##face
+pptv
+x4
+##mate
+han
+sophie
+##jing
+337
+fifa
+##mand
+other
+sale
+inwedding
+##gn
+てきちゃいます
+##mmy
+##pmlast
+bad
+nana
+nbc
+してみてくたさいね
+なとはお
+##wu
+##かあります
+##あ
+note7
+single
+##340
+せからこ
+してくたさい♪この
+しにはとんとんワークケートを
+するとあなたにもっとマッチした
+ならワークケートへ
+もみつかっちゃうかも
+ワークケートの
+##bel
+window
+##dio
+##ht
+union
+age
+382
+14
+##ivity
+##y
+コメント
+domain
+neo
+##isa
+##lter
+5k
+f5
+steven
+##cts
+powerpoint
+tft
+self
+g2
+ft
+##テル
+zol
+##act
+mwc
+381
+343
+もう
+nbapop
+408
+てある
+eds
+ace
+##room
+previous
+author
+tomtom
+il
+##ets
+hu
+financial
+☆☆☆
+っています
+bp
+5t
+chi
+1gb
+##hg
+fairmont
+cross
+008
+gay
+h2
+function
+##けて
+356
+also
+1b
+625
+##ータ
+##raph
+1894
+3~5
+##ils
+i3
+334
+avenue
+##host
+による
+##bon
+##tsu
+message
+navigation
+50g
+fintech
+h6
+##ことを
+8cm
+##ject
+##vas
+##firm
+credit
+##wf
+xxxx
+form
+##nor
+##space
+huawei
+plan
+json
+sbl
+##dc
+machine
+921
+392
+wish
+##120
+##sol
+windows7
+edward
+##ために
+development
+washington
+##nsis
+lo
+818
+##sio
+##ym
+##bor
+planet
+##~8
+##wt
+ieee
+gpa
+##めて
+camp
+ann
+gm
+##tw
+##oka
+connect
+##rss
+##work
+##atus
+wall
+chicken
+soul
+2mm
+##times
+fa
+##ather
+##cord
+009
+##eep
+hitachi
+gui
+harry
+##pan
+e1
+disney
+##press
+##ーション
+wind
+386
+frigidaire
+##tl
+liu
+hsu
+332
+basic
+von
+ev
+いた
+てきる
+スホンサーサイト
+learning
+##ull
+expedia
+archives
+change
+##wei
+santa
+cut
+ins
+6gb
+turbo
+brand
+cf1
+508
+004
+return
+747
+##rip
+h1
+##nis
+##をこ
+128gb
+##にお
+3t
+application
+しており
+emc
+rx
+##oon
+384
+quick
+412
+15058
+wilson
+wing
+chapter
+##bug
+beyond
+##cms
+##dar
+##oh
+zoom
+e2
+trip
+sb
+##nba
+rcep
+342
+aspx
+ci
+080
+gc
+gnu
+める
+##count
+advanced
+dance
+dv
+##url
+##ging
+367
+8591
+am09
+shadow
+battle
+346
+##i
+##cia
+##という
+emily
+##のてす
+##tation
+host
+ff
+techorz
+sars
+##mini
+##mporary
+##ering
+nc
+4200
+798
+##next
+cma
+##mbps
+##gas
+##ift
+##dot
+##ィ
+455
+##~17
+amana
+##りの
+426
+##ros
+ir
+00㎡1
+##eet
+##ible
+##↓
+710
+ˋ▽ˊ
+##aka
+dcs
+iq
+##v
+l1
+##lor
+maggie
+##011
+##iu
+588
+##~1
+830
+##gt
+1tb
+articles
+create
+##burg
+##iki
+database
+fantasy
+##rex
+##cam
+dlc
+dean
+##you
+hard
+path
+gaming
+victoria
+maps
+cb
+##lee
+##itor
+overchicstoretvhome
+systems
+##xt
+416
+p3
+sarah
+760
+##nan
+407
+486
+x9
+install
+second
+626
+##ann
+##ph
+##rcle
+##nic
+860
+##nar
+ec
+##とう
+768
+metro
+chocolate
+##rian
+~4
+##table
+##しています
+skin
+##sn
+395
+mountain
+##0mm
+inparadise
+6m
+7x24
+ib
+4800
+##jia
+eeworld
+creative
+g5
+g3
+357
+parker
+ecfa
+village
+からの
+18000
+sylvia
+サーヒス
+hbl
+##ques
+##onsored
+##x2
+##きます
+##v4
+##tein
+ie6
+383
+##stack
+389
+ver
+##ads
+##baby
+sound
+bbe
+##110
+##lone
+##uid
+ads
+022
+gundam
+351
+thinkpad
+006
+scrum
+match
+##ave
+mems
+##470
+##oy
+##なりました
+##talk
+glass
+lamigo
+span
+##eme
+job
+##a5
+jay
+wade
+kde
+498
+##lace
+ocean
+tvg
+##covery
+##r3
+##ners
+##rea
+junior
+think
+##aine
+cover
+##ision
+##sia
+↓↓
+##bow
+msi
+413
+458
+406
+##love
+711
+801
+soft
+z2
+##pl
+456
+1840
+mobil
+mind
+##uy
+427
+nginx
+##oi
+めた
+##rr
+6221
+##mple
+##sson
+##ーシてす
+371
+##nts
+91tv
+comhd
+crv3000
+##uard
+1868
+397
+deep
+lost
+field
+gallery
+##bia
+rate
+spf
+redis
+traction
+930
+icloud
+011
+なら
+fe
+jose
+372
+##tory
+into
+sohu
+fx
+899
+379
+kicstart2
+##hia
+すく
+##~3
+##sit
+ra
+24
+##walk
+##xure
+500g
+##pact
+pacific
+xa
+natural
+carlo
+##250
+##walker
+1850
+##can
+cto
+gigi
+516
+##サー
+pen
+##hoo
+ob
+matlab
+##b
+##yy
+13913459
+##iti
+mango
+##bbs
+sense
+c5
+oxford
+##ニア
+walker
+jennifer
+##ola
+course
+##bre
+701
+##pus
+##rder
+lucky
+075
+##ぁ
+ivy
+なお
+##nia
+sotheby
+side
+##ugh
+joy
+##orage
+##ush
+##bat
+##dt
+364
+r9
+##2d
+##gio
+511
+country
+wear
+##lax
+##~7
+##moon
+393
+seven
+study
+411
+348
+lonzo
+8k
+##ェ
+evolution
+##イフ
+##kk
+gs
+kd
+##レス
+arduino
+344
+b12
+##lux
+arpg
+##rdon
+cook
+##x5
+dark
+five
+##als
+##ida
+とても
+sign
+362
+##ちの
+something
+20mm
+##nda
+387
+##posted
+fresh
+tf
+1870
+422
+cam
+##mine
+##skip
+##form
+##ssion
+education
+394
+##tee
+dyson
+stage
+##jie
+want
+##night
+epson
+pack
+あります
+##ppy
+テリヘル
+##█
+wd
+##eh
+##rence
+left
+##lvin
+golden
+mhz
+discovery
+##trix
+##n2
+loft
+##uch
+##dra
+##sse
+speed
+~1
+1mdb
+sorry
+welcome
+##urn
+wave
+gaga
+##lmer
+teddy
+##160
+トラックハック
+せよ
+611
+##f2016
+378
+rp
+##sha
+rar
+##あなたに
+##きた
+840
+holiday
+##ュー
+373
+074
+##vg
+##nos
+##rail
+gartner
+gi
+6p
+##dium
+kit
+488
+b3
+eco
+##ろう
+20g
+sean
+##stone
+autocad
+nu
+##np
+f16
+write
+029
+m5
+##ias
+images
+atp
+##dk
+fsm
+504
+1350
+ve
+52kb
+##xxx
+##のに
+##cake
+414
+unit
+lim
+ru
+1v
+##ification
+published
+angela
+16g
+analytics
+ak
+##q
+##nel
+gmt
+##icon
+again
+##₂
+##bby
+ios11
+445
+かこさいます
+waze
+いてす
+##ハ
+9985
+##ust
+##ティー
+framework
+##007
+iptv
+delete
+52sykb
+cl
+wwdc
+027
+30cm
+##fw
+##ての
+1389
+##xon
+brandt
+##ses
+##dragon
+tc
+vetements
+anne
+monte
+modern
+official
+##へて
+##ere
+##nne
+##oud
+もちろん
+50
+etnews
+##a2
+##graphy
+421
+863
+##ちゃん
+444
+##rtex
+##てお
+l2
+##gma
+mount
+ccd
+たと
+archive
+morning
+tan
+ddos
+e7
+##ホ
+day4
+##ウ
+gis
+453
+its
+495
+factory
+bruce
+pg
+##ito
+ってくたさい
+guest
+cdma
+##lling
+536
+n3
+しかし
+3~4
+mega
+eyes
+ro
+13
+women
+dac
+church
+##jun
+singapore
+##facebook
+6991
+starbucks
+##tos
+##stin
+##shine
+zen
+##mu
+tina
+20℃
+1893
+##たけて
+503
+465
+request
+##gence
+qt
+##っ
+1886
+347
+363
+q7
+##zzi
+diary
+##tore
+409
+##ead
+468
+cst
+##osa
+canada
+agent
+va
+##jiang
+##ちは
+##ーク
+##lam
+sg
+##nix
+##sday
+##よって
+g6
+##master
+bing
+##zl
+charlie
+16
+8mm
+nb40
+##ーン
+thai
+##ルフ
+ln284ct
+##itz
+##2f
+bonnie
+##food
+##lent
+originals
+##stro
+##lts
+418
+∟∣
+##bscribe
+children
+ntd
+yesstyle
+##かも
+hmv
+##tment
+d5
+2cm
+arts
+sms
+##pn
+##я
+##いい
+topios9
+539
+lifestyle
+virtual
+##ague
+xz
+##deo
+muji
+024
+unt
+##nnis
+##ᅩ
+faq1
+1884
+396
+##ette
+fly
+64㎡
+はしめまして
+441
+curry
+##pop
+のこ
+release
+##←
+##◆◆
+##cast
+073
+ありな
+500ml
+##ews
+5c
+##stle
+ios7
+##ima
+787
+dog
+lenovo
+##r4
+roger
+013
+cbs
+vornado
+100m
+417
+##desk
+##クok
+##ald
+1867
+9595
+2900
+##van
+oil
+##x
+some
+break
+common
+##jy
+##lines
+g7
+twice
+419
+ella
+nano
+belle
+にこ
+##mes
+##self
+##note
+jb
+##ことかてきます
+benz
+##との
+##ova
+451
+save
+##wing
+##ますのて
+kai
+りは
+##hua
+##rect
+rainer
+##unge
+448
+##0m
+adsl
+##かな
+guestname
+##uma
+##kins
+##zu
+tokichoi
+##price
+county
+##med
+##mus
+rmk
+391
+address
+vm
+えて
+openload
+##group
+##hin
+##iginal
+amg
+urban
+##oz
+jobs
+emi
+##public
+beautiful
+##sch
+album
+##dden
+##bell
+jerry
+works
+hostel
+miller
+##drive
+##rmin
+##10
+376
+boot
+828
+##370
+##fx
+##cm~
+1885
+##nome
+##ctionary
+##oman
+##lish
+##cr
+##hm
+433
+##how
+432
+francis
+xi
+c919
+b5
+evernote
+##uc
+vga
+##3000
+coupe
+##urg
+##cca
+##uality
+019
+6g
+れる
+multi
+##また
+##ett
+em
+hey
+##ani
+##tax
+##rma
+inside
+than
+740
+leonnhurt
+##jin
+ict
+れた
+bird
+notes
+200mm
+くの
+##dical
+##lli
+result
+442
+iu
+ee
+438
+smap
+gopro
+##last
+yin
+pure
+998
+32g
+けた
+5kg
+##dan
+##rame
+mama
+##oot
+bean
+marketing
+##hur
+2l
+bella
+sync
+xuite
+##ground
+515
+discuz
+##getrelax
+##ince
+##bay
+##5s
+cj
+##イス
+gmat
+apt
+##pass
+jing
+##rix
+c4
+rich
+##とても
+niusnews
+##ello
+bag
+770
+##eting
+##mobile
+18
+culture
+015
+##のてすか
+377
+1020
+area
+##ience
+616
+details
+gp
+universal
+silver
+dit
+はお
+private
+ddd
+u11
+kanshu
+##ified
+fung
+##nny
+dx
+##520
+tai
+475
+023
+##fr
+##lean
+3s
+##pin
+429
+##rin
+25000
+ly
+rick
+##bility
+usb3
+banner
+##baru
+##gion
+metal
+dt
+vdf
+1871
+karl
+qualcomm
+bear
+1010
+oldid
+ian
+jo
+##tors
+population
+##ernel
+1882
+mmorpg
+##mv
+##bike
+603
+##©
+ww
+friend
+##ager
+exhibition
+##del
+##pods
+fpx
+structure
+##free
+##tings
+kl
+##rley
+##copyright
+##mma
+california
+3400
+orange
+yoga
+4l
+canmake
+honey
+##anda
+##コメント
+595
+nikkie
+##ルハイト
+dhl
+publishing
+##mall
+##gnet
+20cm
+513
+##クセス
+##┅
+e88
+970
+##dog
+fishbase
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##+
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##=
+##>
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##{
+##|
+##}
+##~
+##£
+##¤
+##¥
+##§
+##«
+##±
+##³
+##µ
+##·
+##¹
+##º
+##»
+##¼
+##ß
+##æ
+##÷
+##ø
+##đ
+##ŋ
+##ɔ
+##ə
+##ɡ
+##ʰ
+##ˇ
+##ˈ
+##ˊ
+##ˋ
+##ˍ
+##ː
+##˙
+##˚
+##ˢ
+##α
+##β
+##γ
+##δ
+##ε
+##η
+##θ
+##ι
+##κ
+##λ
+##μ
+##ν
+##ο
+##π
+##ρ
+##ς
+##σ
+##τ
+##υ
+##φ
+##χ
+##ψ
+##б
+##в
+##г
+##д
+##е
+##ж
+##з
+##к
+##л
+##м
+##н
+##о
+##п
+##р
+##с
+##т
+##у
+##ф
+##х
+##ц
+##ч
+##ш
+##ы
+##ь
+##і
+##ا
+##ب
+##ة
+##ت
+##د
+##ر
+##س
+##ع
+##ل
+##م
+##ن
+##ه
+##و
+##ي
+##۩
+##ก
+##ง
+##น
+##ม
+##ย
+##ร
+##อ
+##า
+##เ
+##๑
+##་
+##ღ
+##ᄀ
+##ᄁ
+##ᄂ
+##ᄃ
+##ᄅ
+##ᄆ
+##ᄇ
+##ᄈ
+##ᄉ
+##ᄋ
+##ᄌ
+##ᄎ
+##ᄏ
+##ᄐ
+##ᄑ
+##ᄒ
+##ᅢ
+##ᅣ
+##ᅥ
+##ᅦ
+##ᅧ
+##ᅨ
+##ᅪ
+##ᅬ
+##ᅭ
+##ᅮ
+##ᅯ
+##ᅲ
+##ᅳ
+##ᅴ
+##ᆷ
+##ᆸ
+##ᆺ
+##ᆻ
+##ᗜ
+##ᵃ
+##ᵉ
+##ᵍ
+##ᵏ
+##ᵐ
+##ᵒ
+##ᵘ
+##‖
+##„
+##†
+##•
+##‥
+##‧
+##

+##‰
+##′
+##″
+##‹
+##›
+##※
+##‿
+##⁄
+##ⁱ
+##⁺
+##ⁿ
+##₁
+##₃
+##₄
+##€
+##№
+##ⅰ
+##ⅱ
+##ⅲ
+##ⅳ
+##ⅴ
+##↔
+##↗
+##↘
+##⇒
+##∀
+##−
+##∕
+##∙
+##√
+##∞
+##∟
+##∠
+##∣
+##∩
+##∮
+##∶
+##∼
+##∽
+##≈
+##≒
+##≡
+##≤
+##≥
+##≦
+##≧
+##≪
+##≫
+##⊙
+##⋅
+##⋈
+##⋯
+##⌒
+##①
+##②
+##③
+##④
+##⑤
+##⑥
+##⑦
+##⑧
+##⑨
+##⑩
+##⑴
+##⑵
+##⑶
+##⑷
+##⑸
+##⒈
+##⒉
+##⒊
+##⒋
+##ⓒ
+##ⓔ
+##ⓘ
+##━
+##┃
+##┆
+##┊
+##┌
+##└
+##├
+##┣
+##═
+##║
+##╚
+##╞
+##╠
+##╭
+##╮
+##╯
+##╰
+##╱
+##╳
+##▂
+##▃
+##▅
+##▇
+##▉
+##▋
+##▌
+##▍
+##▎
+##□
+##▪
+##▫
+##▬
+##△
+##▶
+##►
+##▽
+##◇
+##◕
+##◠
+##◢
+##◤
+##☀
+##☕
+##☞
+##☺
+##☼
+##♀
+##♂
+##♠
+##♡
+##♣
+##♦
+##♫
+##♬
+##✈
+##✔
+##✕
+##✖
+##✦
+##✨
+##✪
+##✰
+##✿
+##❀
+##➜
+##➤
+##⦿
+##、
+##。
+##〃
+##々
+##〇
+##〈
+##〉
+##《
+##》
+##「
+##」
+##『
+##』
+##【
+##】
+##〓
+##〔
+##〕
+##〖
+##〗
+##〜
+##〝
+##〞
+##ぃ
+##ぇ
+##ぬ
+##ふ
+##ほ
+##む
+##ゃ
+##ゅ
+##ゆ
+##ょ
+##゜
+##ゝ
+##ァ
+##ゥ
+##エ
+##ォ
+##ケ
+##サ
+##セ
+##ソ
+##ッ
+##ニ
+##ヌ
+##ネ
+##ノ
+##ヘ
+##モ
+##ャ
+##ヤ
+##ュ
+##ユ
+##ョ
+##ヨ
+##ワ
+##ヲ
+##・
+##ヽ
+##ㄅ
+##ㄆ
+##ㄇ
+##ㄉ
+##ㄋ
+##ㄌ
+##ㄍ
+##ㄎ
+##ㄏ
+##ㄒ
+##ㄚ
+##ㄛ
+##ㄞ
+##ㄟ
+##ㄢ
+##ㄤ
+##ㄥ
+##ㄧ
+##ㄨ
+##ㆍ
+##㈦
+##㊣
+##㗎
+##一
+##丁
+##七
+##万
+##丈
+##三
+##上
+##下
+##不
+##与
+##丐
+##丑
+##专
+##且
+##丕
+##世
+##丘
+##丙
+##业
+##丛
+##东
+##丝
+##丞
+##丟
+##両
+##丢
+##两
+##严
+##並
+##丧
+##丨
+##个
+##丫
+##中
+##丰
+##串
+##临
+##丶
+##丸
+##丹
+##为
+##主
+##丼
+##丽
+##举
+##丿
+##乂
+##乃
+##久
+##么
+##义
+##之
+##乌
+##乍
+##乎
+##乏
+##乐
+##乒
+##乓
+##乔
+##乖
+##乗
+##乘
+##乙
+##乜
+##九
+##乞
+##也
+##习
+##乡
+##书
+##乩
+##买
+##乱
+##乳
+##乾
+##亀
+##亂
+##了
+##予
+##争
+##事
+##二
+##于
+##亏
+##云
+##互
+##五
+##井
+##亘
+##亙
+##亚
+##些
+##亜
+##亞
+##亟
+##亡
+##亢
+##交
+##亥
+##亦
+##产
+##亨
+##亩
+##享
+##京
+##亭
+##亮
+##亲
+##亳
+##亵
+##人
+##亿
+##什
+##仁
+##仃
+##仄
+##仅
+##仆
+##仇
+##今
+##介
+##仍
+##从
+##仏
+##仑
+##仓
+##仔
+##仕
+##他
+##仗
+##付
+##仙
+##仝
+##仞
+##仟
+##代
+##令
+##以
+##仨
+##仪
+##们
+##仮
+##仰
+##仲
+##件
+##价
+##任
+##份
+##仿
+##企
+##伉
+##伊
+##伍
+##伎
+##伏
+##伐
+##休
+##伕
+##众
+##优
+##伙
+##会
+##伝
+##伞
+##伟
+##传
+##伢
+##伤
+##伦
+##伪
+##伫
+##伯
+##估
+##伴
+##伶
+##伸
+##伺
+##似
+##伽
+##佃
+##但
+##佇
+##佈
+##位
+##低
+##住
+##佐
+##佑
+##体
+##佔
+##何
+##佗
+##佘
+##余
+##佚
+##佛
+##作
+##佝
+##佞
+##佟
+##你
+##佢
+##佣
+##佤
+##佥
+##佩
+##佬
+##佯
+##佰
+##佳
+##併
+##佶
+##佻
+##佼
+##使
+##侃
+##侄
+##來
+##侈
+##例
+##侍
+##侏
+##侑
+##侖
+##侗
+##供
+##依
+##侠
+##価
+##侣
+##侥
+##侦
+##侧
+##侨
+##侬
+##侮
+##侯
+##侵
+##侶
+##侷
+##便
+##係
+##促
+##俄
+##俊
+##俎
+##俏
+##俐
+##俑
+##俗
+##俘
+##俚
+##保
+##俞
+##俟
+##俠
+##信
+##俨
+##俩
+##俪
+##俬
+##俭
+##修
+##俯
+##俱
+##俳
+##俸
+##俺
+##俾
+##倆
+##倉
+##個
+##倌
+##倍
+##倏
+##們
+##倒
+##倔
+##倖
+##倘
+##候
+##倚
+##倜
+##借
+##倡
+##値
+##倦
+##倩
+##倪
+##倫
+##倬
+##倭
+##倶
+##债
+##值
+##倾
+##偃
+##假
+##偈
+##偉
+##偌
+##偎
+##偏
+##偕
+##做
+##停
+##健
+##側
+##偵
+##偶
+##偷
+##偻
+##偽
+##偿
+##傀
+##傅
+##傍
+##傑
+##傘
+##備
+##傚
+##傢
+##傣
+##傥
+##储
+##傩
+##催
+##傭
+##傲
+##傳
+##債
+##傷
+##傻
+##傾
+##僅
+##働
+##像
+##僑
+##僕
+##僖
+##僚
+##僥
+##僧
+##僭
+##僮
+##僱
+##僵
+##價
+##僻
+##儀
+##儂
+##億
+##儆
+##儉
+##儋
+##儒
+##儕
+##儘
+##償
+##儡
+##優
+##儲
+##儷
+##儼
+##儿
+##兀
+##允
+##元
+##兄
+##充
+##兆
+##兇
+##先
+##光
+##克
+##兌
+##免
+##児
+##兑
+##兒
+##兔
+##兖
+##党
+##兜
+##兢
+##入
+##內
+##全
+##兩
+##八
+##公
+##六
+##兮
+##兰
+##共
+##兲
+##关
+##兴
+##兵
+##其
+##具
+##典
+##兹
+##养
+##兼
+##兽
+##冀
+##内
+##円
+##冇
+##冈
+##冉
+##冊
+##册
+##再
+##冏
+##冒
+##冕
+##冗
+##写
+##军
+##农
+##冠
+##冢
+##冤
+##冥
+##冨
+##冪
+##冬
+##冯
+##冰
+##冲
+##决
+##况
+##冶
+##冷
+##冻
+##冼
+##冽
+##冾
+##净
+##凄
+##准
+##凇
+##凈
+##凉
+##凋
+##凌
+##凍
+##减
+##凑
+##凛
+##凜
+##凝
+##几
+##凡
+##凤
+##処
+##凪
+##凭
+##凯
+##凰
+##凱
+##凳
+##凶
+##凸
+##凹
+##出
+##击
+##函
+##凿
+##刀
+##刁
+##刃
+##分
+##切
+##刈
+##刊
+##刍
+##刎
+##刑
+##划
+##列
+##刘
+##则
+##刚
+##创
+##初
+##删
+##判
+##別
+##刨
+##利
+##刪
+##别
+##刮
+##到
+##制
+##刷
+##券
+##刹
+##刺
+##刻
+##刽
+##剁
+##剂
+##剃
+##則
+##剉
+##削
+##剋
+##剌
+##前
+##剎
+##剐
+##剑
+##剔
+##剖
+##剛
+##剜
+##剝
+##剣
+##剤
+##剥
+##剧
+##剩
+##剪
+##副
+##割
+##創
+##剷
+##剽
+##剿
+##劃
+##劇
+##劈
+##劉
+##劊
+##劍
+##劏
+##劑
+##力
+##劝
+##办
+##功
+##加
+##务
+##劣
+##动
+##助
+##努
+##劫
+##劭
+##励
+##劲
+##劳
+##労
+##劵
+##効
+##劾
+##势
+##勁
+##勃
+##勇
+##勉
+##勋
+##勐
+##勒
+##動
+##勖
+##勘
+##務
+##勛
+##勝
+##勞
+##募
+##勢
+##勤
+##勧
+##勳
+##勵
+##勸
+##勺
+##勻
+##勾
+##勿
+##匀
+##包
+##匆
+##匈
+##匍
+##匐
+##匕
+##化
+##北
+##匙
+##匝
+##匠
+##匡
+##匣
+##匪
+##匮
+##匯
+##匱
+##匹
+##区
+##医
+##匾
+##匿
+##區
+##十
+##千
+##卅
+##升
+##午
+##卉
+##半
+##卍
+##华
+##协
+##卑
+##卒
+##卓
+##協
+##单
+##卖
+##南
+##単
+##博
+##卜
+##卞
+##卟
+##占
+##卡
+##卢
+##卤
+##卦
+##卧
+##卫
+##卮
+##卯
+##印
+##危
+##即
+##却
+##卵
+##卷
+##卸
+##卻
+##卿
+##厂
+##厄
+##厅
+##历
+##厉
+##压
+##厌
+##厕
+##厘
+##厚
+##厝
+##原
+##厢
+##厥
+##厦
+##厨
+##厩
+##厭
+##厮
+##厲
+##厳
+##去
+##县
+##叁
+##参
+##參
+##又
+##叉
+##及
+##友
+##双
+##反
+##収
+##发
+##叔
+##取
+##受
+##变
+##叙
+##叛
+##叟
+##叠
+##叡
+##叢
+##口
+##古
+##句
+##另
+##叨
+##叩
+##只
+##叫
+##召
+##叭
+##叮
+##可
+##台
+##叱
+##史
+##右
+##叵
+##叶
+##号
+##司
+##叹
+##叻
+##叼
+##叽
+##吁
+##吃
+##各
+##吆
+##合
+##吉
+##吊
+##吋
+##同
+##名
+##后
+##吏
+##吐
+##向
+##吒
+##吓
+##吕
+##吖
+##吗
+##君
+##吝
+##吞
+##吟
+##吠
+##吡
+##否
+##吧
+##吨
+##吩
+##含
+##听
+##吭
+##吮
+##启
+##吱
+##吳
+##吴
+##吵
+##吶
+##吸
+##吹
+##吻
+##吼
+##吽
+##吾
+##呀
+##呂
+##呃
+##呆
+##呈
+##告
+##呋
+##呎
+##呐
+##呓
+##呕
+##呗
+##员
+##呛
+##呜
+##呢
+##呤
+##呦
+##周
+##呱
+##呲
+##味
+##呵
+##呷
+##呸
+##呻
+##呼
+##命
+##咀
+##咁
+##咂
+##咄
+##咆
+##咋
+##和
+##咎
+##咏
+##咐
+##咒
+##咔
+##咕
+##咖
+##咗
+##咘
+##咙
+##咚
+##咛
+##咣
+##咤
+##咦
+##咧
+##咨
+##咩
+##咪
+##咫
+##咬
+##咭
+##咯
+##咱
+##咲
+##咳
+##咸
+##咻
+##咽
+##咿
+##哀
+##品
+##哂
+##哄
+##哆
+##哇
+##哈
+##哉
+##哋
+##哌
+##响
+##哎
+##哏
+##哐
+##哑
+##哒
+##哔
+##哗
+##哟
+##員
+##哥
+##哦
+##哧
+##哨
+##哩
+##哪
+##哭
+##哮
+##哲
+##哺
+##哼
+##哽
+##唁
+##唄
+##唆
+##唇
+##唉
+##唏
+##唐
+##唑
+##唔
+##唠
+##唤
+##唧
+##唬
+##售
+##唯
+##唰
+##唱
+##唳
+##唷
+##唸
+##唾
+##啃
+##啄
+##商
+##啉
+##啊
+##問
+##啓
+##啕
+##啖
+##啜
+##啞
+##啟
+##啡
+##啤
+##啥
+##啦
+##啧
+##啪
+##啫
+##啬
+##啮
+##啰
+##啱
+##啲
+##啵
+##啶
+##啷
+##啸
+##啻
+##啼
+##啾
+##喀
+##喂
+##喃
+##善
+##喆
+##喇
+##喉
+##喊
+##喋
+##喎
+##喏
+##喔
+##喘
+##喙
+##喚
+##喜
+##喝
+##喟
+##喧
+##喪
+##喫
+##喬
+##單
+##喰
+##喱
+##喲
+##喳
+##喵
+##営
+##喷
+##喹
+##喺
+##喻
+##喽
+##嗅
+##嗆
+##嗇
+##嗎
+##嗑
+##嗒
+##嗓
+##嗔
+##嗖
+##嗚
+##嗜
+##嗝
+##嗟
+##嗡
+##嗣
+##嗤
+##嗦
+##嗨
+##嗪
+##嗬
+##嗯
+##嗰
+##嗲
+##嗳
+##嗶
+##嗷
+##嗽
+##嘀
+##嘅
+##嘆
+##嘈
+##嘉
+##嘌
+##嘍
+##嘎
+##嘔
+##嘖
+##嘗
+##嘘
+##嘚
+##嘛
+##嘜
+##嘞
+##嘟
+##嘢
+##嘣
+##嘤
+##嘧
+##嘩
+##嘭
+##嘮
+##嘯
+##嘰
+##嘱
+##嘲
+##嘴
+##嘶
+##嘸
+##嘹
+##嘻
+##嘿
+##噁
+##噌
+##噎
+##噓
+##噔
+##噗
+##噙
+##噜
+##噠
+##噢
+##噤
+##器
+##噩
+##噪
+##噬
+##噱
+##噴
+##噶
+##噸
+##噹
+##噻
+##噼
+##嚀
+##嚇
+##嚎
+##嚏
+##嚐
+##嚓
+##嚕
+##嚟
+##嚣
+##嚥
+##嚨
+##嚮
+##嚴
+##嚷
+##嚼
+##囂
+##囉
+##囊
+##囍
+##囑
+##囔
+##囗
+##囚
+##四
+##囝
+##回
+##囟
+##因
+##囡
+##团
+##団
+##囤
+##囧
+##囪
+##囫
+##园
+##困
+##囱
+##囲
+##図
+##围
+##囹
+##固
+##国
+##图
+##囿
+##圃
+##圄
+##圆
+##圈
+##國
+##圍
+##圏
+##園
+##圓
+##圖
+##團
+##圜
+##土
+##圣
+##圧
+##在
+##圩
+##圭
+##地
+##圳
+##场
+##圻
+##圾
+##址
+##坂
+##均
+##坊
+##坍
+##坎
+##坏
+##坐
+##坑
+##块
+##坚
+##坛
+##坝
+##坞
+##坟
+##坠
+##坡
+##坤
+##坦
+##坨
+##坪
+##坯
+##坳
+##坵
+##坷
+##垂
+##垃
+##垄
+##型
+##垒
+##垚
+##垛
+##垠
+##垢
+##垣
+##垦
+##垩
+##垫
+##垭
+##垮
+##垵
+##埂
+##埃
+##埋
+##城
+##埔
+##埕
+##埗
+##域
+##埠
+##埤
+##埵
+##執
+##埸
+##培
+##基
+##埼
+##堀
+##堂
+##堃
+##堅
+##堆
+##堇
+##堑
+##堕
+##堙
+##堡
+##堤
+##堪
+##堯
+##堰
+##報
+##場
+##堵
+##堺
+##堿
+##塊
+##塌
+##塑
+##塔
+##塗
+##塘
+##塚
+##塞
+##塢
+##塩
+##填
+##塬
+##塭
+##塵
+##塾
+##墀
+##境
+##墅
+##墉
+##墊
+##墒
+##墓
+##増
+##墘
+##墙
+##墜
+##增
+##墟
+##墨
+##墩
+##墮
+##墳
+##墻
+##墾
+##壁
+##壅
+##壆
+##壇
+##壊
+##壑
+##壓
+##壕
+##壘
+##壞
+##壟
+##壢
+##壤
+##壩
+##士
+##壬
+##壮
+##壯
+##声
+##売
+##壳
+##壶
+##壹
+##壺
+##壽
+##处
+##备
+##変
+##复
+##夏
+##夔
+##夕
+##外
+##夙
+##多
+##夜
+##够
+##夠
+##夢
+##夥
+##大
+##天
+##太
+##夫
+##夭
+##央
+##夯
+##失
+##头
+##夷
+##夸
+##夹
+##夺
+##夾
+##奂
+##奄
+##奇
+##奈
+##奉
+##奋
+##奎
+##奏
+##奐
+##契
+##奔
+##奕
+##奖
+##套
+##奘
+##奚
+##奠
+##奢
+##奥
+##奧
+##奪
+##奬
+##奮
+##女
+##奴
+##奶
+##奸
+##她
+##好
+##如
+##妃
+##妄
+##妆
+##妇
+##妈
+##妊
+##妍
+##妒
+##妓
+##妖
+##妘
+##妙
+##妝
+##妞
+##妣
+##妤
+##妥
+##妨
+##妩
+##妪
+##妮
+##妲
+##妳
+##妹
+##妻
+##妾
+##姆
+##姉
+##姊
+##始
+##姍
+##姐
+##姑
+##姒
+##姓
+##委
+##姗
+##姚
+##姜
+##姝
+##姣
+##姥
+##姦
+##姨
+##姪
+##姫
+##姬
+##姹
+##姻
+##姿
+##威
+##娃
+##娄
+##娅
+##娆
+##娇
+##娉
+##娑
+##娓
+##娘
+##娛
+##娜
+##娟
+##娠
+##娣
+##娥
+##娩
+##娱
+##娲
+##娴
+##娶
+##娼
+##婀
+##婁
+##婆
+##婉
+##婊
+##婕
+##婚
+##婢
+##婦
+##婧
+##婪
+##婭
+##婴
+##婵
+##婶
+##婷
+##婺
+##婿
+##媒
+##媚
+##媛
+##媞
+##媧
+##媲
+##媳
+##媽
+##媾
+##嫁
+##嫂
+##嫉
+##嫌
+##嫑
+##嫔
+##嫖
+##嫘
+##嫚
+##嫡
+##嫣
+##嫦
+##嫩
+##嫲
+##嫵
+##嫻
+##嬅
+##嬉
+##嬌
+##嬗
+##嬛
+##嬢
+##嬤
+##嬪
+##嬰
+##嬴
+##嬷
+##嬸
+##嬿
+##孀
+##孃
+##子
+##孑
+##孔
+##孕
+##孖
+##字
+##存
+##孙
+##孚
+##孛
+##孜
+##孝
+##孟
+##孢
+##季
+##孤
+##学
+##孩
+##孪
+##孫
+##孬
+##孰
+##孱
+##孳
+##孵
+##學
+##孺
+##孽
+##孿
+##宁
+##它
+##宅
+##宇
+##守
+##安
+##宋
+##完
+##宏
+##宓
+##宕
+##宗
+##官
+##宙
+##定
+##宛
+##宜
+##宝
+##实
+##実
+##宠
+##审
+##客
+##宣
+##室
+##宥
+##宦
+##宪
+##宫
+##宮
+##宰
+##害
+##宴
+##宵
+##家
+##宸
+##容
+##宽
+##宾
+##宿
+##寂
+##寄
+##寅
+##密
+##寇
+##富
+##寐
+##寒
+##寓
+##寛
+##寝
+##寞
+##察
+##寡
+##寢
+##寥
+##實
+##寧
+##寨
+##審
+##寫
+##寬
+##寮
+##寰
+##寵
+##寶
+##寸
+##对
+##寺
+##寻
+##导
+##対
+##寿
+##封
+##専
+##射
+##将
+##將
+##專
+##尉
+##尊
+##尋
+##對
+##導
+##小
+##少
+##尔
+##尕
+##尖
+##尘
+##尚
+##尝
+##尤
+##尧
+##尬
+##就
+##尴
+##尷
+##尸
+##尹
+##尺
+##尻
+##尼
+##尽
+##尾
+##尿
+##局
+##屁
+##层
+##屄
+##居
+##屆
+##屈
+##屉
+##届
+##屋
+##屌
+##屍
+##屎
+##屏
+##屐
+##屑
+##展
+##屜
+##属
+##屠
+##屡
+##屢
+##層
+##履
+##屬
+##屯
+##山
+##屹
+##屿
+##岀
+##岁
+##岂
+##岌
+##岐
+##岑
+##岔
+##岖
+##岗
+##岘
+##岙
+##岚
+##岛
+##岡
+##岩
+##岫
+##岬
+##岭
+##岱
+##岳
+##岷
+##岸
+##峇
+##峋
+##峒
+##峙
+##峡
+##峤
+##峥
+##峦
+##峨
+##峪
+##峭
+##峯
+##峰
+##峴
+##島
+##峻
+##峽
+##崁
+##崂
+##崆
+##崇
+##崎
+##崑
+##崔
+##崖
+##崗
+##崙
+##崛
+##崧
+##崩
+##崭
+##崴
+##崽
+##嵇
+##嵊
+##嵋
+##嵌
+##嵐
+##嵘
+##嵩
+##嵬
+##嵯
+##嶂
+##嶄
+##嶇
+##嶋
+##嶙
+##嶺
+##嶼
+##嶽
+##巅
+##巍
+##巒
+##巔
+##巖
+##川
+##州
+##巡
+##巢
+##工
+##左
+##巧
+##巨
+##巩
+##巫
+##差
+##己
+##已
+##巳
+##巴
+##巷
+##巻
+##巽
+##巾
+##巿
+##币
+##市
+##布
+##帅
+##帆
+##师
+##希
+##帐
+##帑
+##帕
+##帖
+##帘
+##帚
+##帛
+##帜
+##帝
+##帥
+##带
+##帧
+##師
+##席
+##帮
+##帯
+##帰
+##帳
+##帶
+##帷
+##常
+##帼
+##帽
+##幀
+##幂
+##幄
+##幅
+##幌
+##幔
+##幕
+##幟
+##幡
+##幢
+##幣
+##幫
+##干
+##平
+##年
+##并
+##幸
+##幹
+##幺
+##幻
+##幼
+##幽
+##幾
+##广
+##庁
+##広
+##庄
+##庆
+##庇
+##床
+##序
+##庐
+##库
+##应
+##底
+##庖
+##店
+##庙
+##庚
+##府
+##庞
+##废
+##庠
+##度
+##座
+##庫
+##庭
+##庵
+##庶
+##康
+##庸
+##庹
+##庾
+##廁
+##廂
+##廃
+##廈
+##廉
+##廊
+##廓
+##廖
+##廚
+##廝
+##廟
+##廠
+##廢
+##廣
+##廬
+##廳
+##延
+##廷
+##建
+##廿
+##开
+##弁
+##异
+##弃
+##弄
+##弈
+##弊
+##弋
+##式
+##弑
+##弒
+##弓
+##弔
+##引
+##弗
+##弘
+##弛
+##弟
+##张
+##弥
+##弦
+##弧
+##弩
+##弭
+##弯
+##弱
+##張
+##強
+##弹
+##强
+##弼
+##弾
+##彅
+##彆
+##彈
+##彌
+##彎
+##归
+##当
+##录
+##彗
+##彙
+##彝
+##形
+##彤
+##彥
+##彦
+##彧
+##彩
+##彪
+##彫
+##彬
+##彭
+##彰
+##影
+##彷
+##役
+##彻
+##彼
+##彿
+##往
+##征
+##径
+##待
+##徇
+##很
+##徉
+##徊
+##律
+##後
+##徐
+##徑
+##徒
+##従
+##徕
+##得
+##徘
+##徙
+##徜
+##從
+##徠
+##御
+##徨
+##復
+##循
+##徬
+##微
+##徳
+##徴
+##徵
+##德
+##徹
+##徼
+##徽
+##心
+##必
+##忆
+##忌
+##忍
+##忏
+##忐
+##忑
+##忒
+##忖
+##志
+##忘
+##忙
+##応
+##忠
+##忡
+##忤
+##忧
+##忪
+##快
+##忱
+##念
+##忻
+##忽
+##忿
+##怀
+##态
+##怂
+##怅
+##怆
+##怎
+##怏
+##怒
+##怔
+##怕
+##怖
+##怙
+##怜
+##思
+##怠
+##怡
+##急
+##怦
+##性
+##怨
+##怪
+##怯
+##怵
+##总
+##怼
+##恁
+##恃
+##恆
+##恋
+##恍
+##恐
+##恒
+##恕
+##恙
+##恚
+##恢
+##恣
+##恤
+##恥
+##恨
+##恩
+##恪
+##恫
+##恬
+##恭
+##息
+##恰
+##恳
+##恵
+##恶
+##恸
+##恺
+##恻
+##恼
+##恿
+##悄
+##悅
+##悉
+##悌
+##悍
+##悔
+##悖
+##悚
+##悟
+##悠
+##患
+##悦
+##您
+##悩
+##悪
+##悬
+##悯
+##悱
+##悲
+##悴
+##悵
+##悶
+##悸
+##悻
+##悼
+##悽
+##情
+##惆
+##惇
+##惊
+##惋
+##惑
+##惕
+##惘
+##惚
+##惜
+##惟
+##惠
+##惡
+##惦
+##惧
+##惨
+##惩
+##惫
+##惬
+##惭
+##惮
+##惯
+##惰
+##惱
+##想
+##惴
+##惶
+##惹
+##惺
+##愁
+##愆
+##愈
+##愉
+##愍
+##意
+##愕
+##愚
+##愛
+##愜
+##感
+##愣
+##愤
+##愧
+##愫
+##愷
+##愿
+##慄
+##慈
+##態
+##慌
+##慎
+##慑
+##慕
+##慘
+##慚
+##慟
+##慢
+##慣
+##慧
+##慨
+##慫
+##慮
+##慰
+##慳
+##慵
+##慶
+##慷
+##慾
+##憂
+##憊
+##憋
+##憎
+##憐
+##憑
+##憔
+##憚
+##憤
+##憧
+##憨
+##憩
+##憫
+##憬
+##憲
+##憶
+##憾
+##懂
+##懇
+##懈
+##應
+##懊
+##懋
+##懑
+##懒
+##懦
+##懲
+##懵
+##懶
+##懷
+##懸
+##懺
+##懼
+##懾
+##懿
+##戀
+##戈
+##戊
+##戌
+##戍
+##戎
+##戏
+##成
+##我
+##戒
+##戕
+##或
+##战
+##戚
+##戛
+##戟
+##戡
+##戦
+##截
+##戬
+##戮
+##戰
+##戲
+##戳
+##戴
+##戶
+##户
+##戸
+##戻
+##戾
+##房
+##所
+##扁
+##扇
+##扈
+##扉
+##手
+##才
+##扎
+##扑
+##扒
+##打
+##扔
+##払
+##托
+##扛
+##扣
+##扦
+##执
+##扩
+##扪
+##扫
+##扬
+##扭
+##扮
+##扯
+##扰
+##扱
+##扳
+##扶
+##批
+##扼
+##找
+##承
+##技
+##抄
+##抉
+##把
+##抑
+##抒
+##抓
+##投
+##抖
+##抗
+##折
+##抚
+##抛
+##抜
+##択
+##抟
+##抠
+##抡
+##抢
+##护
+##报
+##抨
+##披
+##抬
+##抱
+##抵
+##抹
+##押
+##抽
+##抿
+##拂
+##拄
+##担
+##拆
+##拇
+##拈
+##拉
+##拋
+##拌
+##拍
+##拎
+##拐
+##拒
+##拓
+##拔
+##拖
+##拗
+##拘
+##拙
+##拚
+##招
+##拜
+##拟
+##拡
+##拢
+##拣
+##拥
+##拦
+##拧
+##拨
+##择
+##括
+##拭
+##拮
+##拯
+##拱
+##拳
+##拴
+##拷
+##拼
+##拽
+##拾
+##拿
+##持
+##挂
+##指
+##挈
+##按
+##挎
+##挑
+##挖
+##挙
+##挚
+##挛
+##挝
+##挞
+##挟
+##挠
+##挡
+##挣
+##挤
+##挥
+##挨
+##挪
+##挫
+##振
+##挲
+##挹
+##挺
+##挽
+##挾
+##捂
+##捅
+##捆
+##捉
+##捋
+##捌
+##捍
+##捎
+##捏
+##捐
+##捕
+##捞
+##损
+##捡
+##换
+##捣
+##捧
+##捨
+##捩
+##据
+##捱
+##捲
+##捶
+##捷
+##捺
+##捻
+##掀
+##掂
+##掃
+##掇
+##授
+##掉
+##掌
+##掏
+##掐
+##排
+##掖
+##掘
+##掙
+##掛
+##掠
+##採
+##探
+##掣
+##接
+##控
+##推
+##掩
+##措
+##掬
+##掰
+##掲
+##掳
+##掴
+##掷
+##掸
+##掺
+##揀
+##揃
+##揄
+##揆
+##揉
+##揍
+##描
+##提
+##插
+##揖
+##揚
+##換
+##握
+##揣
+##揩
+##揪
+##揭
+##揮
+##援
+##揶
+##揸
+##揹
+##揽
+##搀
+##搁
+##搂
+##搅
+##損
+##搏
+##搐
+##搓
+##搔
+##搖
+##搗
+##搜
+##搞
+##搡
+##搪
+##搬
+##搭
+##搵
+##搶
+##携
+##搽
+##摀
+##摁
+##摄
+##摆
+##摇
+##摈
+##摊
+##摒
+##摔
+##摘
+##摞
+##摟
+##摧
+##摩
+##摯
+##摳
+##摸
+##摹
+##摺
+##摻
+##撂
+##撃
+##撅
+##撇
+##撈
+##撐
+##撑
+##撒
+##撓
+##撕
+##撚
+##撞
+##撤
+##撥
+##撩
+##撫
+##撬
+##播
+##撮
+##撰
+##撲
+##撵
+##撷
+##撸
+##撻
+##撼
+##撿
+##擀
+##擁
+##擂
+##擄
+##擅
+##擇
+##擊
+##擋
+##操
+##擎
+##擒
+##擔
+##擘
+##據
+##擞
+##擠
+##擡
+##擢
+##擦
+##擬
+##擰
+##擱
+##擲
+##擴
+##擷
+##擺
+##擼
+##擾
+##攀
+##攏
+##攒
+##攔
+##攘
+##攙
+##攜
+##攝
+##攞
+##攢
+##攣
+##攤
+##攥
+##攪
+##攫
+##攬
+##支
+##收
+##攸
+##改
+##攻
+##放
+##政
+##故
+##效
+##敌
+##敍
+##敎
+##敏
+##救
+##敕
+##敖
+##敗
+##敘
+##教
+##敛
+##敝
+##敞
+##敢
+##散
+##敦
+##敬
+##数
+##敲
+##整
+##敵
+##敷
+##數
+##斂
+##斃
+##文
+##斋
+##斌
+##斎
+##斐
+##斑
+##斓
+##斗
+##料
+##斛
+##斜
+##斟
+##斡
+##斤
+##斥
+##斧
+##斩
+##斫
+##斬
+##断
+##斯
+##新
+##斷
+##方
+##於
+##施
+##旁
+##旃
+##旅
+##旋
+##旌
+##旎
+##族
+##旖
+##旗
+##无
+##既
+##日
+##旦
+##旧
+##旨
+##早
+##旬
+##旭
+##旮
+##旱
+##时
+##旷
+##旺
+##旻
+##昀
+##昂
+##昆
+##昇
+##昉
+##昊
+##昌
+##明
+##昏
+##易
+##昔
+##昕
+##昙
+##星
+##映
+##春
+##昧
+##昨
+##昭
+##是
+##昱
+##昴
+##昵
+##昶
+##昼
+##显
+##晁
+##時
+##晃
+##晉
+##晋
+##晌
+##晏
+##晒
+##晓
+##晔
+##晕
+##晖
+##晗
+##晚
+##晝
+##晞
+##晟
+##晤
+##晦
+##晨
+##晩
+##普
+##景
+##晰
+##晴
+##晶
+##晷
+##智
+##晾
+##暂
+##暄
+##暇
+##暈
+##暉
+##暌
+##暐
+##暑
+##暖
+##暗
+##暝
+##暢
+##暧
+##暨
+##暫
+##暮
+##暱
+##暴
+##暸
+##暹
+##曄
+##曆
+##曇
+##曉
+##曖
+##曙
+##曜
+##曝
+##曠
+##曦
+##曬
+##曰
+##曲
+##曳
+##更
+##書
+##曹
+##曼
+##曾
+##替
+##最
+##會
+##月
+##有
+##朋
+##服
+##朐
+##朔
+##朕
+##朗
+##望
+##朝
+##期
+##朦
+##朧
+##木
+##未
+##末
+##本
+##札
+##朮
+##术
+##朱
+##朴
+##朵
+##机
+##朽
+##杀
+##杂
+##权
+##杆
+##杈
+##杉
+##李
+##杏
+##材
+##村
+##杓
+##杖
+##杜
+##杞
+##束
+##杠
+##条
+##来
+##杨
+##杭
+##杯
+##杰
+##東
+##杳
+##杵
+##杷
+##杼
+##松
+##板
+##极
+##构
+##枇
+##枉
+##枋
+##析
+##枕
+##林
+##枚
+##果
+##枝
+##枢
+##枣
+##枪
+##枫
+##枭
+##枯
+##枰
+##枱
+##枳
+##架
+##枷
+##枸
+##柄
+##柏
+##某
+##柑
+##柒
+##染
+##柔
+##柘
+##柚
+##柜
+##柞
+##柠
+##柢
+##查
+##柩
+##柬
+##柯
+##柱
+##柳
+##柴
+##柵
+##査
+##柿
+##栀
+##栃
+##栄
+##栅
+##标
+##栈
+##栉
+##栋
+##栎
+##栏
+##树
+##栓
+##栖
+##栗
+##校
+##栩
+##株
+##样
+##核
+##根
+##格
+##栽
+##栾
+##桀
+##桁
+##桂
+##桃
+##桅
+##框
+##案
+##桉
+##桌
+##桎
+##桐
+##桑
+##桓
+##桔
+##桜
+##桠
+##桡
+##桢
+##档
+##桥
+##桦
+##桧
+##桨
+##桩
+##桶
+##桿
+##梁
+##梅
+##梆
+##梏
+##梓
+##梗
+##條
+##梟
+##梢
+##梦
+##梧
+##梨
+##梭
+##梯
+##械
+##梳
+##梵
+##梶
+##检
+##棂
+##棄
+##棉
+##棋
+##棍
+##棒
+##棕
+##棗
+##棘
+##棚
+##棟
+##棠
+##棣
+##棧
+##森
+##棱
+##棲
+##棵
+##棹
+##棺
+##椁
+##椅
+##椋
+##植
+##椎
+##椒
+##検
+##椪
+##椭
+##椰
+##椹
+##椽
+##椿
+##楂
+##楊
+##楓
+##楔
+##楚
+##楝
+##楞
+##楠
+##楣
+##楨
+##楫
+##業
+##楮
+##極
+##楷
+##楸
+##楹
+##楼
+##楽
+##概
+##榄
+##榆
+##榈
+##榉
+##榔
+##榕
+##榖
+##榛
+##榜
+##榨
+##榫
+##榭
+##榮
+##榱
+##榴
+##榷
+##榻
+##槁
+##槃
+##構
+##槌
+##槍
+##槎
+##槐
+##槓
+##様
+##槛
+##槟
+##槤
+##槭
+##槲
+##槳
+##槻
+##槽
+##槿
+##樁
+##樂
+##樊
+##樑
+##樓
+##標
+##樞
+##樟
+##模
+##樣
+##権
+##横
+##樫
+##樯
+##樱
+##樵
+##樸
+##樹
+##樺
+##樽
+##樾
+##橄
+##橇
+##橋
+##橐
+##橘
+##橙
+##機
+##橡
+##橢
+##橫
+##橱
+##橹
+##橼
+##檀
+##檄
+##檎
+##檐
+##檔
+##檗
+##檜
+##檢
+##檬
+##檯
+##檳
+##檸
+##檻
+##櫃
+##櫚
+##櫛
+##櫥
+##櫸
+##櫻
+##欄
+##權
+##欒
+##欖
+##欠
+##次
+##欢
+##欣
+##欧
+##欲
+##欸
+##欺
+##欽
+##款
+##歆
+##歇
+##歉
+##歌
+##歎
+##歐
+##歓
+##歙
+##歛
+##歡
+##止
+##正
+##此
+##步
+##武
+##歧
+##歩
+##歪
+##歯
+##歲
+##歳
+##歴
+##歷
+##歸
+##歹
+##死
+##歼
+##殁
+##殃
+##殆
+##殇
+##殉
+##殊
+##残
+##殒
+##殓
+##殖
+##殘
+##殞
+##殡
+##殤
+##殭
+##殯
+##殲
+##殴
+##段
+##殷
+##殺
+##殼
+##殿
+##毀
+##毁
+##毂
+##毅
+##毆
+##毋
+##母
+##毎
+##每
+##毒
+##毓
+##比
+##毕
+##毗
+##毘
+##毙
+##毛
+##毡
+##毫
+##毯
+##毽
+##氈
+##氏
+##氐
+##民
+##氓
+##气
+##氖
+##気
+##氙
+##氛
+##氟
+##氡
+##氢
+##氣
+##氤
+##氦
+##氧
+##氨
+##氪
+##氫
+##氮
+##氯
+##氰
+##氲
+##水
+##氷
+##永
+##氹
+##氾
+##汀
+##汁
+##求
+##汆
+##汇
+##汉
+##汎
+##汐
+##汕
+##汗
+##汙
+##汛
+##汝
+##汞
+##江
+##池
+##污
+##汤
+##汨
+##汩
+##汪
+##汰
+##汲
+##汴
+##汶
+##汹
+##決
+##汽
+##汾
+##沁
+##沂
+##沃
+##沅
+##沈
+##沉
+##沌
+##沏
+##沐
+##沒
+##沓
+##沖
+##沙
+##沛
+##沟
+##没
+##沢
+##沣
+##沥
+##沦
+##沧
+##沪
+##沫
+##沭
+##沮
+##沱
+##河
+##沸
+##油
+##治
+##沼
+##沽
+##沾
+##沿
+##況
+##泄
+##泉
+##泊
+##泌
+##泓
+##法
+##泗
+##泛
+##泞
+##泠
+##泡
+##波
+##泣
+##泥
+##注
+##泪
+##泫
+##泮
+##泯
+##泰
+##泱
+##泳
+##泵
+##泷
+##泸
+##泻
+##泼
+##泽
+##泾
+##洁
+##洄
+##洋
+##洒
+##洗
+##洙
+##洛
+##洞
+##津
+##洩
+##洪
+##洮
+##洱
+##洲
+##洵
+##洶
+##洸
+##洹
+##活
+##洼
+##洽
+##派
+##流
+##浃
+##浄
+##浅
+##浆
+##浇
+##浊
+##测
+##济
+##浏
+##浑
+##浒
+##浓
+##浔
+##浙
+##浚
+##浜
+##浣
+##浦
+##浩
+##浪
+##浬
+##浮
+##浯
+##浴
+##海
+##浸
+##涂
+##涅
+##涇
+##消
+##涉
+##涌
+##涎
+##涓
+##涔
+##涕
+##涙
+##涛
+##涝
+##涞
+##涟
+##涠
+##涡
+##涣
+##涤
+##润
+##涧
+##涨
+##涩
+##涪
+##涮
+##涯
+##液
+##涵
+##涸
+##涼
+##涿
+##淀
+##淄
+##淅
+##淆
+##淇
+##淋
+##淌
+##淑
+##淒
+##淖
+##淘
+##淙
+##淚
+##淞
+##淡
+##淤
+##淦
+##淨
+##淩
+##淪
+##淫
+##淬
+##淮
+##深
+##淳
+##淵
+##混
+##淹
+##淺
+##添
+##淼
+##清
+##済
+##渉
+##渊
+##渋
+##渍
+##渎
+##渐
+##渔
+##渗
+##渙
+##渚
+##減
+##渝
+##渠
+##渡
+##渣
+##渤
+##渥
+##渦
+##温
+##測
+##渭
+##港
+##渲
+##渴
+##游
+##渺
+##渾
+##湃
+##湄
+##湊
+##湍
+##湖
+##湘
+##湛
+##湟
+##湧
+##湫
+##湮
+##湯
+##湳
+##湾
+##湿
+##満
+##溃
+##溅
+##溉
+##溏
+##源
+##準
+##溜
+##溝
+##溟
+##溢
+##溥
+##溧
+##溪
+##溫
+##溯
+##溱
+##溴
+##溶
+##溺
+##溼
+##滁
+##滂
+##滄
+##滅
+##滇
+##滋
+##滌
+##滑
+##滓
+##滔
+##滕
+##滙
+##滚
+##滝
+##滞
+##滟
+##满
+##滢
+##滤
+##滥
+##滦
+##滨
+##滩
+##滬
+##滯
+##滲
+##滴
+##滷
+##滸
+##滾
+##滿
+##漁
+##漂
+##漆
+##漉
+##漏
+##漓
+##演
+##漕
+##漠
+##漢
+##漣
+##漩
+##漪
+##漫
+##漬
+##漯
+##漱
+##漲
+##漳
+##漸
+##漾
+##漿
+##潆
+##潇
+##潋
+##潍
+##潑
+##潔
+##潘
+##潛
+##潜
+##潞
+##潟
+##潢
+##潤
+##潦
+##潧
+##潭
+##潮
+##潰
+##潴
+##潸
+##潺
+##潼
+##澀
+##澄
+##澆
+##澈
+##澍
+##澎
+##澗
+##澜
+##澡
+##澤
+##澧
+##澱
+##澳
+##澹
+##激
+##濁
+##濂
+##濃
+##濑
+##濒
+##濕
+##濘
+##濛
+##濟
+##濠
+##濡
+##濤
+##濫
+##濬
+##濮
+##濯
+##濱
+##濺
+##濾
+##瀅
+##瀆
+##瀉
+##瀋
+##瀏
+##瀑
+##瀕
+##瀘
+##瀚
+##瀛
+##瀝
+##瀞
+##瀟
+##瀧
+##瀨
+##瀬
+##瀰
+##瀾
+##灌
+##灏
+##灑
+##灘
+##灝
+##灞
+##灣
+##火
+##灬
+##灭
+##灯
+##灰
+##灵
+##灶
+##灸
+##灼
+##災
+##灾
+##灿
+##炀
+##炁
+##炅
+##炉
+##炊
+##炎
+##炒
+##炔
+##炕
+##炖
+##炙
+##炜
+##炫
+##炬
+##炭
+##炮
+##炯
+##炳
+##炷
+##炸
+##点
+##為
+##炼
+##炽
+##烁
+##烂
+##烃
+##烈
+##烊
+##烏
+##烘
+##烙
+##烛
+##烟
+##烤
+##烦
+##烧
+##烨
+##烩
+##烫
+##烬
+##热
+##烯
+##烷
+##烹
+##烽
+##焉
+##焊
+##焕
+##焖
+##焗
+##焘
+##焙
+##焚
+##焜
+##無
+##焦
+##焯
+##焰
+##焱
+##然
+##焼
+##煅
+##煉
+##煊
+##煌
+##煎
+##煒
+##煖
+##煙
+##煜
+##煞
+##煤
+##煥
+##煦
+##照
+##煨
+##煩
+##煮
+##煲
+##煸
+##煽
+##熄
+##熊
+##熏
+##熒
+##熔
+##熙
+##熟
+##熠
+##熨
+##熬
+##熱
+##熵
+##熹
+##熾
+##燁
+##燃
+##燄
+##燈
+##燉
+##燊
+##燎
+##燒
+##燔
+##燕
+##燙
+##燜
+##營
+##燥
+##燦
+##燧
+##燭
+##燮
+##燴
+##燻
+##燼
+##燿
+##爆
+##爍
+##爐
+##爛
+##爪
+##爬
+##爭
+##爰
+##爱
+##爲
+##爵
+##父
+##爷
+##爸
+##爹
+##爺
+##爻
+##爽
+##爾
+##牆
+##片
+##版
+##牌
+##牍
+##牒
+##牙
+##牛
+##牝
+##牟
+##牠
+##牡
+##牢
+##牦
+##牧
+##物
+##牯
+##牲
+##牴
+##牵
+##特
+##牺
+##牽
+##犀
+##犁
+##犄
+##犊
+##犍
+##犒
+##犢
+##犧
+##犬
+##犯
+##状
+##犷
+##犸
+##犹
+##狀
+##狂
+##狄
+##狈
+##狎
+##狐
+##狒
+##狗
+##狙
+##狞
+##狠
+##狡
+##狩
+##独
+##狭
+##狮
+##狰
+##狱
+##狸
+##狹
+##狼
+##狽
+##猎
+##猕
+##猖
+##猗
+##猙
+##猛
+##猜
+##猝
+##猥
+##猩
+##猪
+##猫
+##猬
+##献
+##猴
+##猶
+##猷
+##猾
+##猿
+##獄
+##獅
+##獎
+##獐
+##獒
+##獗
+##獠
+##獣
+##獨
+##獭
+##獰
+##獲
+##獵
+##獷
+##獸
+##獺
+##獻
+##獼
+##獾
+##玄
+##率
+##玉
+##王
+##玑
+##玖
+##玛
+##玟
+##玠
+##玥
+##玩
+##玫
+##玮
+##环
+##现
+##玲
+##玳
+##玷
+##玺
+##玻
+##珀
+##珂
+##珅
+##珈
+##珉
+##珊
+##珍
+##珏
+##珐
+##珑
+##珙
+##珞
+##珠
+##珣
+##珥
+##珩
+##珪
+##班
+##珮
+##珲
+##珺
+##現
+##球
+##琅
+##理
+##琇
+##琉
+##琊
+##琍
+##琏
+##琐
+##琛
+##琢
+##琥
+##琦
+##琨
+##琪
+##琬
+##琮
+##琰
+##琲
+##琳
+##琴
+##琵
+##琶
+##琺
+##琼
+##瑀
+##瑁
+##瑄
+##瑋
+##瑕
+##瑗
+##瑙
+##瑚
+##瑛
+##瑜
+##瑞
+##瑟
+##瑠
+##瑣
+##瑤
+##瑩
+##瑪
+##瑯
+##瑰
+##瑶
+##瑾
+##璀
+##璁
+##璃
+##璇
+##璉
+##璋
+##璎
+##璐
+##璜
+##璞
+##璟
+##璧
+##璨
+##環
+##璽
+##璿
+##瓊
+##瓏
+##瓒
+##瓜
+##瓢
+##瓣
+##瓤
+##瓦
+##瓮
+##瓯
+##瓴
+##瓶
+##瓷
+##甄
+##甌
+##甕
+##甘
+##甙
+##甚
+##甜
+##生
+##產
+##産
+##甥
+##甦
+##用
+##甩
+##甫
+##甬
+##甭
+##甯
+##田
+##由
+##甲
+##申
+##电
+##男
+##甸
+##町
+##画
+##甾
+##畀
+##畅
+##界
+##畏
+##畑
+##畔
+##留
+##畜
+##畝
+##畢
+##略
+##畦
+##番
+##畫
+##異
+##畲
+##畳
+##畴
+##當
+##畸
+##畹
+##畿
+##疆
+##疇
+##疊
+##疏
+##疑
+##疔
+##疖
+##疗
+##疙
+##疚
+##疝
+##疟
+##疡
+##疣
+##疤
+##疥
+##疫
+##疮
+##疯
+##疱
+##疲
+##疳
+##疵
+##疸
+##疹
+##疼
+##疽
+##疾
+##痂
+##病
+##症
+##痈
+##痉
+##痊
+##痍
+##痒
+##痔
+##痕
+##痘
+##痙
+##痛
+##痞
+##痠
+##痢
+##痣
+##痤
+##痧
+##痨
+##痪
+##痫
+##痰
+##痱
+##痴
+##痹
+##痺
+##痼
+##痿
+##瘀
+##瘁
+##瘋
+##瘍
+##瘓
+##瘘
+##瘙
+##瘟
+##瘠
+##瘡
+##瘢
+##瘤
+##瘦
+##瘧
+##瘩
+##瘪
+##瘫
+##瘴
+##瘸
+##瘾
+##療
+##癇
+##癌
+##癒
+##癖
+##癜
+##癞
+##癡
+##癢
+##癣
+##癥
+##癫
+##癬
+##癮
+##癱
+##癲
+##癸
+##発
+##登
+##發
+##白
+##百
+##皂
+##的
+##皆
+##皇
+##皈
+##皋
+##皎
+##皑
+##皓
+##皖
+##皙
+##皚
+##皮
+##皰
+##皱
+##皴
+##皺
+##皿
+##盂
+##盃
+##盅
+##盆
+##盈
+##益
+##盎
+##盏
+##盐
+##监
+##盒
+##盔
+##盖
+##盗
+##盘
+##盛
+##盜
+##盞
+##盟
+##盡
+##監
+##盤
+##盥
+##盧
+##盪
+##目
+##盯
+##盱
+##盲
+##直
+##相
+##盹
+##盼
+##盾
+##省
+##眈
+##眉
+##看
+##県
+##眙
+##眞
+##真
+##眠
+##眦
+##眨
+##眩
+##眯
+##眶
+##眷
+##眸
+##眺
+##眼
+##眾
+##着
+##睁
+##睇
+##睏
+##睐
+##睑
+##睛
+##睜
+##睞
+##睡
+##睢
+##督
+##睥
+##睦
+##睨
+##睪
+##睫
+##睬
+##睹
+##睽
+##睾
+##睿
+##瞄
+##瞅
+##瞇
+##瞋
+##瞌
+##瞎
+##瞑
+##瞒
+##瞓
+##瞞
+##瞟
+##瞠
+##瞥
+##瞧
+##瞩
+##瞪
+##瞬
+##瞭
+##瞰
+##瞳
+##瞻
+##瞼
+##瞿
+##矇
+##矍
+##矗
+##矚
+##矛
+##矜
+##矢
+##矣
+##知
+##矩
+##矫
+##短
+##矮
+##矯
+##石
+##矶
+##矽
+##矾
+##矿
+##码
+##砂
+##砌
+##砍
+##砒
+##研
+##砖
+##砗
+##砚
+##砝
+##砣
+##砥
+##砧
+##砭
+##砰
+##砲
+##破
+##砷
+##砸
+##砺
+##砼
+##砾
+##础
+##硅
+##硐
+##硒
+##硕
+##硝
+##硫
+##硬
+##确
+##硯
+##硼
+##碁
+##碇
+##碉
+##碌
+##碍
+##碎
+##碑
+##碓
+##碗
+##碘
+##碚
+##碛
+##碟
+##碣
+##碧
+##碩
+##碰
+##碱
+##碳
+##碴
+##確
+##碼
+##碾
+##磁
+##磅
+##磊
+##磋
+##磐
+##磕
+##磚
+##磡
+##磨
+##磬
+##磯
+##磲
+##磷
+##磺
+##礁
+##礎
+##礙
+##礡
+##礦
+##礪
+##礫
+##礴
+##示
+##礼
+##社
+##祀
+##祁
+##祂
+##祇
+##祈
+##祉
+##祎
+##祐
+##祕
+##祖
+##祗
+##祚
+##祛
+##祜
+##祝
+##神
+##祟
+##祠
+##祢
+##祥
+##票
+##祭
+##祯
+##祷
+##祸
+##祺
+##祿
+##禀
+##禁
+##禄
+##禅
+##禍
+##禎
+##福
+##禛
+##禦
+##禧
+##禪
+##禮
+##禱
+##禹
+##禺
+##离
+##禽
+##禾
+##禿
+##秀
+##私
+##秃
+##秆
+##秉
+##秋
+##种
+##科
+##秒
+##秘
+##租
+##秣
+##秤
+##秦
+##秧
+##秩
+##秭
+##积
+##称
+##秸
+##移
+##秽
+##稀
+##稅
+##程
+##稍
+##税
+##稔
+##稗
+##稚
+##稜
+##稞
+##稟
+##稠
+##稣
+##種
+##稱
+##稲
+##稳
+##稷
+##稹
+##稻
+##稼
+##稽
+##稿
+##穀
+##穂
+##穆
+##穌
+##積
+##穎
+##穗
+##穢
+##穩
+##穫
+##穴
+##究
+##穷
+##穹
+##空
+##穿
+##突
+##窃
+##窄
+##窈
+##窍
+##窑
+##窒
+##窓
+##窕
+##窖
+##窗
+##窘
+##窜
+##窝
+##窟
+##窠
+##窥
+##窦
+##窨
+##窩
+##窪
+##窮
+##窯
+##窺
+##窿
+##竄
+##竅
+##竇
+##竊
+##立
+##竖
+##站
+##竜
+##竞
+##竟
+##章
+##竣
+##童
+##竭
+##端
+##競
+##竹
+##竺
+##竽
+##竿
+##笃
+##笆
+##笈
+##笋
+##笏
+##笑
+##笔
+##笙
+##笛
+##笞
+##笠
+##符
+##笨
+##第
+##笹
+##笺
+##笼
+##筆
+##等
+##筊
+##筋
+##筍
+##筏
+##筐
+##筑
+##筒
+##答
+##策
+##筛
+##筝
+##筠
+##筱
+##筲
+##筵
+##筷
+##筹
+##签
+##简
+##箇
+##箋
+##箍
+##箏
+##箐
+##箔
+##箕
+##算
+##箝
+##管
+##箩
+##箫
+##箭
+##箱
+##箴
+##箸
+##節
+##篁
+##範
+##篆
+##篇
+##築
+##篑
+##篓
+##篙
+##篝
+##篠
+##篡
+##篤
+##篩
+##篪
+##篮
+##篱
+##篷
+##簇
+##簌
+##簍
+##簡
+##簦
+##簧
+##簪
+##簫
+##簷
+##簸
+##簽
+##簾
+##簿
+##籁
+##籃
+##籌
+##籍
+##籐
+##籟
+##籠
+##籤
+##籬
+##籮
+##籲
+##米
+##类
+##籼
+##籽
+##粄
+##粉
+##粑
+##粒
+##粕
+##粗
+##粘
+##粟
+##粤
+##粥
+##粧
+##粪
+##粮
+##粱
+##粲
+##粳
+##粵
+##粹
+##粼
+##粽
+##精
+##粿
+##糅
+##糊
+##糍
+##糕
+##糖
+##糗
+##糙
+##糜
+##糞
+##糟
+##糠
+##糧
+##糬
+##糯
+##糰
+##糸
+##系
+##糾
+##紀
+##紂
+##約
+##紅
+##紉
+##紊
+##紋
+##納
+##紐
+##紓
+##純
+##紗
+##紘
+##紙
+##級
+##紛
+##紜
+##素
+##紡
+##索
+##紧
+##紫
+##紮
+##累
+##細
+##紳
+##紹
+##紺
+##終
+##絃
+##組
+##絆
+##経
+##結
+##絕
+##絞
+##絡
+##絢
+##給
+##絨
+##絮
+##統
+##絲
+##絳
+##絵
+##絶
+##絹
+##綁
+##綏
+##綑
+##經
+##継
+##続
+##綜
+##綠
+##綢
+##綦
+##綫
+##綬
+##維
+##綱
+##網
+##綴
+##綵
+##綸
+##綺
+##綻
+##綽
+##綾
+##綿
+##緊
+##緋
+##総
+##緑
+##緒
+##緘
+##線
+##緝
+##緞
+##締
+##緣
+##編
+##緩
+##緬
+##緯
+##練
+##緹
+##緻
+##縁
+##縄
+##縈
+##縛
+##縝
+##縣
+##縫
+##縮
+##縱
+##縴
+##縷
+##總
+##績
+##繁
+##繃
+##繆
+##繇
+##繋
+##織
+##繕
+##繚
+##繞
+##繡
+##繩
+##繪
+##繫
+##繭
+##繳
+##繹
+##繼
+##繽
+##纂
+##續
+##纍
+##纏
+##纓
+##纔
+##纖
+##纜
+##纠
+##红
+##纣
+##纤
+##约
+##级
+##纨
+##纪
+##纫
+##纬
+##纭
+##纯
+##纰
+##纱
+##纲
+##纳
+##纵
+##纶
+##纷
+##纸
+##纹
+##纺
+##纽
+##纾
+##线
+##绀
+##练
+##组
+##绅
+##细
+##织
+##终
+##绊
+##绍
+##绎
+##经
+##绑
+##绒
+##结
+##绔
+##绕
+##绘
+##给
+##绚
+##绛
+##络
+##绝
+##绞
+##统
+##绡
+##绢
+##绣
+##绥
+##绦
+##继
+##绩
+##绪
+##绫
+##续
+##绮
+##绯
+##绰
+##绳
+##维
+##绵
+##绶
+##绷
+##绸
+##绻
+##综
+##绽
+##绾
+##绿
+##缀
+##缄
+##缅
+##缆
+##缇
+##缈
+##缉
+##缎
+##缓
+##缔
+##缕
+##编
+##缘
+##缙
+##缚
+##缜
+##缝
+##缠
+##缢
+##缤
+##缥
+##缨
+##缩
+##缪
+##缭
+##缮
+##缰
+##缱
+##缴
+##缸
+##缺
+##缽
+##罂
+##罄
+##罌
+##罐
+##网
+##罔
+##罕
+##罗
+##罚
+##罡
+##罢
+##罩
+##罪
+##置
+##罰
+##署
+##罵
+##罷
+##罹
+##羁
+##羅
+##羈
+##羊
+##羌
+##美
+##羔
+##羚
+##羞
+##羟
+##羡
+##羣
+##群
+##羥
+##羧
+##羨
+##義
+##羯
+##羲
+##羸
+##羹
+##羽
+##羿
+##翁
+##翅
+##翊
+##翌
+##翎
+##習
+##翔
+##翘
+##翟
+##翠
+##翡
+##翦
+##翩
+##翰
+##翱
+##翳
+##翹
+##翻
+##翼
+##耀
+##老
+##考
+##耄
+##者
+##耆
+##耋
+##而
+##耍
+##耐
+##耒
+##耕
+##耗
+##耘
+##耙
+##耦
+##耨
+##耳
+##耶
+##耷
+##耸
+##耻
+##耽
+##耿
+##聂
+##聆
+##聊
+##聋
+##职
+##聒
+##联
+##聖
+##聘
+##聚
+##聞
+##聪
+##聯
+##聰
+##聲
+##聳
+##聴
+##聶
+##職
+##聽
+##聾
+##聿
+##肃
+##肄
+##肅
+##肆
+##肇
+##肉
+##肋
+##肌
+##肏
+##肓
+##肖
+##肘
+##肚
+##肛
+##肝
+##肠
+##股
+##肢
+##肤
+##肥
+##肩
+##肪
+##肮
+##肯
+##肱
+##育
+##肴
+##肺
+##肽
+##肾
+##肿
+##胀
+##胁
+##胃
+##胄
+##胆
+##背
+##胍
+##胎
+##胖
+##胚
+##胛
+##胜
+##胝
+##胞
+##胡
+##胤
+##胥
+##胧
+##胫
+##胭
+##胯
+##胰
+##胱
+##胳
+##胴
+##胶
+##胸
+##胺
+##能
+##脂
+##脅
+##脆
+##脇
+##脈
+##脉
+##脊
+##脍
+##脏
+##脐
+##脑
+##脓
+##脖
+##脘
+##脚
+##脛
+##脣
+##脩
+##脫
+##脯
+##脱
+##脲
+##脳
+##脸
+##脹
+##脾
+##腆
+##腈
+##腊
+##腋
+##腌
+##腎
+##腐
+##腑
+##腓
+##腔
+##腕
+##腥
+##腦
+##腩
+##腫
+##腭
+##腮
+##腰
+##腱
+##腳
+##腴
+##腸
+##腹
+##腺
+##腻
+##腼
+##腾
+##腿
+##膀
+##膈
+##膊
+##膏
+##膑
+##膘
+##膚
+##膛
+##膜
+##膝
+##膠
+##膦
+##膨
+##膩
+##膳
+##膺
+##膻
+##膽
+##膾
+##膿
+##臀
+##臂
+##臃
+##臆
+##臉
+##臊
+##臍
+##臓
+##臘
+##臟
+##臣
+##臥
+##臧
+##臨
+##自
+##臬
+##臭
+##至
+##致
+##臺
+##臻
+##臼
+##臾
+##舀
+##舂
+##舅
+##舆
+##與
+##興
+##舉
+##舊
+##舌
+##舍
+##舎
+##舐
+##舒
+##舔
+##舖
+##舗
+##舛
+##舜
+##舞
+##舟
+##航
+##舫
+##般
+##舰
+##舱
+##舵
+##舶
+##舷
+##舸
+##船
+##舺
+##舾
+##艇
+##艋
+##艘
+##艙
+##艦
+##艮
+##良
+##艰
+##艱
+##色
+##艳
+##艷
+##艹
+##艺
+##艾
+##节
+##芃
+##芈
+##芊
+##芋
+##芍
+##芎
+##芒
+##芙
+##芜
+##芝
+##芡
+##芥
+##芦
+##芩
+##芪
+##芫
+##芬
+##芭
+##芮
+##芯
+##花
+##芳
+##芷
+##芸
+##芹
+##芻
+##芽
+##芾
+##苁
+##苄
+##苇
+##苋
+##苍
+##苏
+##苑
+##苒
+##苓
+##苔
+##苕
+##苗
+##苛
+##苜
+##苞
+##苟
+##苡
+##苣
+##若
+##苦
+##苫
+##苯
+##英
+##苷
+##苹
+##苻
+##茁
+##茂
+##范
+##茄
+##茅
+##茉
+##茎
+##茏
+##茗
+##茜
+##茧
+##茨
+##茫
+##茬
+##茭
+##茯
+##茱
+##茲
+##茴
+##茵
+##茶
+##茸
+##茹
+##茼
+##荀
+##荃
+##荆
+##草
+##荊
+##荏
+##荐
+##荒
+##荔
+##荖
+##荘
+##荚
+##荞
+##荟
+##荠
+##荡
+##荣
+##荤
+##荥
+##荧
+##荨
+##荪
+##荫
+##药
+##荳
+##荷
+##荸
+##荻
+##荼
+##荽
+##莅
+##莆
+##莉
+##莊
+##莎
+##莒
+##莓
+##莖
+##莘
+##莞
+##莠
+##莢
+##莧
+##莪
+##莫
+##莱
+##莲
+##莴
+##获
+##莹
+##莺
+##莽
+##莿
+##菀
+##菁
+##菅
+##菇
+##菈
+##菊
+##菌
+##菏
+##菓
+##菖
+##菘
+##菜
+##菟
+##菠
+##菡
+##菩
+##華
+##菱
+##菲
+##菸
+##菽
+##萁
+##萃
+##萄
+##萊
+##萋
+##萌
+##萍
+##萎
+##萘
+##萝
+##萤
+##营
+##萦
+##萧
+##萨
+##萩
+##萬
+##萱
+##萵
+##萸
+##萼
+##落
+##葆
+##葉
+##著
+##葚
+##葛
+##葡
+##董
+##葦
+##葩
+##葫
+##葬
+##葭
+##葯
+##葱
+##葳
+##葵
+##葷
+##葺
+##蒂
+##蒋
+##蒐
+##蒔
+##蒙
+##蒜
+##蒞
+##蒟
+##蒡
+##蒨
+##蒲
+##蒸
+##蒹
+##蒻
+##蒼
+##蒿
+##蓁
+##蓄
+##蓆
+##蓉
+##蓋
+##蓑
+##蓓
+##蓖
+##蓝
+##蓟
+##蓦
+##蓬
+##蓮
+##蓼
+##蓿
+##蔑
+##蔓
+##蔔
+##蔗
+##蔘
+##蔚
+##蔡
+##蔣
+##蔥
+##蔫
+##蔬
+##蔭
+##蔵
+##蔷
+##蔺
+##蔻
+##蔼
+##蔽
+##蕁
+##蕃
+##蕈
+##蕉
+##蕊
+##蕎
+##蕙
+##蕤
+##蕨
+##蕩
+##蕪
+##蕭
+##蕲
+##蕴
+##蕻
+##蕾
+##薄
+##薅
+##薇
+##薈
+##薊
+##薏
+##薑
+##薔
+##薙
+##薛
+##薦
+##薨
+##薩
+##薪
+##薬
+##薯
+##薰
+##薹
+##藉
+##藍
+##藏
+##藐
+##藓
+##藕
+##藜
+##藝
+##藤
+##藥
+##藩
+##藹
+##藻
+##藿
+##蘆
+##蘇
+##蘊
+##蘋
+##蘑
+##蘚
+##蘭
+##蘸
+##蘼
+##蘿
+##虎
+##虏
+##虐
+##虑
+##虔
+##處
+##虚
+##虛
+##虜
+##虞
+##號
+##虢
+##虧
+##虫
+##虬
+##虱
+##虹
+##虻
+##虽
+##虾
+##蚀
+##蚁
+##蚂
+##蚊
+##蚌
+##蚓
+##蚕
+##蚜
+##蚝
+##蚣
+##蚤
+##蚩
+##蚪
+##蚯
+##蚱
+##蚵
+##蛀
+##蛆
+##蛇
+##蛊
+##蛋
+##蛎
+##蛐
+##蛔
+##蛙
+##蛛
+##蛟
+##蛤
+##蛭
+##蛮
+##蛰
+##蛳
+##蛹
+##蛻
+##蛾
+##蜀
+##蜂
+##蜃
+##蜆
+##蜇
+##蜈
+##蜊
+##蜍
+##蜒
+##蜓
+##蜕
+##蜗
+##蜘
+##蜚
+##蜜
+##蜡
+##蜢
+##蜥
+##蜱
+##蜴
+##蜷
+##蜻
+##蜿
+##蝇
+##蝈
+##蝉
+##蝌
+##蝎
+##蝕
+##蝗
+##蝙
+##蝟
+##蝠
+##蝦
+##蝨
+##蝴
+##蝶
+##蝸
+##蝼
+##螂
+##螃
+##融
+##螞
+##螢
+##螨
+##螯
+##螳
+##螺
+##蟀
+##蟄
+##蟆
+##蟋
+##蟎
+##蟑
+##蟒
+##蟠
+##蟬
+##蟲
+##蟹
+##蟻
+##蟾
+##蠅
+##蠍
+##蠔
+##蠕
+##蠛
+##蠟
+##蠡
+##蠢
+##蠣
+##蠱
+##蠶
+##蠹
+##蠻
+##血
+##衄
+##衅
+##衆
+##行
+##衍
+##術
+##衔
+##街
+##衙
+##衛
+##衝
+##衞
+##衡
+##衢
+##衣
+##补
+##表
+##衩
+##衫
+##衬
+##衮
+##衰
+##衲
+##衷
+##衹
+##衾
+##衿
+##袁
+##袂
+##袄
+##袅
+##袈
+##袋
+##袍
+##袒
+##袖
+##袜
+##袞
+##袤
+##袪
+##被
+##袭
+##袱
+##裁
+##裂
+##装
+##裆
+##裊
+##裏
+##裔
+##裕
+##裘
+##裙
+##補
+##裝
+##裟
+##裡
+##裤
+##裨
+##裱
+##裳
+##裴
+##裸
+##裹
+##製
+##裾
+##褂
+##複
+##褐
+##褒
+##褓
+##褔
+##褚
+##褥
+##褪
+##褫
+##褲
+##褶
+##褻
+##襁
+##襄
+##襟
+##襠
+##襪
+##襬
+##襯
+##襲
+##西
+##要
+##覃
+##覆
+##覇
+##見
+##規
+##覓
+##視
+##覚
+##覦
+##覧
+##親
+##覬
+##観
+##覷
+##覺
+##覽
+##觀
+##见
+##观
+##规
+##觅
+##视
+##览
+##觉
+##觊
+##觎
+##觐
+##觑
+##角
+##觞
+##解
+##觥
+##触
+##觸
+##言
+##訂
+##計
+##訊
+##討
+##訓
+##訕
+##訖
+##託
+##記
+##訛
+##訝
+##訟
+##訣
+##訥
+##訪
+##設
+##許
+##訳
+##訴
+##訶
+##診
+##註
+##証
+##詆
+##詐
+##詔
+##評
+##詛
+##詞
+##詠
+##詡
+##詢
+##詣
+##試
+##詩
+##詫
+##詬
+##詭
+##詮
+##詰
+##話
+##該
+##詳
+##詹
+##詼
+##誅
+##誇
+##誉
+##誌
+##認
+##誓
+##誕
+##誘
+##語
+##誠
+##誡
+##誣
+##誤
+##誥
+##誦
+##誨
+##說
+##説
+##読
+##誰
+##課
+##誹
+##誼
+##調
+##諄
+##談
+##請
+##諏
+##諒
+##論
+##諗
+##諜
+##諡
+##諦
+##諧
+##諫
+##諭
+##諮
+##諱
+##諳
+##諷
+##諸
+##諺
+##諾
+##謀
+##謁
+##謂
+##謄
+##謊
+##謎
+##謐
+##謔
+##謗
+##謙
+##講
+##謝
+##謠
+##謨
+##謬
+##謹
+##謾
+##譁
+##證
+##譎
+##譏
+##識
+##譙
+##譚
+##譜
+##警
+##譬
+##譯
+##議
+##譲
+##譴
+##護
+##譽
+##讀
+##變
+##讓
+##讚
+##讞
+##计
+##订
+##认
+##讥
+##讧
+##讨
+##让
+##讪
+##讫
+##训
+##议
+##讯
+##记
+##讲
+##讳
+##讴
+##讶
+##讷
+##许
+##讹
+##论
+##讼
+##讽
+##设
+##访
+##诀
+##证
+##诃
+##评
+##诅
+##识
+##诈
+##诉
+##诊
+##诋
+##词
+##诏
+##译
+##试
+##诗
+##诘
+##诙
+##诚
+##诛
+##话
+##诞
+##诟
+##诠
+##诡
+##询
+##诣
+##诤
+##该
+##详
+##诧
+##诩
+##诫
+##诬
+##语
+##误
+##诰
+##诱
+##诲
+##说
+##诵
+##诶
+##请
+##诸
+##诺
+##读
+##诽
+##课
+##诿
+##谀
+##谁
+##调
+##谄
+##谅
+##谆
+##谈
+##谊
+##谋
+##谌
+##谍
+##谎
+##谏
+##谐
+##谑
+##谒
+##谓
+##谔
+##谕
+##谗
+##谘
+##谙
+##谚
+##谛
+##谜
+##谟
+##谢
+##谣
+##谤
+##谥
+##谦
+##谧
+##谨
+##谩
+##谪
+##谬
+##谭
+##谯
+##谱
+##谲
+##谴
+##谶
+##谷
+##豁
+##豆
+##豇
+##豈
+##豉
+##豊
+##豌
+##豎
+##豐
+##豔
+##豚
+##象
+##豢
+##豪
+##豫
+##豬
+##豹
+##豺
+##貂
+##貅
+##貌
+##貓
+##貔
+##貘
+##貝
+##貞
+##負
+##財
+##貢
+##貧
+##貨
+##販
+##貪
+##貫
+##責
+##貯
+##貰
+##貳
+##貴
+##貶
+##買
+##貸
+##費
+##貼
+##貽
+##貿
+##賀
+##賁
+##賂
+##賃
+##賄
+##資
+##賈
+##賊
+##賑
+##賓
+##賜
+##賞
+##賠
+##賡
+##賢
+##賣
+##賤
+##賦
+##質
+##賬
+##賭
+##賴
+##賺
+##購
+##賽
+##贅
+##贈
+##贊
+##贍
+##贏
+##贓
+##贖
+##贛
+##贝
+##贞
+##负
+##贡
+##财
+##责
+##贤
+##败
+##账
+##货
+##质
+##贩
+##贪
+##贫
+##贬
+##购
+##贮
+##贯
+##贰
+##贱
+##贲
+##贴
+##贵
+##贷
+##贸
+##费
+##贺
+##贻
+##贼
+##贾
+##贿
+##赁
+##赂
+##赃
+##资
+##赅
+##赈
+##赊
+##赋
+##赌
+##赎
+##赏
+##赐
+##赓
+##赔
+##赖
+##赘
+##赚
+##赛
+##赝
+##赞
+##赠
+##赡
+##赢
+##赣
+##赤
+##赦
+##赧
+##赫
+##赭
+##走
+##赳
+##赴
+##赵
+##赶
+##起
+##趁
+##超
+##越
+##趋
+##趕
+##趙
+##趟
+##趣
+##趨
+##足
+##趴
+##趵
+##趸
+##趺
+##趾
+##跃
+##跄
+##跆
+##跋
+##跌
+##跎
+##跑
+##跖
+##跚
+##跛
+##距
+##跟
+##跡
+##跤
+##跨
+##跩
+##跪
+##路
+##跳
+##践
+##跷
+##跹
+##跺
+##跻
+##踉
+##踊
+##踌
+##踏
+##踐
+##踝
+##踞
+##踟
+##踢
+##踩
+##踪
+##踮
+##踱
+##踴
+##踵
+##踹
+##蹂
+##蹄
+##蹇
+##蹈
+##蹉
+##蹊
+##蹋
+##蹑
+##蹒
+##蹙
+##蹟
+##蹣
+##蹤
+##蹦
+##蹩
+##蹬
+##蹭
+##蹲
+##蹴
+##蹶
+##蹺
+##蹼
+##蹿
+##躁
+##躇
+##躉
+##躊
+##躋
+##躍
+##躏
+##躪
+##身
+##躬
+##躯
+##躲
+##躺
+##軀
+##車
+##軋
+##軌
+##軍
+##軒
+##軟
+##転
+##軸
+##軼
+##軽
+##軾
+##較
+##載
+##輒
+##輓
+##輔
+##輕
+##輛
+##輝
+##輟
+##輩
+##輪
+##輯
+##輸
+##輻
+##輾
+##輿
+##轄
+##轅
+##轆
+##轉
+##轍
+##轎
+##轟
+##车
+##轧
+##轨
+##轩
+##转
+##轭
+##轮
+##软
+##轰
+##轲
+##轴
+##轶
+##轻
+##轼
+##载
+##轿
+##较
+##辄
+##辅
+##辆
+##辇
+##辈
+##辉
+##辊
+##辍
+##辐
+##辑
+##输
+##辕
+##辖
+##辗
+##辘
+##辙
+##辛
+##辜
+##辞
+##辟
+##辣
+##辦
+##辨
+##辩
+##辫
+##辭
+##辮
+##辯
+##辰
+##辱
+##農
+##边
+##辺
+##辻
+##込
+##辽
+##达
+##迁
+##迂
+##迄
+##迅
+##过
+##迈
+##迎
+##运
+##近
+##返
+##还
+##这
+##进
+##远
+##违
+##连
+##迟
+##迢
+##迤
+##迥
+##迦
+##迩
+##迪
+##迫
+##迭
+##述
+##迴
+##迷
+##迸
+##迹
+##迺
+##追
+##退
+##送
+##适
+##逃
+##逅
+##逆
+##选
+##逊
+##逍
+##透
+##逐
+##递
+##途
+##逕
+##逗
+##這
+##通
+##逛
+##逝
+##逞
+##速
+##造
+##逢
+##連
+##逮
+##週
+##進
+##逵
+##逶
+##逸
+##逻
+##逼
+##逾
+##遁
+##遂
+##遅
+##遇
+##遊
+##運
+##遍
+##過
+##遏
+##遐
+##遑
+##遒
+##道
+##達
+##違
+##遗
+##遙
+##遛
+##遜
+##遞
+##遠
+##遢
+##遣
+##遥
+##遨
+##適
+##遭
+##遮
+##遲
+##遴
+##遵
+##遶
+##遷
+##選
+##遺
+##遼
+##遽
+##避
+##邀
+##邁
+##邂
+##邃
+##還
+##邇
+##邈
+##邊
+##邋
+##邏
+##邑
+##邓
+##邕
+##邛
+##邝
+##邢
+##那
+##邦
+##邨
+##邪
+##邬
+##邮
+##邯
+##邰
+##邱
+##邳
+##邵
+##邸
+##邹
+##邺
+##邻
+##郁
+##郅
+##郊
+##郎
+##郑
+##郜
+##郝
+##郡
+##郢
+##郤
+##郦
+##郧
+##部
+##郫
+##郭
+##郴
+##郵
+##郷
+##郸
+##都
+##鄂
+##鄉
+##鄒
+##鄔
+##鄙
+##鄞
+##鄢
+##鄧
+##鄭
+##鄰
+##鄱
+##鄲
+##鄺
+##酉
+##酊
+##酋
+##酌
+##配
+##酐
+##酒
+##酗
+##酚
+##酝
+##酢
+##酣
+##酥
+##酩
+##酪
+##酬
+##酮
+##酯
+##酰
+##酱
+##酵
+##酶
+##酷
+##酸
+##酿
+##醃
+##醇
+##醉
+##醋
+##醍
+##醐
+##醒
+##醚
+##醛
+##醜
+##醞
+##醣
+##醪
+##醫
+##醬
+##醮
+##醯
+##醴
+##醺
+##釀
+##釁
+##采
+##釉
+##释
+##釋
+##里
+##重
+##野
+##量
+##釐
+##金
+##釗
+##釘
+##釜
+##針
+##釣
+##釦
+##釧
+##釵
+##鈀
+##鈉
+##鈍
+##鈎
+##鈔
+##鈕
+##鈞
+##鈣
+##鈦
+##鈪
+##鈴
+##鈺
+##鈾
+##鉀
+##鉄
+##鉅
+##鉉
+##鉑
+##鉗
+##鉚
+##鉛
+##鉤
+##鉴
+##鉻
+##銀
+##銃
+##銅
+##銑
+##銓
+##銖
+##銘
+##銜
+##銬
+##銭
+##銮
+##銳
+##銷
+##銹
+##鋁
+##鋅
+##鋒
+##鋤
+##鋪
+##鋰
+##鋸
+##鋼
+##錄
+##錐
+##錘
+##錚
+##錠
+##錢
+##錦
+##錨
+##錫
+##錮
+##錯
+##録
+##錳
+##錶
+##鍊
+##鍋
+##鍍
+##鍛
+##鍥
+##鍰
+##鍵
+##鍺
+##鍾
+##鎂
+##鎊
+##鎌
+##鎏
+##鎔
+##鎖
+##鎗
+##鎚
+##鎧
+##鎬
+##鎮
+##鎳
+##鏈
+##鏖
+##鏗
+##鏘
+##鏞
+##鏟
+##鏡
+##鏢
+##鏤
+##鏽
+##鐘
+##鐮
+##鐲
+##鐳
+##鐵
+##鐸
+##鐺
+##鑄
+##鑊
+##鑑
+##鑒
+##鑣
+##鑫
+##鑰
+##鑲
+##鑼
+##鑽
+##鑾
+##鑿
+##针
+##钉
+##钊
+##钎
+##钏
+##钒
+##钓
+##钗
+##钙
+##钛
+##钜
+##钝
+##钞
+##钟
+##钠
+##钡
+##钢
+##钣
+##钤
+##钥
+##钦
+##钧
+##钨
+##钩
+##钮
+##钯
+##钰
+##钱
+##钳
+##钴
+##钵
+##钺
+##钻
+##钼
+##钾
+##钿
+##铀
+##铁
+##铂
+##铃
+##铄
+##铅
+##铆
+##铉
+##铎
+##铐
+##铛
+##铜
+##铝
+##铠
+##铡
+##铢
+##铣
+##铤
+##铨
+##铩
+##铬
+##铭
+##铮
+##铰
+##铲
+##铵
+##银
+##铸
+##铺
+##链
+##铿
+##销
+##锁
+##锂
+##锄
+##锅
+##锆
+##锈
+##锉
+##锋
+##锌
+##锏
+##锐
+##锑
+##错
+##锚
+##锟
+##锡
+##锢
+##锣
+##锤
+##锥
+##锦
+##锭
+##键
+##锯
+##锰
+##锲
+##锵
+##锹
+##锺
+##锻
+##镀
+##镁
+##镂
+##镇
+##镉
+##镌
+##镍
+##镐
+##镑
+##镕
+##镖
+##镗
+##镛
+##镜
+##镣
+##镭
+##镯
+##镰
+##镳
+##镶
+##長
+##长
+##門
+##閃
+##閉
+##開
+##閎
+##閏
+##閑
+##閒
+##間
+##閔
+##閘
+##閡
+##関
+##閣
+##閥
+##閨
+##閩
+##閱
+##閲
+##閹
+##閻
+##閾
+##闆
+##闇
+##闊
+##闌
+##闍
+##闔
+##闕
+##闖
+##闘
+##關
+##闡
+##闢
+##门
+##闪
+##闫
+##闭
+##问
+##闯
+##闰
+##闲
+##间
+##闵
+##闷
+##闸
+##闹
+##闺
+##闻
+##闽
+##闾
+##阀
+##阁
+##阂
+##阅
+##阆
+##阇
+##阈
+##阉
+##阎
+##阐
+##阑
+##阔
+##阕
+##阖
+##阙
+##阚
+##阜
+##队
+##阡
+##阪
+##阮
+##阱
+##防
+##阳
+##阴
+##阵
+##阶
+##阻
+##阿
+##陀
+##陂
+##附
+##际
+##陆
+##陇
+##陈
+##陋
+##陌
+##降
+##限
+##陕
+##陛
+##陝
+##陞
+##陟
+##陡
+##院
+##陣
+##除
+##陨
+##险
+##陪
+##陰
+##陲
+##陳
+##陵
+##陶
+##陷
+##陸
+##険
+##陽
+##隅
+##隆
+##隈
+##隊
+##隋
+##隍
+##階
+##随
+##隐
+##隔
+##隕
+##隘
+##隙
+##際
+##障
+##隠
+##隣
+##隧
+##隨
+##險
+##隱
+##隴
+##隶
+##隸
+##隻
+##隼
+##隽
+##难
+##雀
+##雁
+##雄
+##雅
+##集
+##雇
+##雉
+##雋
+##雌
+##雍
+##雎
+##雏
+##雑
+##雒
+##雕
+##雖
+##雙
+##雛
+##雜
+##雞
+##離
+##難
+##雨
+##雪
+##雯
+##雰
+##雲
+##雳
+##零
+##雷
+##雹
+##電
+##雾
+##需
+##霁
+##霄
+##霆
+##震
+##霈
+##霉
+##霊
+##霍
+##霎
+##霏
+##霑
+##霓
+##霖
+##霜
+##霞
+##霧
+##霭
+##霰
+##露
+##霸
+##霹
+##霽
+##霾
+##靂
+##靄
+##靈
+##青
+##靓
+##靖
+##静
+##靚
+##靛
+##靜
+##非
+##靠
+##靡
+##面
+##靥
+##靦
+##革
+##靳
+##靴
+##靶
+##靼
+##鞅
+##鞋
+##鞍
+##鞏
+##鞑
+##鞘
+##鞠
+##鞣
+##鞦
+##鞭
+##韆
+##韋
+##韌
+##韓
+##韜
+##韦
+##韧
+##韩
+##韬
+##韭
+##音
+##韵
+##韶
+##韻
+##響
+##頁
+##頂
+##頃
+##項
+##順
+##須
+##頌
+##預
+##頑
+##頒
+##頓
+##頗
+##領
+##頜
+##頡
+##頤
+##頫
+##頭
+##頰
+##頷
+##頸
+##頹
+##頻
+##頼
+##顆
+##題
+##額
+##顎
+##顏
+##顔
+##願
+##顛
+##類
+##顧
+##顫
+##顯
+##顱
+##顴
+##页
+##顶
+##顷
+##项
+##顺
+##须
+##顼
+##顽
+##顾
+##顿
+##颁
+##颂
+##预
+##颅
+##领
+##颇
+##颈
+##颉
+##颊
+##颌
+##颍
+##颐
+##频
+##颓
+##颔
+##颖
+##颗
+##题
+##颚
+##颛
+##颜
+##额
+##颞
+##颠
+##颡
+##颢
+##颤
+##颦
+##颧
+##風
+##颯
+##颱
+##颳
+##颶
+##颼
+##飄
+##飆
+##风
+##飒
+##飓
+##飕
+##飘
+##飙
+##飚
+##飛
+##飞
+##食
+##飢
+##飨
+##飩
+##飪
+##飯
+##飲
+##飼
+##飽
+##飾
+##餃
+##餅
+##餉
+##養
+##餌
+##餐
+##餒
+##餓
+##餘
+##餚
+##餛
+##餞
+##餡
+##館
+##餮
+##餵
+##餾
+##饅
+##饈
+##饋
+##饌
+##饍
+##饑
+##饒
+##饕
+##饗
+##饞
+##饥
+##饨
+##饪
+##饬
+##饭
+##饮
+##饯
+##饰
+##饱
+##饲
+##饴
+##饵
+##饶
+##饷
+##饺
+##饼
+##饽
+##饿
+##馀
+##馁
+##馄
+##馅
+##馆
+##馈
+##馋
+##馍
+##馏
+##馒
+##馔
+##首
+##馗
+##香
+##馥
+##馨
+##馬
+##馭
+##馮
+##馳
+##馴
+##駁
+##駄
+##駅
+##駆
+##駐
+##駒
+##駕
+##駛
+##駝
+##駭
+##駱
+##駿
+##騁
+##騎
+##騏
+##験
+##騙
+##騨
+##騰
+##騷
+##驀
+##驅
+##驊
+##驍
+##驒
+##驕
+##驗
+##驚
+##驛
+##驟
+##驢
+##驥
+##马
+##驭
+##驮
+##驯
+##驰
+##驱
+##驳
+##驴
+##驶
+##驷
+##驸
+##驹
+##驻
+##驼
+##驾
+##驿
+##骁
+##骂
+##骄
+##骅
+##骆
+##骇
+##骈
+##骊
+##骋
+##验
+##骏
+##骐
+##骑
+##骗
+##骚
+##骛
+##骜
+##骞
+##骠
+##骡
+##骤
+##骥
+##骧
+##骨
+##骯
+##骰
+##骶
+##骷
+##骸
+##骼
+##髂
+##髅
+##髋
+##髏
+##髒
+##髓
+##體
+##髖
+##高
+##髦
+##髪
+##髮
+##髯
+##髻
+##鬃
+##鬆
+##鬍
+##鬓
+##鬚
+##鬟
+##鬢
+##鬣
+##鬥
+##鬧
+##鬱
+##鬼
+##魁
+##魂
+##魄
+##魅
+##魇
+##魍
+##魏
+##魔
+##魘
+##魚
+##魯
+##魷
+##鮑
+##鮨
+##鮪
+##鮭
+##鮮
+##鯉
+##鯊
+##鯖
+##鯛
+##鯨
+##鯰
+##鯽
+##鰍
+##鰓
+##鰭
+##鰲
+##鰻
+##鰾
+##鱈
+##鱉
+##鱔
+##鱗
+##鱷
+##鱸
+##鱼
+##鱿
+##鲁
+##鲈
+##鲍
+##鲑
+##鲛
+##鲜
+##鲟
+##鲢
+##鲤
+##鲨
+##鲫
+##鲱
+##鲲
+##鲶
+##鲷
+##鲸
+##鳃
+##鳄
+##鳅
+##鳌
+##鳍
+##鳕
+##鳖
+##鳗
+##鳝
+##鳞
+##鳥
+##鳩
+##鳳
+##鳴
+##鳶
+##鴉
+##鴕
+##鴛
+##鴦
+##鴨
+##鴻
+##鴿
+##鵑
+##鵜
+##鵝
+##鵡
+##鵬
+##鵰
+##鵲
+##鶘
+##鶩
+##鶯
+##鶴
+##鷗
+##鷲
+##鷹
+##鷺
+##鸚
+##鸞
+##鸟
+##鸠
+##鸡
+##鸢
+##鸣
+##鸥
+##鸦
+##鸨
+##鸪
+##鸭
+##鸯
+##鸳
+##鸵
+##鸽
+##鸾
+##鸿
+##鹂
+##鹃
+##鹄
+##鹅
+##鹈
+##鹉
+##鹊
+##鹌
+##鹏
+##鹑
+##鹕
+##鹘
+##鹜
+##鹞
+##鹤
+##鹦
+##鹧
+##鹫
+##鹭
+##鹰
+##鹳
+##鹵
+##鹹
+##鹼
+##鹽
+##鹿
+##麂
+##麋
+##麒
+##麓
+##麗
+##麝
+##麟
+##麥
+##麦
+##麩
+##麴
+##麵
+##麸
+##麺
+##麻
+##麼
+##麽
+##麾
+##黃
+##黄
+##黍
+##黎
+##黏
+##黑
+##黒
+##黔
+##默
+##黛
+##黜
+##黝
+##點
+##黠
+##黨
+##黯
+##黴
+##鼋
+##鼎
+##鼐
+##鼓
+##鼠
+##鼬
+##鼹
+##鼻
+##鼾
+##齁
+##齊
+##齋
+##齐
+##齒
+##齡
+##齢
+##齣
+##齦
+##齿
+##龄
+##龅
+##龈
+##龊
+##龋
+##龌
+##龍
+##龐
+##龔
+##龕
+##龙
+##龚
+##龛
+##龜
+##龟
+##︰
+##︱
+##︶
+##︿
+##﹁
+##﹂
+##﹍
+##﹏
+##﹐
+##﹑
+##﹒
+##﹔
+##﹕
+##﹖
+##﹗
+##﹙
+##﹚
+##﹝
+##﹞
+##﹡
+##﹣
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##`
+##f
+##h
+##j
+##u
+##w
+##z
+##{
+##}
+##。
+##「
+##」
+##、
+##・
+##ッ
+##ー
+##イ
+##ク
+##シ
+##ス
+##ト
+##ノ
+##フ
+##ラ
+##ル
+##ン
+##゙
+##゚
+## ̄
+##¥
+##👍
+##🔥
+##😂
+##😎

+ 84 - 0
botr/nsp/predict.py

@@ -0,0 +1,84 @@
+import os
+from torch.utils.data import DataLoader
+from transformers import AutoTokenizer
+import torch
+from botr.nsp.model import ElectraNSPModel
+from botr.nsp.processing import NSPProcessor, collate_fn
+
+model_path = os.path.abspath(os.path.dirname(__file__)) + '/model/'
+max_seq_len = 32
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = ElectraNSPModel.from_pretrained(model_path)
+model.eval()
+batch_size = 8
+
+
+def nsp_predict(data_list, has_label=False):
+    if not has_label:
+        data_list = [x + [False] for x in data_list]
+
+    next_list = []
+    for prompt, next_sentence, label in data_list:
+        data_list = [prompt + '\t' + next_sentence + '\t0']
+        processor = NSPProcessor(data_list)
+        predict_dataset = processor.load_examples(max_seq_len, tokenizer, data_type='predict')
+        predict_dataloader = DataLoader(predict_dataset, batch_size=batch_size, collate_fn=collate_fn)
+
+        with torch.no_grad():
+            for step, batch in enumerate(predict_dataloader):
+                batch = tuple(t.to(device) for t in batch)
+                inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
+                outputs = model(**inputs)
+        logits = outputs.logits
+
+        if logits[0, 0] < logits[0, 1]:
+            next_flag = True
+        else:
+            next_flag = False
+        print(prompt, next_sentence, label, next_flag)
+        next_list.append(next_flag)
+    return next_list
+
+
+if __name__ == '__main__':
+    # 包头市青山区人民政府 青山路办事处 地址:包头市青山区自由路1号
+    _list = [
+        ["青山路办事处", "我们", False],
+        ['包头市青山区人', '民政府', True],
+        ['民政府', '包头市青山区人', False],
+        ['地址:包头市青山区自由', '路1号', True],
+        ['路1号', '地址:包头市青山区自由', False],
+
+        ['采购人:', '地址', False],
+        ['地址', '联系人', False],
+        ['联系人', '电话', False],
+        ['电话', '传真', False],
+        ['传真', '电子邮件', False],
+
+        ['中国铁塔股份有限公司乌鲁木', '齐市分公司', True],
+        ['齐市分公司', '新疆乌鲁木齐市北京北路', False],
+        ['新疆乌鲁木齐市北京北路', '1999号万象天地商务公园1-1', True],
+        ['1999号万象天地商务公园1', '汤经理', False],
+        ['汤经理', '17799606095', False],
+
+        ['采购代理机', '构', True],
+        ['构', '地址', False],
+        ['地址', '新疆项目部', False],
+        ['新疆项目部', '联系人', False],
+        ['联系人', '电话', False],
+        ['电话', '传真', False],
+        ['传真', '电子邮箱', False],
+
+        ['公诚管理咨询有限公司', '广州市天河区中山大道西89号A栋9', False],
+        ['广州市天河区中山大道西89号A栋9', '层908-913房', True],
+        ['层908-913房', '新疆乌鲁木齐市沙依巴克区喀纳斯湖', False],
+        ['新疆乌鲁木齐市沙依巴克区喀纳斯湖', '北路455号新建软件园F1栋8层', True],
+        ['北路455号新建软件园F1栋8层', '李朝杰、王小丫、杨磊、马季青、姚锐', False],
+        ['李朝杰、王小丫、杨磊、马季青、姚锐', '锐、邹海燕、尉振军', True],
+        ['锐、邹海燕、尉振军', '17699660609、18699181606、', False],
+        ['17699660609、18699181606、', '13201381157、15292850990', True],
+    ]
+
+    nsp_predict(_list, has_label=True)

+ 294 - 0
botr/nsp/processing.py

@@ -0,0 +1,294 @@
+import copy
+import csv
+import json
+import logging
+import os
+import random
+import re
+import time
+
+import torch
+from torch.utils.data import TensorDataset
+
+logger = logging.getLogger(__name__)
+
+
+def collate_fn(batch):
+    """
+    batch should be a list of (sequence, target, length) tuples...
+    Returns a padded tensor of sequences sorted from longest to shortest,
+    """
+    all_input_ids, all_attention_mask, all_token_type_ids, all_labels = map(torch.stack, zip(*batch))
+    # max_len = max(all_lens).item()
+    # all_input_ids = all_input_ids[:, :max_len]
+    # all_attention_mask = all_attention_mask[:, :max_len]
+    # all_token_type_ids = all_token_type_ids[:, :max_len]
+    # all_labels = all_labels[:, :max_len]
+    return all_input_ids, all_attention_mask, all_token_type_ids, all_labels
+
+
+class NSPProcessor:
+    def __init__(self, data_path_or_list, limit=100000):
+        self.data_path = None
+        self.str_list = None
+        self.limit = limit
+        self.ratio = 0.99
+
+        if isinstance(data_path_or_list, str):
+            self.data_path = data_path_or_list
+        elif isinstance(data_path_or_list, list):
+            self.str_list = data_path_or_list
+            self.data = self.str_list
+
+        if self.data_path:
+            logging.info("Creating features from dataset file at %s", self.data_path)
+            with open(self.data_path, 'r') as f:
+                lines = f.readlines()
+            # random.shuffle(lines)
+            self.data = lines
+            # print('len(self.data)', len(self.data))
+
+    """Processor for the chinese ner data set."""
+    def get_train_examples(self):
+        """See base class."""
+        return self.create_examples("train")
+
+    def get_eval_examples(self):
+        """See base class."""
+        return self.create_examples("eval")
+
+    def get_predict_examples(self):
+        return self.create_examples("test")
+
+    def create_examples(self, set_type):
+        """Creates examples for the training and dev sets."""
+        if set_type == 'train':
+            random.shuffle(self.data)
+            self.data = self.data[:self.limit]
+            print('len(self.data)', len(self.data))
+        if set_type in ['train', 'eval']:
+            lines = [x[:-1] for x in self.data]
+            if set_type == 'train':
+                lines = lines[:int(len(lines)*self.ratio)]
+            else:
+                lines = lines[int(len(lines)*self.ratio):]
+        else:
+            lines = self.str_list
+
+        examples = []
+        for (i, line) in enumerate(lines):
+            ss = line.split('\t')
+            examples.append([ss[0], ss[1], ss[2]])
+        return examples
+
+    def load_examples(self, max_seq_len, tokenizer, data_type='train'):
+        sep_token = tokenizer.sep_token
+        cls_token = tokenizer.cls_token
+        pad_token = tokenizer.pad_token
+        # print(sep_token, cls_token, pad_token)
+
+        if data_type == 'train':
+            examples = self.get_train_examples()
+        elif data_type == 'eval':
+            examples = self.get_eval_examples()
+        else:
+            examples = self.get_predict_examples()
+
+        features = []
+        # print('len(examples)', len(examples))
+        if data_type == 'train':
+            print('loading example...')
+        for (ex_index, example) in enumerate(examples):
+            # if ex_index % 10000 == 0:
+            #     logging.info("loading example %d of %d", ex_index, len(examples))
+
+            a_tokens = tokenizer.tokenize(example[0])
+            b_tokens = tokenizer.tokenize(example[1])
+
+            # Account for [CLS] and [SEP] with "- 2".
+            special_tokens_count = 2
+            # Truncate or Padding
+            real_max_seq_len = int((max_seq_len - special_tokens_count) / 2)
+            if len(a_tokens) >= real_max_seq_len:
+                a_tokens = a_tokens[(len(a_tokens) - real_max_seq_len):]
+            else:
+                a_tokens += [pad_token] * (real_max_seq_len - len(a_tokens))
+            if len(b_tokens) >= real_max_seq_len:
+                b_tokens = b_tokens[:real_max_seq_len]
+            else:
+                b_tokens += [pad_token] * (real_max_seq_len - len(b_tokens))
+
+            tokens = [cls_token] + a_tokens + [sep_token] + b_tokens + [sep_token]
+            segment_ids = [0] + [1] * (real_max_seq_len + 1) + [2] * (real_max_seq_len + 1)
+            # print('segment_ids', segment_ids)
+            input_ids = tokenizer.convert_tokens_to_ids(tokens)
+            # print('input_ids', input_ids)
+
+            # The mask has 1 for real tokens and 0 for padding tokens. Only real
+            # tokens are attended to.
+            input_mask = [1 if x != pad_token else 0 for x in tokens]
+            label = int(example[2])
+            # if label == 0:
+            #     label = [0., 1.]
+            # else:
+            #     label = [1., 0.]
+            _dict = {
+                'input_ids': input_ids,
+                'input_mask': input_mask,
+                'segment_ids': segment_ids,
+                'label': label
+            }
+            features.append(_dict)
+
+        if data_type == 'train':
+            print('loading example finish!!!')
+
+        # Convert to Tensors and build dataset
+        all_input_ids = torch.tensor([f.get('input_ids') for f in features], dtype=torch.long)
+        all_input_mask = torch.tensor([f.get('input_mask') for f in features], dtype=torch.long)
+        all_segment_ids = torch.tensor([f.get('segment_ids') for f in features], dtype=torch.long)
+        all_label_ids = torch.tensor([f.get('label') for f in features], dtype=torch.long)
+        # all_lens = torch.tensor([f.get('input_len') for f in features], dtype=torch.long)
+        # dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_lens, all_label_ids)
+        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
+        return dataset
+
+
+def raw_data_process(path):
+    with open(path, 'r') as f:
+        data_list = f.readlines()
+    # data_list = data_list[:100]
+    # print(data_list[0])
+
+
+def generate_train_data(raw_data_path):
+    with open(raw_data_path, 'r') as f:
+        raw_file = f.readlines()
+
+    # 提取表格中的文本
+    text_list = []
+    for raw_line in raw_file:
+        try:
+            table = eval(eval(raw_line))
+            text_list += [re.sub(' ', '', y) for x in table for y in x]
+        except:
+            continue
+    text_list = list(set(text_list))
+
+    # 过滤
+    temp_text = []
+    for t in text_list:
+        if len(t) <= 1 or not re.search('[\u4e00-\u9fa5]', t):
+            continue
+        t = re.sub('\t', '', t)
+        if random.choice([0, 1]):
+            temp_text.append(t[:30])
+        else:
+            temp_text.append(t[-30:])
+    text_list = temp_text
+    print('len(text_list)', len(text_list))
+
+    # 获取数据对
+    j = 0
+    start_time = time.time()
+    sentence_pairs = []
+    with open('nsp_src_data.txt', 'w') as f:
+        f.write('')
+    for text in text_list:
+        if j % 100000 == 0:
+            print('j', j, len(text_list), time.time()-start_time)
+            start_time = time.time()
+            if sentence_pairs:
+                temp_list = []
+                for sen in sentence_pairs:
+                    if len(sen[0]) > 1 and len(sen[1]) > 1:
+                        temp_list.append(sen)
+                    elif len(sen[0]) >= 1 and re.search('[\u4e00-\u9fa5]', sen[0]) or len(sen[1]) >= 1 and re.search('[\u4e00-\u9fa5]', sen[1]):
+                        temp_list.append(sen)
+
+                sentence_pairs = temp_list
+                sentence_pairs = [str(x[0]) + '\t' + str(x[1]) + '\t' + str(x[2]) + '\n' for x in sentence_pairs]
+                with open('nsp_src_data.txt', 'a') as f:
+                    f.writelines(sentence_pairs)
+                sentence_pairs = []
+        j += 1
+
+        # 正样本
+        for i in range(len(text)-1):
+            if re.search('[\u4e00-\u9fa5]', text[i+1]) \
+                    and re.search('[\u4e00-\u9fa5]', text[i+1:]) \
+                    and re.search('[\u4e00-\u9fa5]', text[:i+1]):
+                sentence_pairs.append([text[:i+1], text[i+1:], 1])
+                sentence_pairs.append([text[random.randint(0, i):i+1], text[i+1:random.randint(i+1, len(text))], 1])
+                sentence_pairs.append([text[i+1:], text[:i+1], 0])
+                sentence_pairs.append([text[i+1:random.randint(i+1, len(text))], text[random.randint(0, i):i+1], 0])
+                max_k = random.randint(0, 3)
+                k = 0
+                while True:
+                    if k >= max_k:
+                        break
+                    rand_t = random.sample(text_list, 1)[0][:random.randint(1, 16)]
+                    if re.search('[\u4e00-\u9fa5]', rand_t):
+                        sentence_pairs.append([text[random.randint(0, i):i+1], rand_t, 0])
+                    rand_t = random.sample(text_list, 1)[0][:random.randint(1, 16)]
+                    if re.search('[\u4e00-\u9fa5]', rand_t):
+                        sentence_pairs.append([rand_t, text[i+1:random.randint(i+1, len(text))], 0])
+                    k += 1
+                rand_index = random.randint(1, 5)
+                if len(text[:i+1]) > rand_index and len(text[i+1:]) > rand_index \
+                        and re.search('[\u4e00-\u9fa5]', text[rand_index:i+1]) \
+                        and re.search('[\u4e00-\u9fa5]', text[i+1:len(text)-rand_index]):
+                    sentence_pairs.append([text[rand_index:i+1], text[i+1:len(text)-rand_index], 1])
+                    sentence_pairs.append([text[i+1:len(text)-rand_index], text[rand_index:i+1], 0])
+
+        # 负样本
+        # for i in range(len(text)-1):
+        #     t = random.sample(text_list, 1)[0]
+        #     if t == text:
+        #         continue
+        #     if random.choice([0, 1]):
+        #         sentence_pairs.append([text, t, 0])
+        #     else:
+        #         sentence_pairs.append([t, text, 0])
+
+    temp_list = []
+    for sen in sentence_pairs:
+        if len(sen[0]) > 0 and len(sen[1]) > 0:
+            temp_list.append(sen)
+        elif len(sen[0]) >= 1 and re.search('[\u4e00-\u9fa5]', sen[0]) or len(sen[1]) >= 1 and re.search('[\u4e00-\u9fa5]', sen[1]):
+            temp_list.append(sen)
+    sentence_pairs = temp_list
+    sentence_pairs = [str(x[0]) + '\t' + str(x[1]) + '\t' + str(x[2]) + '\n' for x in sentence_pairs]
+    with open('nsp_src_data.txt', 'a') as f:
+        f.writelines(sentence_pairs)
+    return
+
+
+def clean_train_data():
+    with open('nsp_src_data.txt', 'r') as f:
+        _list = f.readlines()
+
+    _list = [json.dumps(x) for x in _list]
+    _list = list(set(_list))
+    _list = [json.loads(x) for x in _list]
+
+    new_list = []
+    for l in _list:
+        ss = l[:-1].split('\t')
+        ss = list(set(ss))
+        if '' in ss:
+            ss.remove('')
+        if len(ss) == 3:
+            new_list.append(l)
+
+    with open('nsp_src_data.txt', 'w') as f:
+        f.writelines(new_list)
+
+
+if __name__ == '__main__':
+    # raw_data_process('datasets/product_ner/ZOL_PRODUCE_INFO.csv')
+
+    generate_train_data(r'D:\Project\borderless-table-detect\torch_version\sentence_match\label_table_head_info.txt')
+
+    # clean_train_data()
+    # print('\t\tb'.split('\t'))

+ 3188 - 0
botr/rules/get_table_by_bbox.py

@@ -0,0 +1,3188 @@
+import base64
+import copy
+import json
+import logging
+import math
+import random
+import re
+import traceback
+from glob import glob
+import cv2
+from sklearn.cluster import AffinityPropagation, DBSCAN
+
+# from tensorflow_version.table_head_predict import predict
+from botr.utils import request_post, line_iou, pil_resize, get_best_predict_size2, line_overlap
+import jieba
+import numpy as np
+from matplotlib import pyplot as plt
+
+
def _plot(_line_list, mode=1):
    """Debug helper: draw a list of line segments with matplotlib.

    Args:
        _line_list: lines in one of three formats, chosen by ``mode``:
            mode 1 -- objects exposing a ``bbox`` in their ``__dict__``
                      (e.g. pdfminer layout objects);
            mode 2 -- flat sequences ``(x0, y0, x1, y1)``;
            mode 3 -- point pairs ``[(x0, y0), (x1, y1)]``.
        mode: input-format selector, see above.

    Raises:
        ValueError: if ``mode`` is not 1, 2 or 3. (Previously an unknown
            mode crashed later with a confusing NameError on ``x0``.)
    """
    if mode not in (1, 2, 3):
        raise ValueError('mode must be 1, 2 or 3, got %r' % (mode,))
    for _line in _line_list:
        if mode == 1:
            x0, y0, x1, y1 = _line.__dict__.get("bbox")
        elif mode == 2:
            x0, y0, x1, y1 = _line
        else:  # mode == 3
            x0, y0 = _line[0]
            x1, y1 = _line[1]
        plt.plot([x0, x1], [y0, y1])
    plt.show()
    return
+
+
def get_table_by_rule2(img, text_list, bbox_list, table_location, is_test=0):
    """Build separator lines and cells for one borderless table area.

    Shrinks the OCR bboxes, groups them into rows/columns inside
    ``table_location``, derives and validates row/column separator lines,
    and returns ``(line_list, cell_list, table_location)`` where
    ``line_list`` holds flat ``[x0, y0, x1, y1]`` segments.
    Returns ``([], [], [])`` when no usable rows or lines are found.
    """
    # Shrink each OCR bbox around its actual ink.
    bbox_list = shrink_bbox(img, bbox_list)

    # str(bbox) -> recognised text (bboxes are nested lists, hence str keys).
    bbox_text_dict = {}
    for i in range(len(text_list)):
        bbox_text_dict[str(bbox_list[i])] = text_list[i]

    # Group all bboxes into rows.
    row_list = get_table_rows(bbox_list, bbox_text_dict)

    if len(row_list) == 0:
        return [], [], []

    # A lone bbox in the first/last row is treated as a caption, not table
    # body: drop the row and pull the table border in past it.
    if len(row_list[0]) == 1:
        table_location = [table_location[0], row_list[0][0][2][1],
                          table_location[2], table_location[3]]
        row_list = row_list[1:]
    # BUGFIX: the first trim may have emptied row_list (a one-row table
    # with a single bbox); the old code then crashed on row_list[-1].
    if not row_list:
        return [], [], []
    if len(row_list[-1]) == 1:
        table_location = [table_location[0], table_location[1],
                          table_location[2], row_list[-1][0][0][1]]
        row_list = row_list[:-1]
    if not row_list:
        return [], [], []

    # The table area, plus its row-ordered bboxes.
    table_location_list = [[[int(table_location[0]), int(table_location[1])], [int(table_location[2]), int(table_location[3])]]]
    area_row_list = [row_list]

    area_row_list = merge_row_bbox_list(area_row_list)

    # Column-ordered bboxes for the area.
    area_col_list = get_table_cols(bbox_list, table_location_list)

    # Candidate row/column separator lines.
    area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)

    if is_test:
        _plot(area_row_lines[0] + area_col_lines[0], mode=3)

    # Validate column lines (drop ones crossing text).
    area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list, bbox_text_dict)

    # Validate row lines.
    area_row_lines = judge_row_lines(img, area_row_lines, table_location_list, bbox_list, bbox_text_dict)

    if is_test:
        _plot(area_row_lines[0] + area_col_lines[0], mode=3)

    # Turn the surviving lines into a row/column grid of cells.
    area_table_bbox_list, area_table_cell_list = get_table_bbox_list(img, area_row_lines, area_col_lines, table_location_list, bbox_list)

    if is_test:
        for a in area_table_cell_list:
            for r in a:
                for c in r:
                    cv2.rectangle(img, c[0], c[1], (255, 0, 0), 1)
                    # for b in c:
                    #     cv2.rectangle(img, [int(b[0][0]), int(b[0][1])], [int(b[2][0]), int(b[2][1])], (255, 0, 0), 1)
        cv2.imshow('table_cell', img)
        cv2.waitKey(0)

    # Visualise the final result.
    if is_test:
        show_result(img, bbox_list, area_row_lines, area_col_lines, table_location_list)

    if not area_row_lines or not area_col_lines:
        return [], [], []

    line_list = [[x[0][0], x[0][1], x[1][0], x[1][1]] for x in area_row_lines[0] + area_col_lines[0]]
    cell_list = area_table_cell_list[0]
    return line_list, cell_list, table_location
+
+
def get_table_by_rule(img, text_list, bbox_list, table_location, is_test=1):
    """Heuristically reconstruct a borderless table inside ``table_location``.

    Anchors on the bbox nearest the table's top-left corner, derives a
    first row and first column from it, draws candidate column/row
    separator lines, then nudges them off black (text) pixels.

    NOTE(review): work in progress -- the function always returns
    ``[], [], []``, prints heavily, and ``is_test`` defaults to 1 (opens
    OpenCV windows), so it currently has debug semantics only.
    """
    # Preprocess bboxes: shrink each box around its actual ink
    bbox_list = shrink_bbox(img, bbox_list)

    # Build the str(bbox) -> recognised-text lookup
    bbox_text_dict = {}
    for i in range(len(text_list)):
        bbox_text_dict[str(bbox_list[i])] = text_list[i]

    # Lock onto the "first" bbox: smallest L1 distance between its top-left
    # corner and the table's top-left corner
    table_left_up_point = [table_location[0], table_location[1]]
    min_distance = 100000000000
    first_bbox = bbox_list[0]
    for bbox in bbox_list:
        distance = abs(bbox[0][0] - table_left_up_point[0]) + abs(bbox[0][1] - table_left_up_point[1])
        if distance < min_distance:
            min_distance = distance
            first_bbox = bbox

    # Preprocess first_bbox:
    # split it (one OCR box may span several columns)
    new_bbox_list, bbox_text_dict = split_bbox(img, first_bbox, bbox_text_dict)
    if new_bbox_list:
        if first_bbox in bbox_list:
            bbox_list.remove(first_bbox)
        bbox_list += new_bbox_list
        new_bbox_list.sort(key=lambda x: (x[0][0]))
        first_bbox = new_bbox_list[0]

    # Derive the first row from first_bbox
    first_row = []
    bbox_list.sort(key=lambda x: (x[0][1], x[0][0]))
    for bbox in bbox_list:
        # vertical (h) ranges intersect
        if first_bbox[0][1] <= bbox[0][1] <= first_bbox[2][1] \
                or first_bbox[0][1] <= bbox[2][1] <= first_bbox[2][1] \
                or bbox[0][1] <= first_bbox[0][1] <= bbox[2][1] \
                or bbox[0][1] <= first_bbox[2][1] <= bbox[2][1]:
            first_row.append(bbox)
        # bbox lies entirely above first_bbox
        elif bbox[2][1] <= first_bbox[0][1]:
            first_row.append(bbox)

    # Split the first row into columns
    first_row.sort(key=lambda x: (x[0][0], x[0][1]))
    first_row_col = []
    used_bbox = []
    for bbox in first_row:
        if bbox in used_bbox:
            continue
        temp_col = []
        for bbox1 in first_row:
            if bbox1 in used_bbox:
                continue
            if bbox1[0][0] <= bbox[0][0] <= bbox1[2][0] \
                    or bbox1[0][0] <= bbox[2][0] <= bbox1[2][0] \
                    or bbox[0][0] <= bbox1[0][0] <= bbox[2][0] \
                    or bbox[0][0] <= bbox1[2][0] <= bbox[2][0]:
                temp_col.append(bbox1)
                used_bbox.append(bbox1)
        first_row_col.append(temp_col)

    # Derive the first column from first_bbox
    first_col = []
    bbox_list.sort(key=lambda x: (x[0][0], x[0][1]))
    for bbox in bbox_list:
        # horizontal (w) ranges intersect
        if first_bbox[0][0] <= bbox[0][0] <= first_bbox[2][0] \
                or first_bbox[0][0] <= bbox[2][0] <= first_bbox[2][0] \
                or bbox[0][0] <= first_bbox[0][0] <= bbox[2][0] \
                or bbox[0][0] <= first_bbox[2][0] <= bbox[2][0]:
            first_col.append(bbox)
        # bbox lies entirely left of first_bbox
        elif bbox[2][0] <= first_bbox[0][0]:
            first_col.append(bbox)

    # Split the first column into rows
    first_col.sort(key=lambda x: (x[0][1], x[0][0]))
    first_col_row = []
    current_bbox = first_col[0]
    temp_row = []
    for bbox in first_col:
        if current_bbox[0][1] <= bbox[0][1] <= current_bbox[2][1] \
                or current_bbox[0][1] <= bbox[2][1] <= current_bbox[2][1] \
                or bbox[0][1] <= current_bbox[0][1] <= bbox[2][1] \
                or bbox[0][1] <= current_bbox[2][1] <= bbox[2][1]:
            temp_row.append(bbox)
        else:
            if temp_row:
                temp_row.sort(key=lambda x: x[0][1])
                first_col_row.append(temp_row)
            temp_row = [bbox]
            current_bbox = bbox
    if temp_row:
        temp_row.sort(key=lambda x: x[0][1])
        first_col_row.append(temp_row)

    print('len(first_row)', len(first_row))
    print('first_row', [bbox_text_dict.get(str(x)) for x in first_row])
    print('first_col', [bbox_text_dict.get(str(x)) for x in first_col])
    print('len(first_col)', len(first_col))
    print('len(first_row_col)', len(first_row_col))
    print('len(first_col_row)', len(first_col_row))

    # Draw column separator lines
    col_line_list = []
    for col in first_row_col:
        # two lines per column, at its leftmost/rightmost bbox edges
        min_w, max_w = 1000000, 0
        print('col', [bbox_text_dict.get(str(x)) for x in col])
        for bbox in col:
            if bbox[0][0] < min_w:
                min_w = bbox[0][0]
            if bbox[2][0] > max_w:
                max_w = bbox[2][0]
        col_line_list.append([min_w, table_location[1], min_w, table_location[3]])
        col_line_list.append([max_w, table_location[1], max_w, table_location[3]])

    # Draw row separator lines
    row_line_list = []
    last_max_h = None
    for row in first_col_row:
        # two lines per row, at its top/bottom bbox edges
        # (a third mid-gap line is commented out below)
        min_h, max_h = 1000000, 0
        for bbox in row:
            if bbox[0][1] < min_h:
                min_h = bbox[0][1]
            if bbox[2][1] > max_h:
                max_h = bbox[2][1]
        row_line_list.append([table_location[0], min_h, table_location[2], min_h])
        row_line_list.append([table_location[0], max_h, table_location[2], max_h])
        # if last_max_h:
        #     row_line_list.append([table_location[0], int((min_h+last_max_h)/2), table_location[2], int((min_h+last_max_h)/2)])
        last_max_h = max_h

    print('len(col_line_list)', len(col_line_list))
    print('col_line_list', col_line_list)
    print('len(row_line_list)', len(row_line_list))

    # If a column line sits on black (text) pixels, move it off them
    temp_list = []
    for i in range(1, len(col_line_list), 2):
        # right border of the previous column
        line1 = col_line_list[i]
        line1 = [int(x) for x in line1]
        # left border of the next column
        if i+1 >= len(col_line_list):
            break
        line2 = col_line_list[i+1]
        line2 = [int(x) for x in line2]

        max_black_cnt = 10
        black_threshold = 150
        black_cnt2 = count_black(img[line2[1]:line2[3], line2[0]:line2[2]+1, :], threshold=black_threshold)
        print('col black_cnt2', black_cnt2)
        if black_cnt2 <= max_black_cnt:
            temp_list.append(line2)
        else:
            black_cnt1 = count_black(img[line1[1]:line1[3], line1[0]:line1[2]+1, :], threshold=black_threshold)
            print('col black_cnt1', black_cnt1)
            if black_cnt1 <= max_black_cnt:
                temp_list.append(line1)
            else:
                # neither line qualifies: scan right-to-left for a clear spot
                for j in range(line2[0], line1[0], -1):
                    black_cnt = count_black(img[line1[1]:line1[3], j:j+1, :], threshold=black_threshold)
                    print('col black_cnt', black_cnt)
                    if black_cnt <= max_black_cnt:
                        temp_list.append([j, line2[1], j, line2[3]])
                        break
    col_line_list = temp_list

    # Assign bboxes to columns using the column lines
    last_line = [0, 0, 0, 0]
    col_bbox_list = []
    # used_bbox_list = []
    for line in col_line_list + [[img.shape[0], 0, img.shape[0], 0]]:
        col = []
        for bbox in bbox_list:
            # if bbox in used_bbox_list:
            #     continue
            # print('last_line, line, bbox', last_line, line, bbox)
            iou = line_iou([[last_line[0], 0], [line[0], 0]], [[bbox[0][0], 0], [bbox[2][0], 0]], axis=0)
            if iou >= 0.6:
                col.append(bbox)
                # used_bbox_list.append(bbox)
        col.sort(key=lambda x: x[0][1])
        col_bbox_list.append(col)
        last_line = line

    # Validate the row lines
    temp_list = []
    for i in range(1, len(row_line_list), 2):
        # bottom border of the previous row
        line1 = row_line_list[i]
        line1 = [int(x) for x in line1]
        # top border of the next row
        if i+1 >= len(row_line_list):
            break
        line2 = row_line_list[i+1]
        line2 = [int(x) for x in line2]

        # collect bboxes sitting between the two row lines
        sub_bbox_list = []
        threshold = 5
        for bbox in bbox_list:
            if line1[1] - threshold <= bbox[0][1] <= bbox[2][1] <= line2[1]+threshold:
                sub_bbox_list.append(bbox)

        # decide per bbox whether it belongs to the upper or lower row,
        # using its column neighbours' vertical spacing
        line1_bbox_list = []
        line2_bbox_list = []
        if sub_bbox_list:
            sub_bbox_list.sort(key=lambda x: x[0][1])
            min_h = sub_bbox_list[0][0][1] - 1
            max_h = sub_bbox_list[-1][2][1] + 1
        for bbox in sub_bbox_list:
            # find the column this bbox belongs to
            current_col = None
            for col in col_bbox_list:
                if bbox in col:
                    current_col = copy.deepcopy(col)
                    break
            if current_col:
                # inject the two row borders as pseudo-bboxes to anchor the column
                line1_bbox = [[0, min_h], [], [0, min_h], []]
                line2_bbox = [[0, max_h], [], [0, max_h], []]
                current_col += [line1_bbox, line2_bbox]
                current_col.sort(key=lambda x: x[0][1])
                bbox_index = current_col.index(bbox)
                line1_bbox_index = current_col.index(line1_bbox)
                line2_bbox_index = current_col.index(line2_bbox)
                print('current_col', [bbox_text_dict.get(str(x)) for x in current_col])
                print('line1_bbox_index, bbox_index, line2_bbox_index', line1_bbox_index, bbox_index, line2_bbox_index)
                # distance from the bbox to the upper/lower anchors
                # (NOTE(review): each loop keeps only the LAST gap -- the
                # step adjacent to the bbox -- presumably intentional)
                distance1 = 10000
                for index in range(line1_bbox_index, bbox_index):
                    h1 = (current_col[index][0][1] + current_col[index][2][1]) / 2
                    h2 = (current_col[index+1][0][1] + current_col[index+1][2][1]) / 2
                    # print(bbox_text_dict.get())
                    distance1 = abs(h1 - h2)
                distance2 = 10000
                for index in range(line2_bbox_index, bbox_index, -1):
                    h1 = (current_col[index][0][1] + current_col[index][2][1]) / 2
                    h2 = (current_col[index-1][0][1] + current_col[index-1][2][1]) / 2
                    distance2 = abs(h1 - h2)

                print(bbox_text_dict.get(str(bbox)), distance1, distance2)
                ratio = 1.5
                # belongs to the next (lower) row
                if distance1 >= distance2 * ratio or distance1 >= distance2 + 8:
                    line2_bbox_list.append(bbox)
                # belongs to the previous (upper) row
                elif distance2 >= distance1 * ratio or distance2 >= distance1 + 8:
                    line1_bbox_list.append(bbox)
                else:
                    print('距离不明确,需要nsp模型介入判断')

        if line1_bbox_list:
            print('line1_bbox_list', [bbox_text_dict.get(str(x)) for x in line1_bbox_list])
            line1_bbox_list.sort(key=lambda x: x[0][1])
            b = line1_bbox_list[-1]
            line1 = [line1[0], b[2][1], line1[2], b[2][1]]
        if line2_bbox_list:
            print('line2_bbox_list', [bbox_text_dict.get(str(x)) for x in line2_bbox_list])
            line2_bbox_list.sort(key=lambda x: x[0][1])
            b = line2_bbox_list[0]
            line2 = [line2[0], b[0][1], line2[2], b[0][1]]

        # final separator: midway between the adjusted borders
        _line = [line1[0], (line1[1]+line2[1])/2, line1[2], (line1[3]+line2[3])/2]
        _line = [int(x) for x in _line]
        temp_list.append(_line)
    row_line_list = temp_list

    # Add the outer table border lines
    row_line_list.append([table_location[0], table_location[1], table_location[2], table_location[1]])
    row_line_list.append([table_location[0], table_location[3], table_location[2], table_location[3]])
    col_line_list.append([table_location[0], table_location[1], table_location[0], table_location[3]])
    col_line_list.append([table_location[2], table_location[1], table_location[2], table_location[3]])

    # Turn the lines into a row/column grid of bboxes
    area_table_bbox_list, area_table_cell_list = get_table_bbox_list(img, [row_line_list], [col_line_list], [table_location], bbox_list)

    # show
    if is_test:
        for line in col_line_list:
            cv2.line(img, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (0, 0, 255), 2)
        for line in row_line_list:
            cv2.line(img, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (255, 0, 0), 2)
        cv2.namedWindow('img', cv2.WINDOW_NORMAL)
        cv2.imshow('img', cv2.resize(img, (768, 1024)))
        cv2.waitKey(0)
    return [], [], []
+
+
def split_bbox_by_kmeans(img, bbox, bbox_text_dict):
    """Split one OCR bbox into columns via clustering.

    NOTE(review): unfinished stub -- it only crops the bbox region from the
    image and implicitly returns None; the left-to-right scan / clustering
    step is not implemented. ``bbox_text_dict`` is currently unused.
    """
    sub_img = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]

    # scan from left to right (TODO: not implemented)
+
def get_table():
    """Evaluation driver: rebuild the labelled borderless tables with the
    rule pipeline and print an approximate average TEDS similarity score.

    Reads the list of "standard" label images from 'standard_table.txt',
    runs OCR + the full rule chain per image, compares the generated grid
    against the labelled one, and accumulates a per-image TEDS-like score.
    """
    # Issue log (√ = resolved), with the sample images that exposed each case:
    # 1. multi-line content inside one cell must be merged                   √
    # 2. several single characters in one row must be merged 1007.jpg        √
    # 3. drop bboxes coming from OCR misrecognition
    # 4. merge stacked tables: close together, same column count, or only
    #    missing the first column 1005.jpg 1014.jpg 1033.jpg                 √
    # 5. merge nearby row/column lines 1020.jpg 1025.jpg 1054.jpg 1068.jpg
    # 6. a row line crossing a merged bbox must move up or down 105.jpg 1054.jpg 1020.jpg
    # 7. a long bbox hugging the left border also splits the table as a title 1047.jpg 1059.jpg √
    # 8. irregular-table detection: several bboxes per cell, excluding vertically joined ones 105.jpg
    # 9. irregular-table detection: OCR missed text, many black pixels 1050.jpg √
    # 10. OCR missed the running numbers in the first column 1051.jpg
    # 11. use another column as row-splitting reference: pick the one with the
    #     largest average gap and enough rows 1085.jpg
    # 12. two bboxes too close together cannot start a row 1106.jpg           √
    # 13. all row gaps in a column are tiny: cluster with one uniform distance 1098.jpg √
    # 14. missed column (non-table parts inside the table must be removed) 1059.jpg
    # 15. missed rows 1064.jpg 1065.jpg 1067.jpg 1085.jpg 1097.jpg 1101.jpg   √
    # 16. wrong table split 1045.jpg 1051.jpg 1078.jpg 1079.jpg               √
    # 17. choosing the header in the first row when splitting columns 1051.jpg 1106.jpg 1129.jpg
    # 18. split several columns inside the same row 1093.jpg 1095.jpg 110.jpg
    # 19. table missed entirely 1119.jpg 1141.jpg
    # 20. irregular-table detection wrong (black pixels) 1122.jpg 1121.jpg    √
    # 21. wrong column split 1125.jpg 1158.jpg 1020.jpg                       √
    # 22. wrong row/column split (over-long bboxes must be excluded from the
    #     first column) 1131.jpg 1132.jpg 1135.jpg 1136.jpg 1147.jpg          √
    # 23. text outside the table vertically joined with cell text 1134.jpg 1142.jpg
    # 24. too many empty cells in the first column may mean "irregular"
    # 25. drop a vertical line crossing near the centre of several bboxes
    # 26. a vertical line crossing a bbox: shorten it past the overlap 1020.jpg
    # 27. a vertical line stuck inside a column must shift right to a blank spot 1023.jpg

    # label_path = glob('../data/borderless_tables/*_label.jpg')
    # temp_label_path = []
    # label_row_dict = {}
    # for p in label_path:
    #     img = cv2.imread(p)
    #     row_img, col_img = get_lines_from_img(img)
    #     label_row_list, is_standard = get_bbox_by_img(row_img, col_img)
    #     label_row_dict[p] = label_row_list
    #     if is_standard:
    #         temp_label_path.append(p)
    # label_path = temp_label_path
    # print('len(label_path)', len(label_path))
    # for p in label_path:
    #     print(p)

    with open('standard_table.txt', 'r') as f:
        label_path_list = f.readlines()


    # paths = glob('../data/borderless_tables/1.jpg') # merge_row
    # paths = glob('../data/borderless_tables/5.jpg') # title
    # paths = glob('../data/borderless_tables/26.jpg') # merge_col

    paths = glob('../data/borderless_tables/59.jpg') # split bbox
    paths = glob('../../hrnet-pytorch-main/my_dataset/borderless_tables/62.jpg')

    # paths = glob('../data/borderless_tables/57.jpg')
    paths = glob('../../hrnet-pytorch-main/my_dataset/borderless_tables/3.jpg') # not standard table

    # paths = glob(r'C:\Users\Administrator\Desktop\test_pdf_table\1.png')
    # label_path_list.append(r'C:\Users\Administrator\Desktop\test_pdf_table\1_label.jpg\n')

    paths = glob('../data/borderless_tables/*.jpg')
    # paths = glob('../data/standard_tables/*.jpg')
    path_cnt = 0
    all_teds = 0
    all_standard_cnt = 0
    for p in paths:
        if 'label' in p:
            continue

        label_p = p[:-4] + '_label.jpg\n'
        if label_p not in label_path_list:
            continue

        # if path_cnt <= 10:
        #     path_cnt += 1
        #     continue

        path_cnt += 1

        img = cv2.imread(p)

        result = test_ocr_model(p)
        print(p)
        # print(result)
        # SECURITY NOTE(review): eval() on OCR-service output -- consider
        # ast.literal_eval, since the payload is plain literals.
        bbox_list = eval(result.get('bbox'))
        text_list = eval(result.get('text'))
        bbox_text_dict = {}
        for i in range(len(text_list)):
            bbox_text_dict[str(bbox_list[i])] = text_list[i]

        # split_bbox(img, text_list, bbox_list)

        # group all bboxes into rows
        row_list = get_table_rows(bbox_list)

        # bbox preprocessing
        bbox_list, text_list, bbox_text_dict = bbox_preprocess(bbox_list, text_list, row_list, bbox_text_dict)

        # re-group into rows after preprocessing
        row_list = get_table_rows(bbox_list)

        # table areas plus their row-ordered bboxes
        table_location_list, area_row_list = get_table_location(row_list)

        # table splitting (deliberately run twice to catch cascaded splits)
        table_location_list, area_row_list = split_table(table_location_list, area_row_list, bbox_text_dict)
        table_location_list, area_row_list = split_table(table_location_list, area_row_list, bbox_text_dict)
        print('fix_table_location_list', table_location_list)
        # print('fix_area_row_list', area_row_list)

        # column-ordered bboxes per table area
        area_col_list = get_table_cols(bbox_list, table_location_list)

        # merge multi-line bboxes within a column
        area_row_list = merge_col_bbox_by_block(img, area_row_list, area_col_list, bbox_text_dict, bbox_list, table_location_list)

        # filter out irregular tables
        table_standard_list = delete_not_standard_table(img, area_row_list, area_col_list, table_location_list, bbox_list, bbox_text_dict)

        # merge vertically adjacent tables
        area_row_list, area_col_list, table_location_list = merge_table(area_row_list, area_col_list, table_location_list, bbox_list)

        # derive row/column lines
        area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)

        # regenerate bbox rows from the lines
        area_row_list = get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list)

        # add extra column lines
        add_area_col_lines = add_col_lines(area_row_list, area_col_list, table_location_list, bbox_text_dict)

        for j in range(len(area_col_lines)):
            area_col_lines[j] += add_area_col_lines[j]

        # validate column lines
        area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list, bbox_text_dict)

        area_col_list = get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list, axis=1)
        area_row_list = get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list, axis=0)
        #
        # for a in area_col_list:
        #     for c in a:
        #         print('area_col_list', [bbox_text_dict.get(str(x)) for x in c])
        #
        # # merge multi-line bboxes within a column
        # area_row_list = merge_col_bbox_by_block(img, area_row_list, area_col_list, bbox_text_dict, bbox_list, table_location_list)
        #
        # # derive row/column lines
        # area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)
        #
        # add_area_col_lines = add_col_lines(area_row_list, area_col_list, table_location_list, bbox_text_dict)
        #
        # for j in range(len(area_col_lines)):
        #     area_col_lines[j] += add_area_col_lines[j]
        #
        # area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list)
        #
        # area_col_list = get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list, axis=1)
        # area_row_list = get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list, axis=0)
        #
        #
        add_area_row_lines = add_row_lines(area_row_list, area_col_list, table_location_list, bbox_text_dict, area_row_lines)

        for j in range(len(area_row_lines)):
            area_row_lines[j] += add_area_row_lines[j]
        #
        area_row_lines = judge_row_lines(img, area_row_lines, table_location_list, bbox_list, bbox_text_dict)

        # merge nearby lines
        for j in range(len(area_col_lines)):
            area_col_lines[j] = merge_lines(area_col_lines[j], axis=1)
            area_row_lines[j] = merge_lines(area_row_lines[j], axis=0)

        # area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list)
        # area_row_lines = judge_row_lines(img, area_row_lines, table_location_list, bbox_list)


        # # regenerate
        # table_location_list = []
        # temp_area_row_list = []
        # for temp_row_list in area_row_list:
        #     location_list, temp_row_list = get_table_location(temp_row_list)
        #     table_location_list += location_list
        #     temp_area_row_list += temp_row_list
        # area_col_list = get_table_cols(bbox_list, table_location_list)
        # area_row_list = temp_area_row_list
        #
        # # derive row/column lines
        # area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)
        #
        # print('len(table_location_list)', len(table_location_list))

        # for bbox in bbox_list:
        #     cv2.rectangle(img, (int(bbox[0][0]), int(bbox[0][1])), (int(bbox[2][0]), int(bbox[2][1])),
        #                   (0, 0, 255), 1)
        #
        # for i in range(len(table_location_list)):
        #     # location = table_location_list[i]
        #     # cv2.rectangle(img, location[0], location[1], (0, 255, 0), 1)
        #
        #     row_lines = area_row_lines[i]
        #     col_lines = area_col_lines[i]
        #     for r in row_lines:
        #         cv2.line(img, r[0], r[1], (0, 255, 0), 1)
        #     for c in col_lines:
        #         cv2.line(img, c[0], c[1], (0, 255, 0), 1)
        #
        # cv2.imshow('img', img)
        # cv2.waitKey(0)

        # compare the generated table against the labelled one (TEDS-like score)
        if len(table_location_list) == 1:
            # if not table_standard_list[0]:
            #     continue

            row_lines = area_row_lines[0]
            col_lines = area_col_lines[0]
            row_img = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
            col_img = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
            for r in row_lines:
                cv2.line(row_img, r[0], r[1], (255, 255, 255), 1)
            for c in col_lines:
                cv2.line(col_img, c[0], c[1], (255, 255, 255), 1)

            row_list, is_standard = get_bbox_by_img(row_img, col_img)
            if not is_standard:
                continue
            row_list = merge_text_and_table(bbox_list, row_list)

            continue_flag = 0
            for row in row_list:
                for b in row:
                    if len(b) > 1:
                        continue_flag = 1
                        break
            if continue_flag:
                continue

            max_len = 1
            continue_flag = 0
            for row in row_list:
                if abs(max_len - len(row)) > 2:
                    continue_flag = 1
                    break
                if len(row) > max_len:
                    max_len = len(row)
            if continue_flag:
                continue

            img_label = cv2.imread(label_p[:-1])
            row_img1, col_img1 = get_lines_from_img(img_label)

            label_row_list, label_is_standard = get_bbox_by_img(row_img1, col_img1)
            if not label_is_standard:
                continue
            label_row_list = merge_text_and_table(bbox_list, label_row_list)

            add_flag = 0
            modify_flag = 0
            for i in range(len(row_list)):
                if i >= len(label_row_list):
                    continue
                row = row_list[i]
                label_row = label_row_list[i]
                for r in label_row:
                    if r not in row:
                        add_flag += 1
                    else:
                        if label_row.index(r) != row.index(r):
                            modify_flag += 1

            bbox_cnt = 0
            for row in row_list:
                for b in row:
                    bbox_cnt += 1
            label_bbox_cnt = 0
            for row in label_row_list:
                for b in row:
                    label_bbox_cnt += 1

            teds = 1 - (add_flag + modify_flag) / max(bbox_cnt, label_bbox_cnt)

            print('add_flag', add_flag, 'modify_flag', modify_flag, 'bbox_cnt', bbox_cnt, 'label_bbox_cnt', label_bbox_cnt)
            print('TEDS:', teds, p)
            all_teds += teds
            all_standard_cnt += 1
            # if teds <= 0.8:
            #     print('row_list', [y for y in [x for x in row_list]])
            #     print('label_row_list', [y for y in [x for x in label_row_list]])
            #     cv2.imshow('model_table', row_img+col_img)
            #     cv2.imshow('label_table', row_img1+col_img1)
            #     cv2.waitKey(0)
            # for i in range(len(row_list)):

    # BUGFIX: the bare `except:` here masked every error, not just the
    # intended "no standard tables processed" case.
    try:
        avg_teds = all_teds / all_standard_cnt
    except ZeroDivisionError:
        avg_teds = 0
    print('standard table cnt', all_standard_cnt)
    print('Avg TEDS', avg_teds)
    return
+
+
def get_table_new():
    """Debug driver: run the newer rule pipeline over a fixed set of
    problem sample images and visualise the detected rows/columns.

    Side effects only (prints and OpenCV windows); returns None.
    """
    with open('standard_table.txt', 'r') as f:
        label_path_list = f.readlines()

    # samples with table-splitting problems: 1019.jpg, 1020.jpg, 1023.jpg, 1027.jpg, 1029.jpg, 1030.jpg, 1031.jpg, 1035.jpg, 1040.jpg, 1042.jpg, 1046.jpg, 1047.jpg, 1061.jpg, 1064.jpg, 1067.jpg, 1072.jpg
    # samples with column-split problems: 1059.jpg,
    paths = glob('../data/borderless_tables/*.jpg')
    # paths = glob(r'C:\Users\Administrator\Desktop\test_pdf_table\1.png')
    paths = ['1019.jpg', '1020.jpg', '1023.jpg', '1027.jpg', '1029.jpg', '1030.jpg', '1031.jpg', '1035.jpg', '1040.jpg', '1042.jpg', '1046.jpg', '1047.jpg', '1061.jpg', '1064.jpg', '1067.jpg', '1072.jpg']
    paths = ['../data/borderless_tables/' + x for x in paths]
    path_cnt = 0
    for p in paths:
        if 'label' in p:
            continue

        # label_p = p[:-4] + '_label.jpg\n'
        # if label_p not in label_path_list:
        #     continue

        # if path_cnt <= 22:
        #     path_cnt += 1
        #     continue

        path_cnt += 1

        img = cv2.imread(p)

        result = test_ocr_model(p)
        print(p)

        # NOTE(review): eval() on OCR-service output -- consider ast.literal_eval
        bbox_list = eval(result.get('bbox'))
        text_list = eval(result.get('text'))

        # shrink each bbox around its actual ink
        bbox_list = shrink_bbox(img, bbox_list)

        # str(bbox) -> text lookup
        bbox_text_dict = {}
        for i in range(len(text_list)):
            bbox_text_dict[str(bbox_list[i])] = text_list[i]

        # group all bboxes into rows
        row_list = get_table_rows(bbox_list, bbox_text_dict)

        # table areas plus their row-ordered bboxes
        table_location_list, area_row_list = get_table_location(row_list, bbox_text_dict)

        area_row_list = merge_row_bbox_list(area_row_list)

        # for a in area_row_list:
        #     i = 0
        #     for r in a:
        #         print('row', i)
        #         i += 1
        #         for b in r:
        #             print(bbox_text_dict.get(str(b)))

        # column-ordered bboxes per area
        area_col_list = get_table_cols(bbox_list, table_location_list)

        # derive row/column lines
        area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)

        # validate column lines
        area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list, bbox_text_dict)
        # # validate row lines
        area_row_lines = judge_row_lines(img, area_row_lines, table_location_list, bbox_list, bbox_text_dict)

        # build the bbox grid from the lines
        area_table_bbox_list, area_table_cell_list = get_table_bbox_list(img, area_row_lines, area_col_lines, table_location_list, bbox_list)
        for a in area_table_bbox_list:
            for r in a:
                for c in r:
                    # cv2.rectangle(img, c[0], c[1], (255, 0, 0), 1)
                    for b in c:
                        cv2.rectangle(img, [int(b[0][0]), int(b[0][1])], [int(b[2][0]), int(b[2][1])], (255, 0, 0), 1)
        cv2.imshow('table_cell', img)

        # split tables (all variants currently disabled)
        # table_location_list, _ = split_table_new2(table_location_list, area_table_bbox_list, area_table_cell_list, area_row_list, bbox_text_dict)
        # table_location_list, _ = split_table(table_location_list, area_row_list, bbox_text_dict)
        # table_location_list = split_table_by_col(table_location_list, area_table_bbox_list, bbox_text_dict)
        # table_location_list = split_table_by_table_head(table_location_list, area_table_bbox_list, bbox_text_dict)

        # regenerate the row-ordered bboxes
        area_row_list = get_table_rows2(area_row_list, table_location_list)
        # for a in area_row_list:
        #     for r in a:
        #         for b in r:
        #             cv2.rectangle(img, [int(b[0][0]), int(b[0][1])], [int(b[2][0]), int(b[2][1])], (255, 0, 0), 1)
        # cv2.imshow('area_row_list', img)

        # column-ordered bboxes again
        area_col_list = get_table_cols(bbox_list, table_location_list)

        # derive row/column lines again
        area_row_lines, area_col_lines = get_table_borders(area_row_list, area_col_list, table_location_list)

        # validate column lines
        area_col_lines = judge_col_lines(img, area_col_lines, table_location_list, bbox_list, bbox_text_dict)
        # validate row lines
        area_row_lines = judge_row_lines(img, area_row_lines, table_location_list, bbox_list, bbox_text_dict)

        # visualise
        show_result(img, bbox_list, area_row_lines, area_col_lines, table_location_list)
    return
+
+
def show_result(img, bbox_list, area_row_lines, area_col_lines, table_location_list):
    """Debug helper: draw text bboxes (red) and table grid lines (green) on
    *img* and display the result in a blocking OpenCV window.
    """
    red, green = (0, 0, 255), (0, 255, 0)

    # Outline every text bbox in red.
    for box in bbox_list:
        top_left = (int(box[0][0]), int(box[0][1]))
        bottom_right = (int(box[2][0]), int(box[2][1]))
        cv2.rectangle(img, top_left, bottom_right, red, 1)

    # Draw the detected row/column separator lines of each table in green.
    for table_idx in range(len(table_location_list)):
        for seg in area_row_lines[table_idx]:
            cv2.line(img, seg[0], seg[1], green, 1)
        for seg in area_col_lines[table_idx]:
            cv2.line(img, seg[0], seg[1], green, 1)

    # Blocks until a key is pressed.
    cv2.namedWindow('img', cv2.WINDOW_NORMAL)
    cv2.imshow('img', img)
    cv2.waitKey(0)
    return
+
+
def get_table_borders(area_row_list, area_col_list, table_location_list):
    """Derive horizontal and vertical separator lines for every table area.

    For each table, one row line is placed under the lowest bbox bottom of
    each row and one column line at the widest bbox right edge of each
    column.  The corresponding entry of *table_location_list* is widened in
    place so it covers the generated lines.

    :return: (area_row_lines, area_col_lines) — per table, lists of
        [[x1, y1], [x2, y2]] segments.
    """
    area_row_lines, area_col_lines = [], []

    for table_idx, location in enumerate(table_location_list):
        rows = area_row_list[table_idx]
        cols = area_col_list[table_idx]
        (left, top), (right, bottom) = location

        # Horizontal lines: top border, then one line under each row.
        row_lines = [[[left, top], [right, top]]]
        for row in rows:
            max_bottom = 0
            for box in row:
                if box[2][1] > max_bottom:
                    max_bottom = int(box[2][1])
            row_lines.append([[left, max_bottom], [right, max_bottom]])
        # Stretch the last line down to the table bottom.
        row_lines[-1][0][1] = max(bottom, row_lines[-1][0][1])
        row_lines[-1][1][1] = max(bottom, row_lines[-1][1][1])

        # Grow the table's bottom edge to cover the last row line.
        table_location_list[table_idx][1][1] = max(bottom, row_lines[-1][1][1])
        bottom = table_location_list[table_idx][1][1]

        # Vertical lines: left border, then one line at each column's right.
        col_lines = [[[left, top], [left, bottom]]]
        for col in cols:
            max_right = 0
            for box in col:
                if box[2][0] > max_right:
                    max_right = int(box[2][0])
            col_lines.append([[max_right, top], [max_right, bottom]])

        # Grow the table's right edge to cover the last column line.
        table_location_list[table_idx][1][0] = max(right, col_lines[-1][1][0])
        right = table_location_list[table_idx][1][0]

        # Re-span every row line across the (possibly widened) table.
        for seg in row_lines:
            seg[0][0] = left
            seg[1][0] = right

        area_row_lines.append(row_lines)
        area_col_lines.append(col_lines)
    return area_row_lines, area_col_lines
+
+
def get_table_location(row_list, bbox_text_dict):
    """Locate candidate borderless-table regions from rows of text bboxes.

    Accumulates consecutive rows containing >= 2 bboxes into one table
    candidate, tolerating up to 3 single-bbox rows in between.  Rows that
    look like catalog entries ('....12'), watermarks (many duplicate texts)
    or have very uneven bbox heights are rejected.  A candidate must have
    more than 2 qualifying rows to be kept.

    :param row_list: rows of 4-point bboxes, ordered top to bottom
    :param bbox_text_dict: maps str(bbox) -> recognized text of that bbox
    :return: (table_location_list, area_row_list) — [[x1, y1], [x2, y2]]
        corner pairs per table, and the rows belonging to each table
    """
    # for r in row_list:
    #     print('row', r)

    up_h = 10000          # running top of the current candidate
    bottom_h = 0          # running bottom
    left_w = 10000        # running left
    right_w = 0           # running right
    table_rows = 0        # number of multi-bbox rows accumulated
    tolerance_list = []   # tolerated single-bbox rows inside the candidate
    area_row_list = []
    temp_row_list = []
    table_location_list = []
    catalog_text_cnt = 0  # rows whose text looks like a '....12' catalog entry
    for row in row_list:
        if len(row) >= 2:
            if not temp_row_list:
                # The first row of a candidate needs a clear horizontal gap
                # between its bboxes.
                max_distance = 0
                row.sort(key=lambda x: x[0][0])
                row_text_list = []
                catalog_text_cnt = 0
                bbox_height_list = [abs(row[-1][0][1] - row[-1][2][1])]
                for i in range(1, len(row)):
                    dis = row[i][0][0] - row[i-1][2][0]
                    if dis >= max_distance:
                        max_distance = dis
                    text = bbox_text_dict.get(str(row[i-1]))
                    row_text_list.append(bbox_text_dict.get(str(row[i-1])))
                    match = re.findall('\\.+\d+', text)
                    if match and len(match[0]) == len(text):
                        catalog_text_cnt += 1
                    bbox_height_list.append(abs(row[i][0][1] - row[i][2][1]))

                # Reject first rows whose bboxes have no real gap.
                # if len(row) == 2:
                #     if max_distance <= abs(row[0][2][0] - row[0][0][0]):
                #         continue
                # else:
                if max_distance <= 5:
                    continue

            # Reject label-like rows such as '地   址', '名    称'.
            # if len(row) == 2 and len(bbox_text_dict.get(str(row[0]))) == 1:
            #     continue

            row_text_list = []
            bbox_height_list = []
            for i in range(len(row)):
                # NOTE(review): row[i-1] starts at row[-1] when i == 0, so
                # the texts are visited in a rotated order — harmless here
                # because only the set size and counts are used, but this
                # looks like an off-by-one copied from the loop above;
                # confirm intent.
                text = bbox_text_dict.get(str(row[i-1]))
                row_text_list.append(bbox_text_dict.get(str(row[i-1])))
                match = re.findall('\\.+\d+', text)
                if match and len(match[0]) == len(text):
                    catalog_text_cnt += 1
                bbox_height_list.append(abs(row[i][0][1] - row[i][2][1]))

            # Reject rows whose bbox heights differ too much.
            bbox_height_list.sort(key=lambda x: x)
            if bbox_height_list[-1] - bbox_height_list[0] > bbox_height_list[0]:
                continue

            # Reject catalog (table-of-contents) areas.
            if catalog_text_cnt >= 3:
                continue

            # Reject watermark images (many bboxes with identical text).
            if len(list(set(row_text_list))) < 2/3 * len(row):
                continue

            # TODO: reject underlined rows.

            table_rows += 1
            temp_row_list.append(row)

            # Grow the candidate's bounding rectangle.
            for bbox in row:
                if up_h > bbox[0][1]:
                    up_h = bbox[0][1]
                if bottom_h < bbox[2][1]:
                    bottom_h = bbox[2][1]
                if left_w > bbox[0][0]:
                    left_w = bbox[0][0]
                if right_w < bbox[2][0]:
                    right_w = bbox[2][0]
        else:
            # Tolerate up to 3 single-bbox rows inside a running candidate.
            if len(tolerance_list) < 3 and table_rows > 0:
                tolerance_list.append(row)
                temp_row_list.append(row)
                continue
            # Close the candidate: keep it only if it has more than 2 rows.
            if table_rows > 2 and up_h < bottom_h:
                table_location_list.append([[int(left_w), int(up_h)],
                                            [int(right_w), int(bottom_h)]])
                # Drop a trailing tolerated row from the table body.
                # NOTE(review): only one trailing tolerated row is trimmed
                # even though up to 3 may have been appended — confirm.
                if tolerance_list[-1] == temp_row_list[-1]:
                    area_row_list.append(temp_row_list[:-1])
                else:
                    area_row_list.append(temp_row_list)
            up_h = 10000
            bottom_h = 0
            left_w = 10000
            right_w = 0
            table_rows = 0
            tolerance_list = []
            temp_row_list = []
    # Flush a candidate that runs to the end of row_list.
    if temp_row_list:
        if table_rows > 2 and up_h < bottom_h:
            table_location_list.append([[int(left_w), int(up_h)],
                                        [int(right_w), int(bottom_h)]])
            area_row_list.append(temp_row_list)

    return table_location_list, area_row_list
+
+
def get_table_rows(bbox_list, bbox_text_dict):
    """Greedily group text bboxes into rows by vertical alignment.

    Two bboxes share a row when their vertical centers are within 10px and
    their vertical overlap covers at least half of the shorter bbox.
    *bbox_list* is sorted in place; *bbox_text_dict* is unused and kept only
    for API compatibility.
    """
    bbox_list.sort(key=lambda b: (b[0][1], b[2][1], b[0][0], b[2][0]))

    def same_row(a, b):
        # Vertical-center proximity plus sufficient vertical overlap.
        center_gap = abs((a[0][1] + a[2][1]) / 2 - (b[0][1] + b[2][1]) / 2)
        shorter = min(a[2][1] - a[0][1], b[2][1] - b[0][1])
        return center_gap <= 10 and line_overlap(a[0][1], a[2][1], b[0][1], b[2][1]) >= 1/2 * shorter

    rows = []
    assigned = []
    for anchor in bbox_list:
        if anchor in assigned:
            continue
        current = [anchor]
        assigned.append(anchor)
        for candidate in bbox_list:
            if candidate in assigned:
                continue
            if same_row(anchor, candidate):
                current.append(candidate)
                assigned.append(candidate)
        rows.append(current)
    return rows
+
+
def get_table_rows2(area_row_list, table_location_list):
    """Re-bucket rows into tables by vertical containment.

    Flattens all rows, then assigns to each table location every row whose
    first bbox lies entirely inside the table's vertical span.
    """
    all_rows = [row for area in area_row_list for row in area]

    rebucketed = []
    for location in table_location_list:
        top, bottom = location[0][1], location[1][1]
        # Membership is decided by the row's first bbox only.
        rebucketed.append([row for row in all_rows
                           if top <= row[0][0][1] <= row[0][2][1] <= bottom])
    return rebucketed
+
+
def get_table_bbox_row_or_col(bbox_list, axis=0):
    """Cluster bboxes into rows (axis=0) or columns (axis=1).

    Grouping is greedy: each unused bbox seeds a group and absorbs every
    remaining bbox whose center along the cross axis lies within 10px of the
    seed's center.  *bbox_list* is sorted in place first.
    """
    cross = 1 - axis
    bbox_list.sort(key=lambda b: (b[0][cross], b[2][cross], b[0][axis], b[2][axis]))

    groups = []
    taken = []
    for seed in bbox_list:
        if seed in taken:
            continue
        seed_center = (seed[0][cross] + seed[2][cross]) / 2
        group = [seed]
        taken.append(seed)
        for other in bbox_list:
            if other in taken:
                continue
            other_center = (other[0][cross] + other[2][cross]) / 2
            if abs(seed_center - other_center) <= 10:
                group.append(other)
                taken.append(other)
        groups.append(group)
    return groups
+
+
def get_table_cols(bbox_list, table_location_list):
    """Group text bboxes into columns, one column list per table area.

    For every table location, bboxes whose vertical center falls inside the
    table are greedily clustered into columns.  A bbox joins a column when
    any of these holds: horizontal centers within 10px, left edges within
    10px, one horizontal span containing the other, or horizontal IoU >=
    0.6.  The column's tracked span widens as members are added.
    """
    bbox_list.sort(key=lambda b: (b[0][0], b[2][0], b[0][1], b[2][1]))
    all_col_list = []
    used = []  # shared across tables so a bbox joins only one column
    for location in table_location_list:
        # Bboxes vertically inside this table.
        inside = [b for b in bbox_list
                  if location[0][1] <= (b[0][1] + b[2][1]) / 2 <= location[1][1]]

        col_list = []
        for seed in inside:
            if seed in used:
                continue

            span = [seed[0][0], seed[2][0]]   # current [left, right] of the column
            column = [seed]
            used.append(seed)
            for cand in inside:
                if cand in used:
                    continue

                # Same-column tests: center proximity, left-edge proximity,
                # span containment (either direction), horizontal IoU.
                centers_close = abs((seed[0][0] + seed[2][0]) / 2 - (cand[0][0] + cand[2][0]) / 2) <= 10
                lefts_close = abs(seed[0][0] - cand[0][0]) <= 10
                contained = span[0] <= cand[0][0] <= cand[2][0] <= span[1]
                contains = cand[0][0] <= span[0] <= span[1] <= cand[2][0]
                if centers_close or lefts_close or contained or contains \
                        or line_iou([[span[0], 0], [span[1], 0]],
                                    [[cand[0][0], 0], [cand[1][0], 0]], axis=0) >= 0.6:
                    column.append(cand)
                    used.append(cand)
                    span[0] = min(span[0], cand[0][0])
                    span[1] = max(span[1], cand[2][0])

            col_list.append(column)
        all_col_list.append(col_list)
    return all_col_list
+
+
def merge_col_bbox_by_cluster(img, area_row_list, area_col_list, bbox_text_dict, all_bbox_list, table_location_list):
    """Merge vertically close bboxes within each column via gap clustering.

    For every table, the vertical gaps between consecutive bboxes of each
    column are clustered (DBSCAN on gaps < 5px); runs of uniformly small
    gaps are merged into one larger bbox, updating *all_bbox_list* in place.
    The first column's merge result is drawn on a copy of *img* for
    inspection.

    NOTE(review): contains debug prints and a cv2.imshow call — presumably
    leftover development code.
    """
    temp_img = copy.deepcopy(img)

    # Iterate over each table.
    for i in range(len(area_row_list)):
        row = area_row_list[i]
        col = area_col_list[i]

        # For each column, measure the vertical gaps between its bboxes.
        new_col = []
        col_cnt = 0
        for bbox_list in col:
            # Gap between each consecutive bbox pair (overlaps clamp to 0).
            distance_list = []
            bbox_list.sort(key=lambda x: (x[0][1], x[1][1]))
            text_list = [bbox_text_dict.get(str(x)) for x in bbox_list]
            for j in range(1, len(bbox_list)):
                dis = bbox_list[j][0][1] - bbox_list[j-1][2][1]
                if dis < 0:
                    dis = 0.
                distance_list.append(dis)
            print("\n")
            print("distance_list", distance_list)

            # Cluster the gaps to find groups of similar spacing.
            data_list = [[0, x] for x in distance_list]
            # Mask out gaps at or above the 5px threshold before clustering.
            data_mask_list = []
            temp_data_list = []
            for j in range(len(data_list)):
                if data_list[j][1] < 5.:
                    data_mask_list.append(True)
                    temp_data_list.append(data_list[j])
                else:
                    data_mask_list.append(False)
            data_list = temp_data_list
            print("data_list", data_list)

            cluster_list = []
            if len(data_list) > 2:
                # Cluster with DBSCAN.
                pred_list = dbscan(data_list)
                print('pred_list', pred_list)

                # Re-expand predictions to the original gap positions;
                # masked-out gaps get label -1.
                temp_pred_list = []
                for j in data_mask_list:
                    if j:
                        temp_pred_list.append(pred_list.pop(0))
                    else:
                        temp_pred_list.append(-1)
                pred_list = temp_pred_list
                print('pred_list', pred_list)

                # Collect runs of consecutive indices sharing the same label.
                cluster_num = len(list(set(pred_list)))
                for k in range(cluster_num):
                    temp_list = []
                    for j in range(len(pred_list)):
                        if pred_list[j] == k:
                            if temp_list:
                                if j - temp_list[-1] == 1:
                                    temp_list.append(j)
                            else:
                                temp_list.append(j)
                        else:
                            if temp_list:
                                cluster_list.append(temp_list)
                            temp_list = []
                    if temp_list:
                        cluster_list.append(temp_list)

            elif len(data_list) > 0:
                # Too few gaps for DBSCAN: group consecutive small gaps directly.
                temp_list = []
                for j in range(len(distance_list)):
                    if distance_list[j] < 5.0:
                        temp_list.append(j)
                    else:
                        if temp_list:
                            cluster_list.append(temp_list)
                        temp_list = []
                if temp_list:
                    cluster_list.append(temp_list)
                # cluster_list.append([x for x in range(len(distance_list))])
            print('text_list', text_list)
            print('cluster_list', cluster_list)

            # Merge the bboxes spanned by each all-small-gap cluster.
            new_bbox_list = copy.deepcopy(bbox_list)
            for cluster in cluster_list:
                merge_flag = 1
                for dis in [distance_list[x] for x in cluster]:
                    if dis >= 5.0:
                        merge_flag = 0
                        break
                if merge_flag:
                    # Gap index i spans bboxes i..i+1, hence the +2 slice end.
                    b_list = bbox_list[cluster[0]:cluster[-1]+2]
                    t_list = text_list[cluster[0]:cluster[-1]+2]
                    min_w = 10000
                    max_w = 0
                    min_h = 10000
                    max_h = 0
                    # De-duplicate via str()/eval() — nested lists are unhashable.
                    b_list = [eval(x) for x in list(set([str(x) for x in b_list]))]
                    for bbox in b_list:
                        if bbox in new_bbox_list:
                            new_bbox_list.remove(bbox)
                        if bbox in all_bbox_list:
                            all_bbox_list.remove(bbox)
                        if bbox[0][0] < min_w:
                            min_w = bbox[0][0]
                        if bbox[0][1] < min_h:
                            min_h = bbox[0][1]
                        if bbox[2][0] > max_w:
                            max_w = bbox[2][0]
                        if bbox[2][1] > max_h:
                            max_h = bbox[2][1]
                    new_bbox = [[min_w, min_h], [max_w, min_h], [max_w, max_h], [min_w, max_h]]
                    new_bbox_list.append(new_bbox)
                    all_bbox_list.append(new_bbox)

            # Use the first column's merge result to guide the other columns.
            if col_cnt == 0:
                first_col_rows = get_first_col_rows(new_bbox_list, table_location_list[i])
                for r in first_col_rows:
                    cv2.line(temp_img, (0, int(r)), (temp_img.shape[1], int(r)), (0, 0, 255), 1)
                cv2.imshow('temp_img', temp_img)
                # cv2.waitKey(0)
            col_cnt += 1
        # new_col.append(new_bbox_list)
    return all_bbox_list
+
+
def merge_col_bbox_by_block(img, area_row_list, area_col_list, bbox_text_dict, bbox_list, table_location_list):
    """Re-split each table into rows using the first column as a guide.

    For every table: cluster and merge the first column's bboxes, derive
    horizontal split lines from them (get_first_col_rows), then assign every
    bbox of the table to a row band — bboxes fully inside a band join it
    directly; bboxes straddling a boundary go to the band whose first-column
    bbox center is closest.  *area_row_list* is rewritten in place and
    returned.

    NOTE(review): contains many debug prints — presumably leftover
    development code.
    """
    temp_img = copy.deepcopy(img)

    # Iterate over each table.
    for i in range(len(area_row_list)):
        row_list = area_row_list[i]
        col_list = area_col_list[i]
        table_location = table_location_list[i]

        # Bboxes whose top or second corner lies inside this table's span.
        sub_bbox_list = []
        for bbox in bbox_list:
            if table_location[0][1] <= bbox[0][1] <= table_location[1][1] \
                    or table_location[0][1] <= bbox[1][1] <= table_location[1][1]:
                sub_bbox_list.append(bbox)

        # Cluster and merge the first column, then split it into row bands.
        first_col = col_list[0]
        cluster_list, distance_list = distance_cluster(first_col, axis=1)
        merge_first_col = merge_cluster(first_col, cluster_list, distance_list)
        merge_first_col.sort(key=lambda x: (x[0][1], x[0][0]))
        row_lines = get_first_col_rows(merge_first_col, table_location)

        # Cluster and merge the remaining columns.
        # merge_bbox_list = [] + first_col
        # for col in col_list[1:]:
        #     cluster_list = distance_cluster(col, axis=1)
        #     merge_col = merge_cluster(col, cluster_list)
        #     merge_bbox_list += merge_col

        # Walk the row bands and assign bboxes to them.
        new_row_list = []
        row_lines.sort(key=lambda x: x)
        row_cnt = 0
        need_add_bbox = []  # bboxes deferred to the next band
        # for c in first_col:
            # print('first col ', bbox_text_dict.get(str(c)))
        for j in range(1, len(row_lines)):
            print('\n')
            top_line = row_lines[j-1]
            bottom_line = row_lines[j]
            new_row = []
            if need_add_bbox:
                # print('add')
                new_row += need_add_bbox
                print('add', bbox_text_dict.get(str(new_row[0])))
            need_add_bbox = []
            # Assignment rules:
            # 1. fully contained in the band -> join directly;
            # 2. straddling a boundary -> compare the distance to the
            #    nearest first-column bbox above vs. below.
            for bbox in sub_bbox_list:
                if top_line <= bbox[0][1] <= bbox[2][1] <= bottom_line:
                    new_row.append(bbox)
                    # print('bbox, line', bbox_text_dict.get(str(bbox)), top_line, bottom_line)

                else:
                    if bbox in first_col:
                        continue

                    # With a single-row first column, boundary bboxes are
                    # deferred rather than assigned.
                    if len(first_col) == 1:
                        need_add_bbox.append(bbox)
                        continue

                    # Find the closest first-column bboxes above and below.
                    first_col_center_h1 = 0
                    first_col_center_h2 = 10000
                    first_col_bbox1 = None
                    first_col_bbox2 = None
                    bbox_center_h = (bbox[0][1] + bbox[2][1]) / 2

                    for b in first_col:
                        b_center_h = (b[0][1] + b[2][1]) / 2
                        # if bbox[0][1] <= b_center_h <= bbox[2][1]:
                        #     first_col_center_h2 = b_center_h
                        #     break
                        if bbox_center_h >= b_center_h and bbox_center_h - b_center_h <= bbox_center_h - first_col_center_h1:
                            first_col_center_h1 = b_center_h
                            first_col_bbox1 = b
                        if b_center_h >= bbox_center_h and b_center_h - bbox_center_h <= first_col_center_h2 - bbox_center_h:
                            first_col_center_h2 = b_center_h
                            first_col_bbox2 = b

                    # If the nearest first-column bbox is not this band's
                    # own first cell, defer to the next band.
                    if new_row and first_col_bbox1 != new_row[0] and top_line < bbox[0][1] < bottom_line:
                        need_add_bbox.append(bbox)
                        continue

                    # if top_line <= bbox[2][1] <= bottom_line \
                    #         and abs(first_col_center_h1 - bbox_center_h) >= abs(first_col_center_h2 - bbox_center_h):
                    #     new_row.append(bbox)
                    # if first_col_bbox1 and first_col_bbox2:
                    #     print('bbox1, bbox2', bbox_text_dict[str(first_col_bbox1)], bbox_text_dict[str(first_col_bbox2)],
                    #           bbox_text_dict[str(bbox)])
                    if top_line < bbox[0][1] < bottom_line \
                            and abs(first_col_center_h1 - bbox_center_h) <= abs(first_col_center_h2 - bbox_center_h):
                        new_row.append(bbox)
                    elif top_line < bbox[0][1] < bottom_line:
                        need_add_bbox.append(bbox)
            for r in need_add_bbox:
                print("next_row bbox", bbox_text_dict.get(str(r)))

            print('row', row_cnt, len(new_row))
            for b in new_row:
                print(bbox_text_dict.get(str(b)))
            row_cnt += 1
            new_row_list.append(new_row)

        area_row_list[i] = new_row_list

        # show
        r_cnt = 0
        # for r in row_lines:
        #     if r_cnt == 0 or r_cnt == len(row_lines) - 1:
        #         cv2.line(temp_img, (0, int(r)), (temp_img.shape[1], int(r)), (255, 0, 0), 1)
        #     else:
        #         cv2.line(temp_img, (0, int(r)), (temp_img.shape[1], int(r)), (0, 255, 0), 1)
        #     r_cnt += 1
        # for b in merge_bbox_list:
        #     cv2.rectangle(temp_img, [int(b[0][0]), int(b[0][1])], [int(b[2][0]), int(b[2][1])], (0, 0, 255), 1)
        # cv2.imshow('temp_img', temp_img)

    return area_row_list
+
+
def distance_cluster(bbox_list, max_distance=5., axis=1):
    """Cluster the gaps between consecutive bboxes along *axis*.

    Sorts *bbox_list* in place, measures the gap between each consecutive
    pair (negative overlaps clamped to 0), and groups runs of consecutive
    gaps below *max_distance* — via DBSCAN when there are more than two
    small gaps, otherwise by simple thresholding.

    :param bbox_list: 4-point bboxes [[x1,y1],[x2,y1],[x2,y2],[x1,y2]]
    :param max_distance: gaps >= this value are never clustered
    :param axis: coordinate used for the gap (1 = vertical)
    :return: (cluster_list, distance_list) where each cluster is a list of
        consecutive indices into distance_list

    NOTE(review): contains debug prints — presumably leftover development
    code.
    """
    # Collect the gap between each pair of consecutive bboxes.
    distance_list = []
    # NOTE(review): the sort key is always vertical, even when axis != 1 —
    # confirm that is intended for horizontal use.
    bbox_list.sort(key=lambda x: (x[0][1], x[1][1]))
    for j in range(1, len(bbox_list)):
        dis = bbox_list[j][0][axis] - bbox_list[j-1][2][axis]
        if dis < 0:
            dis = 0.
        distance_list.append(dis)
    print("\n")
    print("distance_list", distance_list)

    # Cluster the gaps to find groups of similar spacing.
    data_list = [[0, x] for x in distance_list]
    # Mask out gaps at or above the threshold before clustering.
    data_mask_list = []
    temp_data_list = []
    for j in range(len(data_list)):
        if data_list[j][1] < max_distance:
            data_mask_list.append(True)
            temp_data_list.append(data_list[j])
        else:
            data_mask_list.append(False)
    data_list = temp_data_list
    print("data_list", data_list)

    cluster_list = []
    if len(data_list) > 2:
        # Cluster with DBSCAN.
        pred_list = dbscan(data_list)
        print('pred_list', pred_list)

        # Re-expand predictions to the original gap positions; masked-out
        # gaps get label -1.
        temp_pred_list = []
        for j in data_mask_list:
            if j:
                temp_pred_list.append(pred_list.pop(0))
            else:
                temp_pred_list.append(-1)
        pred_list = temp_pred_list
        print('pred_list', pred_list)

        # Collect runs of consecutive indices sharing the same label.
        cluster_num = len(list(set(pred_list)))
        for k in range(cluster_num):
            temp_list = []
            for j in range(len(pred_list)):
                if pred_list[j] == k:
                    if temp_list:
                        if j - temp_list[-1] == 1:
                            temp_list.append(j)
                    else:
                        temp_list.append(j)
                else:
                    if temp_list:
                        cluster_list.append(temp_list)
                    temp_list = []
            if temp_list:
                cluster_list.append(temp_list)

    elif len(data_list) > 0:
        # Too few gaps for DBSCAN: group consecutive small gaps directly.
        temp_list = []
        for j in range(len(distance_list)):
            if distance_list[j] < max_distance:
                temp_list.append(j)
            else:
                if temp_list:
                    cluster_list.append(temp_list)
                temp_list = []
        if temp_list:
            cluster_list.append(temp_list)
    print('cluster_list', cluster_list)
    return cluster_list, distance_list
+
+
def merge_cluster(bbox_list, cluster_list, distance_list):
    """Merge vertically adjacent bboxes according to gap clusters.

    Each entry of *cluster_list* is a list of indices into *distance_list*
    (gap i separates bbox i and bbox i+1); the bboxes spanned by a cluster
    are replaced by their bounding rectangle.  Special cases override
    *cluster_list*:

      * a single large cluster (>= 4 gaps) is split into per-gap singletons
        (evenly spaced rows would otherwise collapse into one cell);
      * uniformly small gaps (range <= 5.5) merge every adjacent pair;
      * one dominant gap (range >= 10, gap index <= 2) with uniform gaps
        elsewhere merges adjacent pairs on each side of it — never across it.

    :param bbox_list: column bboxes, sorted top to bottom; not modified
    :param cluster_list: clusters of gap indices from distance_cluster()
    :param distance_list: the gap sizes from distance_cluster()
    :return: a new bbox list with clustered bboxes merged
    """
    new_bbox_list = copy.deepcopy(bbox_list)

    # Special case: evenly spaced rows may all land in one DBSCAN cluster.
    if len(cluster_list) == 1 and len(cluster_list[0]) >= 4:
        cluster_list = [[x] for x in cluster_list[0]]

    # All gaps small and uniform: merge every adjacent pair.
    if distance_list:
        if max(distance_list) - min(distance_list) <= 5.5:
            cluster_list = [[i] for i in range(len(distance_list))]

    # One dominant gap near the top, uniform gaps elsewhere: merge pairs on
    # each side of the dominant gap.
    if distance_list and max(distance_list) - min(distance_list) >= 10:
        index = distance_list.index(max(distance_list))
        tail = distance_list[index + 1:]
        if index <= 2 and len(tail) >= 3 and max(tail) - min(tail) <= 5.5:
            if index == 0:
                # BUGFIX: indices must start after the dominant gap; the old
                # code restarted at 0 and merged bboxes across the gap.
                cluster_list = [[i] for i in range(index + 1, len(distance_list))]
            else:
                head = distance_list[:index]
                if max(head) - min(head) <= 5.5:
                    cluster_list = [[i] for i in range(index)]
                    # BUGFIX: same off-by-offset for the tail side.
                    cluster_list += [[i] for i in range(index + 1, len(distance_list))]

    for cluster in cluster_list:
        # Gap indices refer to gaps, so bboxes cluster[0]..cluster[-1]+1
        # participate; the slice end is therefore cluster[-1] + 2.
        b_list = bbox_list[cluster[0]:cluster[-1] + 2]
        # De-duplicate by string key (nested lists are unhashable); avoids
        # the previous eval()-based round trip.
        b_list = list({str(b): b for b in b_list}.values())
        min_w = 10000
        max_w = 0
        min_h = 10000
        max_h = 0
        for bbox in b_list:
            if bbox in new_bbox_list:
                new_bbox_list.remove(bbox)
            min_w = min(min_w, bbox[0][0])
            min_h = min(min_h, bbox[0][1])
            max_w = max(max_w, bbox[2][0])
            max_h = max(max_h, bbox[2][1])
        new_bbox = [[min_w, min_h], [max_w, min_h], [max_w, max_h], [min_w, max_h]]
        new_bbox_list.append(new_bbox)
    return new_bbox_list
+
+
def get_first_col_rows(first_col, table_location):
    """Derive horizontal row-split positions from the first column's bboxes.

    Each bbox contributes a split line below itself, offset by the smaller
    of its gap to the previous split and its gap to the next bbox.  The list
    starts at the table top and ends at (or beyond) the table bottom.

    :return: list of y coordinates, one more entry than len(first_col)
        plus the table-top anchor (and a table-bottom entry when the
        column has a single bbox).
    """
    table_top = table_location[0][1]
    table_bottom = table_location[1][1]

    splits = [table_top]
    for idx, box in enumerate(first_col):
        # Gap to the following bbox; effectively infinite for the last one.
        if idx + 1 < len(first_col):
            gap_below = abs(box[2][1] - first_col[idx + 1][0][1])
        else:
            gap_below = 10000
        # Gap above: to the table top for the first bbox, otherwise to the
        # previous split line.
        reference = table_top if idx == 0 else splits[-1]
        gap_above = abs(box[0][1] - reference)
        splits.append(box[2][1] + min(gap_above, gap_below))

    if len(splits) == 2:
        splits.append(table_bottom)
    else:
        splits[-1] = max(splits[-1], table_bottom)
    return splits
+
+
def judge_standard_table(row_list):
    """Detect "standard" tables: consecutive rows with a constant column count.

    Scans *row_list* (rows of bboxes, top to bottom).  Rows with >= 2 bboxes
    and the same bbox count accumulate into one table; a row with a
    different count discards the accumulation, and (after one tolerated
    short row) a short row closes it.  A table needs more than one row to be
    recorded.

    :return: (table_location_list, area_row_list) with the same shapes as
        get_table_location()
    """
    up_h = 10000          # running top of the current candidate
    bottom_h = 0          # running bottom
    left_w = 10000        # running left
    right_w = 0           # running right
    table_rows = 0        # rows accumulated into the candidate
    now_row_len = 0       # required bbox count per row (0 = not yet fixed)
    init_flag = 0         # reset the accumulator before the next row
    tolerance_list = []   # tolerated short rows inside the candidate
    area_row_list = []
    temp_row_list = []
    table_location_list = []

    def flush():
        # Record the accumulated rows as one table if it is big enough.
        if table_rows > 1 and up_h < bottom_h:
            table_location_list.append([[int(left_w), int(up_h)],
                                        [int(right_w), int(bottom_h)]])
            # Drop a trailing tolerated short row from the table body.
            if tolerance_list and tolerance_list[-1] == temp_row_list[-1]:
                area_row_list.append(temp_row_list[:-1])
            else:
                area_row_list.append(temp_row_list)

    for row in row_list:
        if init_flag:
            up_h = 10000
            bottom_h = 0
            left_w = 10000
            right_w = 0
            table_rows = 0
            # BUGFIX: now_row_len was never reset, so a later table with a
            # different column count could never be detected.
            now_row_len = 0
            tolerance_list = []
            temp_row_list = []
            init_flag = 0

        if len(row) >= 2:
            if now_row_len == 0:
                now_row_len = len(row)
            elif len(row) != now_row_len:
                # Column count changed: not a standard table — discard.
                init_flag = 1
                continue

            table_rows += 1
            temp_row_list.append(row)
            # Grow the candidate's bounding rectangle.
            for bbox in row:
                up_h = min(up_h, bbox[0][1])
                bottom_h = max(bottom_h, bbox[2][1])
                left_w = min(left_w, bbox[0][0])
                right_w = max(right_w, bbox[2][0])
        else:
            # Tolerate one short row inside a running candidate.
            if len(tolerance_list) < 1 and table_rows > 0:
                tolerance_list.append(row)
                temp_row_list.append(row)
                continue
            flush()
            init_flag = 1

    # BUGFIX: flush a table that runs to the end of row_list (mirrors the
    # post-loop handling in get_table_location); previously it was dropped.
    if not init_flag and temp_row_list:
        flush()

    return table_location_list, area_row_list
+
+
def split_bbox(img, bbox, bbox_text_dict):
    """Split a wide OCR bbox at blank vertical gaps in its image region.

    Scans the bbox's pixel columns for black/white transitions, keeps
    transition pairs with an all-white gap between them, cuts the bbox
    there, and distributes the recognized text across the pieces in
    proportion to their width.  *bbox_text_dict* gains entries for the new
    sub-bboxes.

    NOTE(review): contains debug prints and a blocking cv2.imshow/waitKey —
    presumably leftover development code.  Raises IndexError when no
    black/white transition exists (split_line_list empty) and
    ZeroDivisionError when the recognized text is empty — confirm callers
    guarantee ink and non-empty text inside the bbox.

    :param img: full page image (H x W x 3 assumed — TODO confirm)
    :param bbox: 4-point bbox [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
    :param bbox_text_dict: str(bbox) -> text; extended in place
    :return: (split_bbox_list, bbox_text_dict)
    """
    text = bbox_text_dict.get(str(bbox))

    sub_img = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]
    split_line_list = []
    last_i_status = 1
    # Walk the sub-image column by column, left to right.
    for i in range(1, sub_img.shape[1]):
        # A column counts as "black" when enough of its pixels are dark.
        if np.where(sub_img[:, i, :] < 200)[0].size > sub_img.shape[0]/5:
            i_status = 0
        else:
            i_status = 1
        # XOR: record a split line at every black<->white transition
        # (previous column black and current white, or vice versa).
        if last_i_status ^ i_status:
            split_line_list.append(int(i))
            last_i_status = i_status

    # Drop split lines that are too close to the previous one.
    min_len = 5
    last_l = split_line_list[0]
    temp_list = [split_line_list[0]]
    for l in split_line_list[1:]:
        if l - last_l > min_len:
            temp_list.append(l)
        last_l = l
    split_line_list = temp_list

    # A pair of split lines with no black pixels between them marks a cut.
    split_pair_list = []
    last_line = split_line_list[0]
    for line in split_line_list[1:]:
        print('last_line, line', last_line, line, np.where(sub_img[:, last_line:line, :] < 100)[0].size)
        if line - last_line >= 10 and np.where(sub_img[:, last_line:line, :] < 100)[0].size < 10:
            split_pair_list.append([last_line, line])
        last_line = line

    print('split_pair_list', split_pair_list)

    # Debug visualization of the candidate split lines (blocking).
    for l in split_line_list:
        l = int(l + bbox[0][0])
        cv2.line(img, (l, int(bbox[0][1])), (l, int(bbox[2][1])), (0, 255, 0), 2)
    cv2.rectangle(img, (int(bbox[0][0]), int(bbox[0][1])), (int(bbox[2][0]), int(bbox[2][1])),
                  (0, 0, 255), 1)
    cv2.imshow('img', img)
    cv2.waitKey(0)

    # Cut the bbox at each gap to get the new sub-bboxes (corners 1 and 3
    # are left as empty lists).
    split_bbox_list = []
    if split_pair_list:
        start_line = 0
        for line1, line2 in split_pair_list:
            w1 = start_line + bbox[0][0]
            w2 = line1 + bbox[0][0]
            start_line = line2
            split_bbox_list.append([[w1, bbox[0][1]], [], [w2, bbox[2][1]], []])
        w1 = start_line + bbox[0][0]
        w2 = bbox[2][0]
        split_bbox_list.append([[w1, bbox[0][1]], [], [w2, bbox[2][1]], []])

    print('split_bbox_list', split_bbox_list)

    # Estimate the average width of a single character.
    # NOTE(review): from here on the loop variable shadows the *bbox*
    # parameter — acceptable only because the parameter is no longer used.
    all_len = 0
    bbox_len_list = []
    for bbox in split_bbox_list:
        _len = abs(bbox[2][0] - bbox[0][0])
        all_len += _len
        bbox_len_list.append(_len)
    single_char_len = all_len / len(text)

    # Slice the text for each piece, proportional to the piece's width.
    split_text_list = []
    text_start = 0
    for _len in bbox_len_list:
        text_num = int(_len / single_char_len + 0.5)
        text_end = text_start+text_num
        if text_end >= len(text):
            text_end = len(text)
        split_text_list.append(text[text_start:text_end])
        text_start = text_end
    print('split_text_list', split_text_list)

    # Register the texts of the new sub-bboxes.
    for i, bbox in enumerate(split_bbox_list):
        bbox_text_dict[str(bbox)] = split_text_list[i]

    return split_bbox_list, bbox_text_dict
+
+
def split_table(table_location_list, area_row_list, bbox_text_dict):
    """Split table areas at title-like rows.

    A row that contains exactly one bbox which is either roughly centered
    in the table, or left-aligned and at least 1/5 of the table width, is
    treated as an embedded title and used as a split point. Segments with
    fewer than 2 remaining rows are dropped.

    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
    :param area_row_list: per table, a list of rows; each row is a list of
        4-point bboxes [[x0, y0], [x1, y0], [x1, y1], [x0, y1]]
    :param bbox_text_dict: unused, kept for interface compatibility
    :return: (new table_location_list, new area_row_list)
    """
    temp_location_list = []
    temp_area_row_list = []
    for location, sub_row_list in zip(table_location_list, area_row_list):
        # Collect indexes of title-like rows: a single bbox that is either
        # centered (with 1/3-width tolerance) or left-aligned and wide.
        need_split_index = []
        table_mid_x = (location[0][0] + location[1][0]) / 2
        for j, row in enumerate(sub_row_list):
            if len(row) != 1:
                continue
            bbox = row[0]
            threshold = (bbox[2][0] - bbox[0][0]) / 3
            if bbox[0][0] + threshold <= table_mid_x <= bbox[2][0] - threshold:
                need_split_index.append(j)
            elif abs(location[0][0] - bbox[0][0]) <= 5 \
                    and bbox[2][0] - bbox[0][0] >= (location[1][0] - location[0][0]) / 5:
                need_split_index.append(j)

        if not need_split_index:
            temp_location_list.append(location)
            temp_area_row_list.append(sub_row_list)
            continue

        # Cut the table at every title row; the sentinel len(sub_row_list)
        # closes the final segment.
        last_index = 0
        need_split_index.append(len(sub_row_list))
        for index in need_split_index:
            segment = sub_row_list[last_index:index]
            last_index = index + 1
            if len(segment) < 2:
                # Too few rows to form a table segment; drop it.
                continue
            temp_area_row_list.append(segment)
            # New location = bounding box of every bbox in the segment
            # (avoids the original's hard-coded 10000 sentinel).
            xs = [p for row in segment for bbox in row for p in (bbox[0][0], bbox[2][0])]
            ys = [p for row in segment for bbox in row for p in (bbox[0][1], bbox[2][1])]
            temp_location_list.append([[int(min(xs)), int(min(ys))],
                                       [int(max(xs)), int(max(ys))]])

    return temp_location_list, temp_area_row_list
+
+
def split_table_by_col(table_location_list, area_table_bbox_list, bbox_text_dict):
    """(Apparently unfinished) Detect split rows by column-occupancy changes.

    For each table, walks the rows and records the index of any row whose
    per-column bbox counts grew compared to the previous row while that
    previous column was already non-empty.

    NOTE(review): the computed ``split_index_list`` is only printed and
    never applied — the function returns ``table_location_list``
    unchanged, so this looks like work in progress.

    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
    :param area_table_bbox_list: per table, rows -> columns -> bboxes
    :param bbox_text_dict: unused here
    :return: ``table_location_list``, unchanged
    """
    for i in range(len(table_location_list)):
        location = table_location_list[i]  # NOTE(review): assigned but unused
        table_bbox_list = area_table_bbox_list[i]

        # Walk every row, comparing column occupancy with the previous row.
        split_index_list = []
        for j in range(1, len(table_bbox_list)):
            row = table_bbox_list[j]
            last_row = table_bbox_list[j-1]
            # Number of bboxes held by each column of the two rows.
            # assumes all rows have the same column count — TODO confirm
            row_bbox_cnt_list = [len(x) for x in row]
            last_row_bbox_cnt_list = [len(x) for x in last_row]
            diff_num = 0
            diff_flag = 0
            for k in range(len(row_bbox_cnt_list)):
                if row_bbox_cnt_list[k] > last_row_bbox_cnt_list[k]:
                    # Only a column that was already non-empty counts as a
                    # genuine structural change.
                    if last_row_bbox_cnt_list[k] != 0:
                        diff_flag = 1
                    diff_num += 1
            if diff_num > 0 and diff_flag:
                split_index_list.append(j)
                continue
        print('split_index_list', split_index_list)

    return table_location_list
+
+
+# def split_table_by_table_head(table_location_list, area_table_bbox_list, bbox_text_dict):
+#     new_table_location_list = []
+#     for i in range(len(table_location_list)):
+#         location = table_location_list[i]
+#         table_bbox_list = area_table_bbox_list[i]
+#
+#         # 每行单独进行表头预测
+#         table_head_row_list = []
+#         for j in range(len(table_bbox_list)):
+#             row = table_bbox_list[j]
+#             print('row', row)
+#
+#             if row.count([]) == len(row):
+#                 table_head_row_list.append([['', 0]])
+#                 continue
+#
+#             row_bbox_list = []
+#             for col in row:
+#                 for b in col:
+#                     new_b = bbox_text_dict.get(str(b))
+#                     new_b = re.sub("^[^\u4e00-\u9fa5a-zA-Z0-9]+", "", new_b)
+#                     new_b = re.sub("[^\u4e00-\u9fa5a-zA-Z0-9]+$", "", new_b)
+#                     row_bbox_list.append(new_b)
+#             result_list = predict([row_bbox_list])
+#             # 组合结果
+#             for m in range(len(result_list)):
+#                 for n in range(len(result_list[m])):
+#                     result_list[m][n] = [row_bbox_list[n], int(result_list[m][n])]
+#             result_list = result_list[0]
+#             print('table_head', result_list)
+#             table_head_row_list.append(result_list)
+#
+#         # 根据表头分割
+#         split_index_list = []
+#         for j in range(1, len(table_head_row_list)):
+#             row_head = [x[1] for x in table_head_row_list[j]]
+#             last_row_head = [x[1] for x in table_head_row_list[j-1]]
+#
+#             # [['6', 0], ['税费', 0], ['依法缴纳', 0], ['1', 0], ['次', 0], ['25000', 0], ['25000', 0]]
+#             # [['大写', 1], ['肆抢柒万元整', 0]]
+#             if 1 in row_head and 1 not in last_row_head:
+#                 split_index_list.append(j)
+#
+#             # [['供应商', 1], ['广东一线达通网络科技有限公司', 0]]
+#             # [['货物明细', 1], ['单价金额(元', 1], ['数量', 1], ['总计金额(元', 1]]
+#             if 1 in row_head and 1 in last_row_head and 0 not in row_head and row_head.count(1) != last_row_head.count(1):
+#                 split_index_list.append(j)
+#         print('split_index_list', split_index_list)
+#
+#         new_location_list = table_split_by_index(location, split_index_list, table_bbox_list)
+#         print('new_location_list, location', new_location_list, location)
+#         new_table_location_list += new_location_list
+#     print('new_table_location_list', new_table_location_list)
+#     return new_table_location_list
+
+
def table_split_by_index(table_location, split_index_list, table_bbox_list):
    """Split one table location into several at the given row indexes.

    :param table_location: [[min_x, min_y], [max_x, max_y]]
    :param split_index_list: row indexes at which to cut the table
    :param table_bbox_list: rows -> columns -> 4-point bboxes
    :return: list of [[min_x, min_y], [max_x, max_y]]; falls back to
        [table_location] when there are no cuts or no usable segment
    """
    if not split_index_list:
        return [table_location]

    # Deduplicate and order the cut points, always covering the full range.
    boundaries = sorted(set([0] + split_index_list + [len(table_bbox_list)]))

    new_location_list = []
    for last_index, index in zip(boundaries[:-1], boundaries[1:]):
        # Bounding box over every bbox of the segment's rows.
        xs, ys = [], []
        for row in table_bbox_list[last_index:index]:
            for col in row:
                for bbox in col:
                    if bbox:
                        xs += [int(bbox[0][0]), int(bbox[2][0])]
                        ys += [int(bbox[0][1]), int(bbox[2][1])]
        if not xs:
            # Segment contains no bboxes; nothing to locate. (The original
            # emitted a degenerate [[10000, 10000], [0, 0]] location here.)
            continue
        new_location_list.append([[min(xs), min(ys)], [max(xs), max(ys)]])

    return new_location_list if new_location_list else [table_location]
+
+
def split_table_new(table_location_list, area_table_bbox_list, area_table_cell_list, area_row_list, bbox_text_dict):
    """Split table areas at rows that look like section-header rows.

    A row is treated as a split row when exactly one of its cells holds
    a single bbox and that bbox either (a) horizontally contains/crosses
    2+ distinct bboxes found in the same column of the surrounding rows,
    or (b) extends beyond its own cell's horizontal bounds.

    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
    :param area_table_bbox_list: per table, rows -> columns -> bboxes
    :param area_table_cell_list: per table, rows -> cell rectangles
    :param area_row_list: per table, list of rows of bboxes
    :param bbox_text_dict: str(bbox) -> recognized text (debug output only)
    :return: (new table_location_list, new area_row_list)
    """
    temp_location_list = []
    temp_area_row_list = []
    for k in range(len(table_location_list)):
        table = area_table_bbox_list[k]
        location = table_location_list[k]
        row_list = area_row_list[k]
        table_cell_list = area_table_cell_list[k]
        split_row_index_list = []

        # Iterate over every row of the table.
        for i in range(len(table)):
            row = table[i]
            # print('row', i)
            # for j in range(len(row)):
            #     col = row[j]
            #     print('col', j, ';'.join([bbox_text_dict.get(str(x)) for x in col]))

            # A row is a table split row when:
            # 1. only one column has a value, and the longest bbox of that
            #    column contains 2+ bboxes of the same column in other rows
            # 2. only one column has a value, and its longest bbox spans
            #    multiple columns

            # Grab up to n rows before and after the current one.
            n = 3
            if i-n < 0:
                last_n_rows = table[0:i]
            else:
                last_n_rows = table[i-n:i]
            if i+1 >= len(table):
                next_n_rows = []
            elif i+n+1 >= len(table):
                next_n_rows = table[i+1:len(table)]
            else:
                next_n_rows = table[i+1:i+n+1]

            # Find rows where only a single cell holds data.
            not_empty_col_cnt = 0
            only_one_index = -1
            for j in range(len(row)):
                col = row[j]
                if col:
                    not_empty_col_cnt += len(col)
                    only_one_index = j

            if not_empty_col_cnt == 1:
                print('only_one_index, i', only_one_index, i)
                # Compare the same column of the surrounding n rows.
                for r in last_n_rows+next_n_rows:
                    col = r[only_one_index]
                    if len(col) > 1:
                        print('col', [bbox_text_dict.get(str(x)) for x in col])
                        # Collapse horizontally overlapping bboxes of the
                        # cell to one representative each.
                        col_bbox_list = [col[0]]
                        for bbox in col:
                            for j in range(len(col_bbox_list)):
                                bbox1 = col_bbox_list[j]
                                if bbox1[0][0] <= bbox[0][0] <= bbox[2][0] <= bbox1[2][0]:
                                    col_bbox_list[j] = bbox
                                elif bbox[0][0] <= bbox1[0][0] <= bbox1[2][0] <= bbox[2][0]:
                                    continue
                                else:
                                    col_bbox_list.append(bbox)
                        if len(col_bbox_list) > 1:
                            # Does the longest bbox of this row's column
                            # contain or cross several of those bboxes?
                            col = row[only_one_index]
                            print('long col', [bbox_text_dict.get(str(x)) for x in col])
                            # NOTE: sorts the cell's bbox list in place.
                            col.sort(key=lambda x: abs(x[2][0]-x[0][0]))
                            longest_bbox = col[-1]
                            contain_cnt = 0
                            cross_cnt = 0
                            for bbox in col_bbox_list:
                                if longest_bbox[0][0] <= bbox[0][0] <= bbox[2][0] <= longest_bbox[2][0]:
                                    contain_cnt += 1
                                if bbox[0][0] < longest_bbox[0][0] < bbox[2][0] or bbox[0][0] < longest_bbox[2][0] < bbox[2][0]:
                                    cross_cnt += 1
                            print('cross_cnt', cross_cnt)
                            if contain_cnt >= 2 or cross_cnt >= 2:
                                # debug: "contains several horizontal bboxes"
                                print('包含多个横向排列bbox', i)
                                split_row_index_list.append(i)

                # Does this row/column's longest bbox cross its cell bounds?
                col = row[only_one_index]
                col.sort(key=lambda x: abs(x[2][0]-x[0][0]))
                longest_bbox = col[-1]
                cell_row = table_cell_list[i]
                cell_col = cell_row[only_one_index]
                threshold = 15

                if cell_col[0][0]-threshold <= longest_bbox[0][0] <= longest_bbox[2][0] <= cell_col[1][0]+threshold:
                    pass
                else:
                    # debug: "longest bbox crosses the cell"
                    print('最长bbox跨单元格', i)
                    split_row_index_list.append(i)

        if split_row_index_list:
            # Split the table at the collected row indexes (sentinels -1
            # and len(table) close the first and last segments).
            split_row_index_list.insert(0, -1)
            split_row_index_list.insert(len(split_row_index_list), len(table))
            split_row_index_list = list(set(split_row_index_list))
            split_row_index_list.sort(key=lambda x: x)
            print('split_row_index_list', split_row_index_list, len(table))
            for l in range(1, len(split_row_index_list)):
                index = split_row_index_list[l]
                last_index = split_row_index_list[l-1]
                # Segments shorter than 3 rows are skipped.
                if index - last_index <= 2:
                    continue
                start_row_index = last_index+1
                end_row_index = index-1
                start_row = table[last_index+1]
                end_row = table[index-1]
                start_row = [x for y in start_row for x in y]
                end_row = [x for y in end_row for x in y]
                start_row = list(filter(lambda x: x != [], start_row))
                end_row = list(filter(lambda x: x != [], end_row))
                # If a boundary row is empty, step one row further inwards.
                if not start_row:
                    start_row_index = last_index + 2
                    start_row = table[start_row_index]
                    start_row = [x for y in start_row for x in y]
                    start_row = list(filter(lambda x: x != [], start_row))
                if not end_row:
                    end_row_index = index - 2
                    end_row = table[end_row_index]
                    end_row = [x for y in end_row for x in y]
                    end_row = list(filter(lambda x: x != [], end_row))
                if not start_row or not end_row or end_row_index-start_row_index < 1:
                    continue

                # Vertical extent from the boundary rows, horizontal extent
                # from the original table location.
                start_row.sort(key=lambda x: x[0][1])
                min_h = int(start_row[0][0][1])
                min_w = location[0][0]
                end_row.sort(key=lambda x: x[2][1])
                max_h = int(end_row[-1][2][1])
                max_w = location[1][0]
                new_location = [[min_w, min_h], [max_w, max_h]]
                temp_location_list.append(new_location)
                temp_area_row_list.append(row_list[last_index+1:index])
        else:
            temp_location_list.append(location)
            temp_area_row_list.append(row_list)

    table_location_list = temp_location_list
    area_row_list = temp_area_row_list
    return table_location_list, area_row_list
+
+
def split_table_new2(table_location_list, area_table_bbox_list, area_table_cell_list, area_row_list, bbox_text_dict):
    """Variant of split_table_new: split before rows whose longest bbox
    horizontally covers several distinct bboxes of the preceding rows.

    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
    :param area_table_bbox_list: per table, rows -> columns -> bboxes
    :param area_table_cell_list: per table, rows -> cell rectangles
        (NOTE(review): assigned below but never used in this variant)
    :param area_row_list: per table, list of rows of bboxes
    :param bbox_text_dict: str(bbox) -> recognized text (debug output only)
    :return: (new table_location_list, new area_row_list)
    """
    temp_location_list = []
    temp_area_row_list = []
    for k in range(len(table_location_list)):
        table = area_table_bbox_list[k]
        location = table_location_list[k]
        row_list = area_row_list[k]
        table_cell_list = area_table_cell_list[k]
        split_row_index_list = []

        # Iterate over all rows.
        table_start_row_index = 0
        for i in range(len(table)):
            row = table[i]
            # A row is a split row when its longest bbox contains/crosses
            # two or more horizontally distinct bboxes of previous rows.

            # print(i, [bbox_text_dict.get(str(y)) for x in row for y in x])

            # After each split the comparison window restarts below it.
            if table_start_row_index >= len(table):
                break

            # Grab up to n previous rows (never crossing the last split).
            n = 2
            if i-n < table_start_row_index:
                last_n_rows = table[table_start_row_index:i]
            else:
                last_n_rows = table[i-n:i]

            # Longest (widest) bbox of the current row.
            max_len_bbox = []
            for col in row:
                for b in col:
                    if not max_len_bbox:
                        max_len_bbox = b
                    else:
                        if b[2][0] - b[0][0] > max_len_bbox[2][0]-max_len_bbox[0][0]:
                            max_len_bbox = b

            # Compare against the previous n rows.
            for r in last_n_rows:
                b_list = [y for x in r for y in x]
                # Keep one representative per horizontal span (discard
                # bboxes overlapping an already-kept one).
                temp_b_list = []
                for b in b_list:
                    if not temp_b_list:
                        temp_b_list.append(b)
                    else:
                        find_flag = 0
                        for tb in temp_b_list:
                            if line_overlap(tb[0][0], tb[2][0], b[0][0], b[2][0]) > 0:
                                find_flag = 1
                                break
                        if not find_flag:
                            temp_b_list.append(b)
                b_list = temp_b_list

                if len(b_list) > 1 and max_len_bbox:
                    # Count how many previous-row bboxes the longest bbox
                    # contains or crosses (1/4-width tolerance).
                    contain_cnt = 0
                    for b in b_list:
                        threshold = (b[2][0]-b[0][0])/4
                        if max_len_bbox[0][0] <= b[0][0] <= b[2][0] <= max_len_bbox[2][0]:
                            contain_cnt += 1
                        if b[0][0]+threshold < max_len_bbox[0][0] < b[2][0]-threshold \
                                or b[0][0]+threshold < max_len_bbox[2][0] < b[2][0]-threshold:
                            contain_cnt += 1
                    # print('contain_cnt', contain_cnt)
                    if contain_cnt >= 2:
                        # debug: "contains several horizontal bboxes"
                        split_row_index_list.append(i)
                        table_start_row_index = i+1

        if split_row_index_list:
            # Split the table at the collected row indexes (sentinels -1
            # and len(table) close the first and last segments).
            split_row_index_list.insert(0, -1)
            split_row_index_list.insert(len(split_row_index_list), len(table))
            split_row_index_list = list(set(split_row_index_list))
            split_row_index_list.sort(key=lambda x: x)
            print('split_row_index_list', split_row_index_list, len(table))
            for l in range(1, len(split_row_index_list)):
                index = split_row_index_list[l]
                last_index = split_row_index_list[l-1]
                # Segments shorter than 3 rows are skipped.
                if index - last_index <= 2:
                    continue
                start_row_index = last_index+1
                end_row_index = index-1
                start_row = table[last_index+1]
                end_row = table[index-1]
                start_row = [x for y in start_row for x in y]
                end_row = [x for y in end_row for x in y]
                start_row = list(filter(lambda x: x != [], start_row))
                end_row = list(filter(lambda x: x != [], end_row))
                # If a boundary row is empty, step one row further inwards.
                if not start_row:
                    start_row_index = last_index + 2
                    start_row = table[start_row_index]
                    start_row = [x for y in start_row for x in y]
                    start_row = list(filter(lambda x: x != [], start_row))
                if not end_row:
                    end_row_index = index - 2
                    end_row = table[end_row_index]
                    end_row = [x for y in end_row for x in y]
                    end_row = list(filter(lambda x: x != [], end_row))
                if not start_row or not end_row or end_row_index-start_row_index < 1:
                    continue

                # Vertical extent from the boundary rows, horizontal extent
                # from the original table location.
                start_row.sort(key=lambda x: x[0][1])
                min_h = int(start_row[0][0][1])
                min_w = location[0][0]
                end_row.sort(key=lambda x: x[2][1])
                # print('end_row', [bbox_text_dict.get(str(x)) for x in end_row])
                max_h = int(end_row[-1][2][1])
                max_w = location[1][0]
                new_location = [[min_w, min_h], [max_w, max_h]]
                temp_location_list.append(new_location)
                temp_area_row_list.append(row_list[start_row_index:end_row_index+1])
        else:
            temp_location_list.append(location)
            temp_area_row_list.append(row_list)

    table_location_list = temp_location_list
    area_row_list = temp_area_row_list
    return table_location_list, area_row_list
+
+
def delete_not_standard_table(img, area_row_list, area_col_list, table_location_list, bbox_list, bbox_text_dict):
    """Flag table candidates that do not look like real tables.

    A table is non-standard when:
      1. it has only a single row or a single column;
      2. any cell (row/column intersection) holds 8 or more bboxes;
      3. less than half of the black pixels inside the table area are
         covered by OCR bboxes (much ink is unaccounted for).

    :param img: page image (indexable as img[y0:y1, x0:x1, :])
    :param area_row_list: per table, list of rows of bboxes
    :param area_col_list: per table, list of columns of bboxes
    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
    :param bbox_list: all OCR bboxes on the page
    :param bbox_text_dict: unused, kept for interface compatibility
    :return: list of bool, True when the table looks standard
    """
    table_standard_list = []
    for i in range(len(table_location_list)):
        row_list = area_row_list[i]
        col_list = area_col_list[i]
        location = table_location_list[i]

        # Rule 1: a single row or a single column is not a table.
        if len(row_list) <= 1 or len(col_list) <= 1:
            table_standard_list.append(False)
            continue

        table_standard = True

        # Rule 2: too many bboxes crammed into a single cell.
        for row in row_list:
            for col in col_list:
                inter = [j for j in row if j in col]
                # Deduplicate (bboxes are unhashable lists, so round-trip
                # through str; eval only sees our own reprs here).
                inter = [eval(x) for x in list(set([str(x) for x in inter]))]
                if len(inter) >= 8:
                    table_standard = False
                    break
            if not table_standard:
                break

        # Rule 3: compare black pixels of the whole table area against
        # black pixels covered by bboxes within its vertical range.
        table_black_cnt = count_black(img[location[0][1]:location[1][1], location[0][0]:location[1][0], :])
        bbox_black_cnt = 0
        for bbox in bbox_list:
            if location[0][1] <= bbox[0][1] <= location[1][1]:
                sub_img = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]
                if sub_img.shape[0] >= 3 and sub_img.shape[1] >= 3:
                    bbox_black_cnt += count_black(sub_img)
        # Guard against an all-white table area: the original divided by
        # table_black_cnt unconditionally and crashed on 0.
        if table_black_cnt <= 0 or bbox_black_cnt / table_black_cnt < 0.5:
            table_standard = False

        table_standard_list.append(table_standard)
    return table_standard_list
+
+
def bbox_preprocess(bbox_list, text_list, row_list, bbox_text_dict):
    """Merge runs of 3+ consecutive single-character bboxes inside a row
    into one combined bbox.

    ``bbox_list`` and ``text_list`` are updated in place (merged members
    removed, the combined bbox/text appended) and ``bbox_text_dict``
    gains an entry for the new bbox.

    :return: (bbox_list, text_list, bbox_text_dict)
    """
    for row in row_list:
        row.sort(key=lambda box: box[0][0])
        run = []
        last_pos = len(row) - 1
        for pos, box in enumerate(row):
            is_single = len(bbox_text_dict.get(str(box))) == 1
            if is_single and pos != last_pos:
                # Keep extending the current run of single-char bboxes.
                run.append(box)
                continue
            if len(run) >= 3:
                # A trailing single-char bbox at the row end joins the run.
                if is_single:
                    run.append(box)
                merged = run[0]
                merged_text = ""
                run.sort(key=lambda b: b[0][0])
                for b in run:
                    # Grow the merged bbox rightwards/downwards over b.
                    merged = [[merged[0][0], merged[0][1]],
                              [b[2][0], merged[0][1]],
                              [b[2][0], b[2][1]],
                              [merged[0][0], b[2][1]]]
                    bbox_list.remove(b)
                    merged_text += bbox_text_dict.get(str(b))
                    text_list.remove(bbox_text_dict.get(str(b)))
                bbox_list.append(merged)
                text_list.append(merged_text)
                bbox_text_dict[str(merged)] = merged_text
            run = []

    return bbox_list, text_list, bbox_text_dict
+
+
def merge_table(area_row_list, area_col_list, table_location_list, bbox_list):
    """Merge vertically adjacent table candidates that look like one table.

    Two neighbouring tables are merged when they have the same number of
    columns and no bbox lying between them sticks out of the column grid
    or spans wider than its column (such a bbox indicates separating
    content between the tables).

    NOTE(review): the gap check uses the *lower* table's column widths for
    both tables — confirm this is intended.

    :param area_row_list: per table, list of rows of bboxes
    :param area_col_list: per table, list of columns of bboxes
    :param table_location_list: [[ [min_x, min_y], [max_x, max_y] ], ...]
        (sorted in place by top edge)
    :param bbox_list: all bboxes on the page
    :return: (new_area_row_list, new_area_col_list, new_table_location_list)
    """
    table_location_list.sort(key=lambda x: x[0][1])
    merge_index_list = []
    temp_merge_list = []
    for i in range(1, len(table_location_list)):
        last_col_list = area_col_list[i - 1]
        col_list = area_col_list[i]
        last_location = table_location_list[i - 1]
        location = table_location_list[i]
        merge_flag = 0

        # Horizontal extent [min_x, max_x] of each column of this table.
        col_width_list = []
        for col in col_list:
            col.sort(key=lambda x: x[0][0])
            min_w = col[0][0][0]
            col.sort(key=lambda x: x[2][0])
            max_w = col[-1][2][0]
            col_width_list.append([min_w, max_w])

        # Any bbox in the vertical gap between the two tables that falls
        # outside the column grid, or is wider than its column, blocks
        # the merge.
        threshold = 5
        merge_flag2 = 1
        for bbox in bbox_list:
            if last_location[1][1] - threshold <= bbox[0][1] <= bbox[2][1] <= location[0][1] + threshold:
                if bbox[0][0] < col_width_list[0][0] or bbox[2][0] > col_width_list[-1][1]:
                    merge_flag2 = 0
                    break
                for w in col_width_list:
                    if w[0] <= bbox[0][0] <= w[1] and bbox[2][0] - bbox[0][0] > w[1] - w[0]:
                        merge_flag2 = 0
                        break
                if not merge_flag2:
                    break

        if merge_flag2 and len(last_col_list) == len(col_list):
            temp_merge_list += [i - 1, i]
            merge_flag = 1
        if not merge_flag:
            # Close the running merge group (or record i-1 as standalone).
            if temp_merge_list:
                merge_index_list.append(temp_merge_list)
            else:
                merge_index_list.append([i - 1])
            temp_merge_list = []

    if temp_merge_list:
        merge_index_list.append(temp_merge_list)
    else:
        merge_index_list.append([len(table_location_list) - 1])

    if not merge_index_list:
        return area_row_list, area_col_list, table_location_list

    new_table_location_list = []
    new_area_row_list = []
    new_area_col_list = []
    for index_list in merge_index_list:
        if not table_location_list:
            break
        # Deduplicate AND sort: the original used list(set(...)), whose
        # iteration order is unspecified, yet relied on index_list[0]
        # being the first table of the group.
        index_list = sorted(set(index_list))
        temp_table = table_location_list[index_list[0]]
        new_area_row_list.append(area_row_list[index_list[0]])
        new_area_col_list.append(area_col_list[index_list[0]])
        for index in index_list[1:]:
            # Union of the two locations.
            temp_table = [[min(temp_table[0][0], table_location_list[index][0][0]),
                           min(temp_table[0][1], table_location_list[index][0][1])],
                          [max(temp_table[1][0], table_location_list[index][1][0]),
                           max(temp_table[1][1], table_location_list[index][1][1])]]
            new_area_row_list[-1] += area_row_list[index]
            new_area_col_list[-1] += area_col_list[index]
        new_table_location_list.append(temp_table)

    return new_area_row_list, new_area_col_list, new_table_location_list
+
+
+def add_col_lines(area_row_list, area_col_list, table_location_list, bbox_text_dict):
+    """
+    对单个单元格内多列的,增加列线
+
+    :return:
+    """
+    add_area_col_lines = []
+    for i in range(len(table_location_list)):
+        row_list = area_row_list[i]
+        col_list = area_col_list[i]
+        location = table_location_list[i]
+        add_col_lines = []
+
+        new_col_list = []
+        for col in col_list:
+            row_cnt = 0
+            new_row_list = []
+            cell_col_lines = []
+            col.sort(key=lambda x: (x[0][1], x[0][0]))
+            # print('col')
+            for row in row_list:
+                row.sort(key=lambda x: (x[0][0], x[0][1]))
+                inter = [j for j in row if j in col]
+                inter = [eval(x) for x in list(set([str(x) for x in inter]))]
+                inter.sort(key=lambda x: (x[0][1], x[0][0]))
+                new_row = []
+
+                print('inter', [bbox_text_dict.get(str(x)) for x in inter])
+
+                # if inter:
+                #     # 先将同个单元格内上下重叠的bbox合并
+                #     temp_inter = []
+                #
+                #     used_bbox_list = []
+                #     for bbox1 in inter:
+                #         if bbox1 in used_bbox_list:
+                #             continue
+                #         temp_merge_bbox = [bbox1]
+                #         for bbox2 in inter:
+                #             if bbox2 in used_bbox_list:
+                #                 continue
+                #             if line_overlap(bbox1[0][0], bbox1[2][0], bbox2[0][0], bbox2[2][0]) >= 2/3 * min(bbox1[2][0]-bbox1[0][0], bbox2[2][0], bbox2[0][0]) \
+                #                     and line_overlap(bbox1[0][1], bbox1[2][1], bbox2[0][1], bbox2[2][1]) > 0:
+                #                 temp_merge_bbox += [bbox1, bbox2]
+                #                 used_bbox_list += [bbox1, bbox2]
+                #         temp_merge_bbox = [eval(y) for y in list(set([str(x) for x in temp_merge_bbox]))]
+                #         temp_inter.append(temp_merge_bbox)
+                #
+                #     inter = []
+                #     for m_bbox in temp_inter:
+                #         min_w, min_h, max_w, max_h = 10000, 10000, 0, 0
+                #         temp_text = ""
+                #         for bbox in m_bbox:
+                #             if bbox[0][0] < min_w:
+                #                 min_w = bbox[0][0]
+                #             if bbox[0][1] < min_h:
+                #                 min_h = bbox[0][1]
+                #             if bbox[2][0] > max_w:
+                #                 max_w = bbox[2][0]
+                #             if bbox[2][1] > max_h:
+                #                 max_h = bbox[2][1]
+                #             temp_text += bbox_text_dict.get(str(bbox)) + ' '
+                #         inter.append([[min_w, min_h], [max_w, min_h], [max_w, max_h], [min_w, max_h]])
+                #         bbox_text_dict[str(inter[-1])] = temp_text
+                #     print('merge inter', [bbox_text_dict.get(str(x)) for x in inter])
+
+                # 一个单元格内多个bbox
+                if len(inter) > 1:
+                    # 单元格内分行
+                    cell_row = []
+                    temp_row = [inter[0]]
+                    row_len = [inter[0][0][1], inter[0][2][1]]
+                    for bbox in inter[1:]:
+                        temp_bbox = temp_row[0]
+                        bbox_h_len = bbox[2][1] - bbox[0][1]
+                        temp_bbox_h_len = temp_bbox[2][1] - temp_bbox[0][1]
+                        # if temp_bbox[0][1]-5 <= bbox[0][1] <= bbox[2][1] <= temp_bbox[2][1]+5 \
+                        #         or bbox[0][1]-5 <= temp_bbox[0][1] <= temp_bbox[2][1] <= bbox[2][1]+5 \
+                        if line_overlap(row_len[0], row_len[1], bbox[0][1], bbox[2][1]) >= 1/3 * min(bbox_h_len, temp_bbox_h_len):
+                            temp_row.append(bbox)
+                            row_len[0] = min(row_len[0], bbox[0][1])
+                            row_len[1] = max(row_len[1], bbox[2][1])
+                            # print('in row', bbox_text_dict.get(str(bbox)), bbox_text_dict.get(str(temp_bbox)),
+                            #       row_len[0], row_len[1], bbox[0][1], bbox[2][1],
+                            #       line_overlap(row_len[0], row_len[1], bbox[0][1], bbox[2][1]),
+                            #       1/3 * min(bbox_h_len, temp_bbox_h_len))
+                        else:
+                            # print('not in row', bbox_text_dict.get(str(bbox)), bbox_text_dict.get(str(temp_bbox)),
+                            #       line_overlap(row_len[0], row_len[1], bbox[0][1], bbox[2][1]), 1/3 * min(bbox_h_len, temp_bbox_h_len))
+                            # print(bbox_text_dict.get(str(bbox)), temp_bbox[2][1] - bbox[0][1], 1/2 * min(bbox_h_len, temp_bbox_h_len),
+                            #       bbox[2][1] - temp_bbox[0][1], 1/2 * min(bbox_h_len, temp_bbox_h_len),
+                            #       line_overlap(temp_bbox[0][1], temp_bbox[2][1], bbox[0][1], bbox[2][1]), 1/3 * min(bbox_h_len, temp_bbox_h_len),
+                            #       temp_bbox[0][1], temp_bbox[2][1], bbox[0][1], bbox[2][1], bbox_text_dict.get(str(temp_bbox)))
+                            cell_row.append(temp_row)
+                            temp_row = [bbox]
+                            row_len = [bbox[0][1], bbox[2][1]]
+                    if temp_row:
+                        cell_row.append(temp_row)
+
+                    print('row_cnt', row_cnt)
+                    for c in cell_row:
+                        c.sort(key=lambda x: x[0][0])
+                        print('cell_row', [bbox_text_dict.get(str(x)) for x in c])
+
+                    if row_cnt == 0:
+                        # 获取最大列数的列
+                        temp_cell_row = copy.deepcopy(cell_row)
+                        temp_cell_row.sort(key=lambda x: len(x))
+                        max_cell_row = temp_cell_row[-1]
+
+                        # 对行内上下堆叠的进行合并
+                        max_cell_row.sort(key=lambda x: (x[0][0], x[0][1]))
+                        used_bbox_list = []
+                        merge_bbox_list = []
+                        for bbox1 in max_cell_row:
+                            temp_merge_bbox = [bbox1]
+                            if bbox1 in used_bbox_list:
+                                continue
+                            for bbox2 in max_cell_row:
+                                if bbox2 in used_bbox_list:
+                                    continue
+                                if line_overlap(bbox1[0][0], bbox1[2][0], bbox2[0][0], bbox2[2][0]) >= 2/3 * min(bbox1[2][0]-bbox1[0][0], bbox2[2][0], bbox2[0][0]):
+                                    temp_merge_bbox.append(bbox2)
+                                    used_bbox_list += [bbox1, bbox2]
+
+                            # 选范围最大的bbox
+                            temp_merge_bbox.sort(key=lambda x: (x[2][0], -x[0][0]))
+                            merge_bbox_list.append(temp_merge_bbox[-1])
+
+                        temp_cell_row[-1] = merge_bbox_list
+                        print('temp_cell_row', [bbox_text_dict.get(str(x)) for x in temp_cell_row[-1]])
+                        # print('temp_cell_row', temp_cell_row[-1])
+                        for c in temp_cell_row[-1]:
+                            cell_col_lines.append([c[0][0], c[2][0]])
+
+                        cell_col_lines.sort(key=lambda x: x[0])
+                        for c in cell_col_lines:
+                            add_col_lines.append([[int(c[1]), location[0][1]], [int(c[1]), location[1][1]]])
+
+        add_area_col_lines.append(add_col_lines)
+        #             # 循环所有行,若跨行
+        #             cell_col_lines.sort(key=lambda x: x[0])
+        #             cell_row.sort(key=lambda x: (x[0][0], x[0][1]))
+        #             print('sorted cell_col_lines', cell_col_lines)
+        #             for r in cell_row:
+        #                 right_bbox = []
+        #                 for bbox in r:
+        #                     for k in range(len(cell_col_lines)):
+        #                         if k == 0:
+        #                             min_w = -10000
+        #                             if len(cell_col_lines) <= 1:
+        #                                 max_w = cell_col_lines[k][1]
+        #                             else:
+        #                                 max_w = cell_col_lines[k+1][0]
+        #                                         # + 1/4*(cell_col_lines[k+1][1]-cell_col_lines[k+1][0])
+        #                         elif k == len(cell_col_lines) - 1:
+        #                             max_w = 10000
+        #                             if len(cell_col_lines) <= 1:
+        #                                 min_w = cell_col_lines[k-1][1]
+        #                             else:
+        #                                 min_w = cell_col_lines[k][0]
+        #                         else:
+        #                             if len(cell_col_lines) <= 1:
+        #                                 min_w = -10000
+        #                                 max_w = 10000
+        #                             else:
+        #                                 min_w = cell_col_lines[k-1][1]
+        #                                 max_w = cell_col_lines[k+1][0]
+        #                                         # + 1/4*(cell_col_lines[k+1][1]-cell_col_lines[k+1][0])
+        #
+        #                         # 判断跨行
+        #                         if min_w <= bbox[0][0] <= bbox[2][0] <= max_w:
+        #                             new_row.append(bbox)
+        #                             right_bbox.append(bbox)
+        #                         # else:
+        #                         #     print(min_w, bbox[0][0], bbox[2][0], max_w,
+        #                         #           bbox_text_dict.get(str(bbox)))
+        #
+        #                 # 有跨行,该行舍弃
+        #                 if len(right_bbox) != len(r):
+        #                     for r1 in r:
+        #                         if r1 in new_row:
+        #                             new_row.remove(r1)
+        #
+        #         # 单元格只有一个bbox
+        #         else:
+        #             new_row = inter
+        #         print('new_row', [bbox_text_dict.get(str(x)) for x in new_row])
+        #         new_row.sort(key=lambda x: x[0][0])
+        #         new_row_list.append(new_row)
+        #         row_cnt += 1
+        #     new_col_list.append(new_row_list)
+        #
+        # new_row_list = [x for x in new_col_list[0]]
+        # for col in new_col_list[1:]:
+        #     for j in range(len(col)):
+        #         new_row_list[j] += col[j]
+        #
+        # temp_new_row_list = []
+        # for r in new_row_list:
+        #     if r:
+        #         temp_new_row_list.append(r)
+        #         print('new_row_list', [bbox_text_dict.get(str(x)) for x in r])
+        # new_row_list = temp_new_row_list
+        # area_row_list[i] = new_row_list
+    return add_area_col_lines
+
+
def judge_col_lines(img, area_col_lines, table_location_list, bbox_list, bbox_text_dict):
    """Filter and adjust candidate column lines for every table area.

    For each table area: de-duplicate the candidate vertical lines, keep a
    line only when at least one whole text bbox sits between it and the
    previously kept line, push lines that cut through a bbox to the right
    of that bbox, and finally ensure the two table border lines are present.

    :param img: page image (unused; kept for interface compatibility)
    :param area_col_lines: per-area candidate vertical lines [[x, y0], [x, y1]]
    :param table_location_list: per-area table bounds [[x0, y0], [x1, y1]]
    :param bbox_list: text bboxes (4 corner points each)
    :param bbox_text_dict: str(bbox) -> text (unused; debug aid)
    :return: per-area list of accepted column lines
    """
    result = []
    for area_idx, location in enumerate(table_location_list):
        col_lines = area_col_lines[area_idx]
        col_lines.sort(key=lambda x: x[0][0])

        # text bboxes whose top edge lies inside this table's vertical span
        area_bboxes = [b for b in bbox_list
                       if location[0][1] <= b[0][1] <= location[1][1]]

        # de-duplicate lines by value, then restore left-to-right order
        col_lines = [eval(s) for s in {str(c) for c in col_lines}]
        col_lines.sort(key=lambda x: x[0][0])

        threshold = 5
        if not col_lines:
            result.append([])
            continue

        # keep a line only when a complete bbox lies between it and the
        # previously kept one; the right-most line is always re-appended
        kept = [col_lines[0]]
        for line in col_lines[1:]:
            prev_x = kept[-1][0][0]
            cur_x = line[0][0]
            if any(prev_x - threshold <= b[0][0] <= b[2][0] <= cur_x + threshold
                   for b in area_bboxes):
                kept.append(line)
        kept.append(col_lines[-1])
        col_lines = kept

        # a line cutting through a bbox is moved right past that bbox,
        # hopping further right while another bbox starts inside the gap
        for line in col_lines:
            crossed = [b for b in area_bboxes if b[0][0] < line[0][0] < b[2][0]]
            if not crossed:
                continue
            crossed.sort(key=lambda x: x[2][0], reverse=True)
            move_to = crossed[0][2][0]
            current = line[0][0]
            for b in area_bboxes:
                if b in crossed:
                    continue
                if current <= b[0][0] <= move_to:
                    current = move_to
                    move_to = b[2][0]
            line[0][0] = int(move_to)
            line[1][0] = int(move_to)

        # make sure the table's left and right borders are present
        for border_x in (location[0][0], location[1][0]):
            border = [[border_x, location[0][1]], [border_x, location[1][1]]]
            if border not in col_lines:
                col_lines.append(border)

        result.append(col_lines)
    return result
+
+
def add_row_lines(area_row_list, area_col_list, table_location_list, bbox_text_dict, area_row_lines):
    """Derive additional horizontal separator lines for each table area.

    For every row, each column's bboxes are intersected with the row; when a
    cell holds several vertically stacked text lines and no existing row
    line already separates them, a full-width row line is synthesised below
    the upper text line.

    :param area_row_list: per-area rows, each a list of bboxes
    :param area_col_list: per-area columns, each a list of bboxes
    :param table_location_list: per-area table bounds [[x0, y0], [x1, y1]]
    :param bbox_text_dict: str(bbox) -> recognised text, debug prints only
    :param area_row_lines: per-area existing row lines [[x0, y], [x1, y]]
    :return: per-area list of newly added row lines
    """
    add_area_row_lines = []
    for i in range(len(table_location_list)):
        row_list = area_row_list[i]
        col_list = area_col_list[i]
        location = table_location_list[i]
        row_lines = area_row_lines[i]
        # NOTE: this local shadows the module-level add_row_lines function
        add_row_lines = []
        for row in row_list:
            col_cnt = 0
            row.sort(key=lambda x: (x[0][0], x[0][1]))

            # # (disabled) use only the first column as the reference
            # first_col = col_list[0]
            # first_col.sort(key=lambda x: (x[0][1], x[0][0]))
            # inter = [j for j in row if j in first_col]
            # inter = [eval(x) for x in list(set([str(x) for x in inter]))]
            # inter.sort(key=lambda x: (x[0][1], x[0][0]))

            # every column participates
            for col in col_list:
                col.sort(key=lambda x: (x[0][1], x[0][0]))
                # bboxes belonging to both this row and this column (one cell)
                inter = [j for j in row if j in col]
                print('col', col_cnt, [bbox_text_dict.get(str(x)) for x in col], [bbox_text_dict.get(str(x)) for x in row])
                # de-duplicate by value (round-trip through str)
                inter = [eval(x) for x in list(set([str(x) for x in inter]))]
                inter.sort(key=lambda x: (x[0][1], x[0][0]))
                print('add_row_lines inter', [bbox_text_dict.get(str(x)) for x in inter])

                if len(inter) > 0:
                    # split the cell's bboxes into text lines: a bbox joins the
                    # current line when its vertical overlap (per line_overlap)
                    # is at least 1/3 of the smaller bbox height
                    cell_row = []
                    temp_row = [inter[0]]
                    row_len = [inter[0][0][1], inter[0][2][1]]
                    for bbox in inter[1:]:
                        temp_bbox = temp_row[0]
                        bbox_h_len = bbox[2][1] - bbox[0][1]
                        temp_bbox_h_len = temp_bbox[2][1] - temp_bbox[0][1]
                        if line_overlap(row_len[0], row_len[1], bbox[0][1], bbox[2][1]) >= 1/3 * min(bbox_h_len, temp_bbox_h_len):
                            temp_row.append(bbox)
                            row_len[0] = min(row_len[0], bbox[0][1])
                            row_len[1] = max(row_len[1], bbox[2][1])
                        else:
                            cell_row.append(temp_row)
                            temp_row = [bbox]
                            row_len = [bbox[0][1], bbox[2][1]]
                    if temp_row:
                        cell_row.append(temp_row)

                    print('col_cnt', col_cnt)
                    for c in cell_row:
                        c.sort(key=lambda x: x[0][0])
                        print('cell_row', [bbox_text_dict.get(str(x)) for x in c])

                    # synthesise separators for cells with stacked text lines
                    if len(cell_row) > 0:
                        if len(cell_row) == 1:
                            # single text line: add a line at its bottom edge
                            h = int(cell_row[0][0][2][1])
                            add_row_lines.append([[location[0][0], h], [location[1][0], h]])
                        for j in range(1, len(cell_row)):
                            last_row = cell_row[j-1]
                            row1 = cell_row[j]
                            last_row.sort(key=lambda x: x[2][1])
                            row1.sort(key=lambda x: x[0][1])
                            # is there already a row line between the two text lines?
                            find_flag = 0
                            for l in row_lines:
                                if last_row[-1][2][1] <= l[0][1] <= row1[0][0][1]:
                                    find_flag = 1
                                    break
                            if not find_flag:
                                # place the new line below the upper text line,
                                # offset by the gap above it (to the table top
                                # for the first pair, else to the line before)
                                h = int(last_row[-1][2][1])
                                if j == 1:
                                    last_row.sort(key=lambda x: x[0][1])
                                    h += int(last_row[0][0][1] - location[0][1])
                                else:
                                    last_two_row = cell_row[j-2]
                                    last_two_row.sort(key=lambda x: x[2][1])
                                    last_row.sort(key=lambda x: x[0][1])
                                    h += int(last_row[0][0][1] - last_two_row[-1][2][1])
                                add_row_lines.append([[location[0][0], h], [location[1][0], h]])
                col_cnt += 1
        add_area_row_lines.append(add_row_lines)
    return add_area_row_lines
+
+
def judge_row_lines(img, area_row_lines, table_location_list, bbox_list, bbox_text_dict):
    """Validate and adjust candidate row lines for every table area.

    Lines that cut through a text bbox are moved down below the lowest
    crossed bbox (repeatedly, until nothing is crossed); a line is then kept
    only if a complete bbox lies between it and the previously kept line;
    finally the table's top/bottom border lines are ensured.

    :param img: page image (unused; kept for interface compatibility)
    :param area_row_lines: per-area candidate horizontal lines [[x0, y], [x1, y]]
    :param table_location_list: per-area table bounds [[x0, y0], [x1, y1]]
    :param bbox_list: text bboxes (4 corner points each)
    :param bbox_text_dict: str(bbox) -> text (unused; debug aid)
    :return: per-area list of accepted row lines
    """
    new_area_row_lines = []
    for i in range(len(table_location_list)):
        location = table_location_list[i]
        row_lines = area_row_lines[i]

        # Guard against an area with no candidate lines (mirrors
        # judge_col_lines); without it row_lines[0] below raises IndexError.
        if not row_lines:
            new_area_row_lines.append([])
            continue

        # text bboxes whose top edge lies inside this table's vertical span
        sub_bbox_list = [bbox for bbox in bbox_list
                         if location[0][1] <= bbox[0][1] <= location[1][1]]

        # a line crossing a bbox is moved down below the lowest crossed
        # bbox; repeat until the line crosses nothing (terminates because
        # the y coordinate strictly increases over a finite bbox set)
        row_lines.sort(key=lambda x: x[0][1])
        for row in row_lines:
            while True:
                cross_bbox_list = [bbox for bbox in sub_bbox_list
                                   if bbox[0][1] < row[0][1] < bbox[2][1]]
                if not cross_bbox_list:
                    break
                cross_bbox_list.sort(key=lambda x: x[2][1], reverse=True)
                line_move_h = cross_bbox_list[0][2][1]
                row[0][1] = int(line_move_h)
                row[1][1] = int(line_move_h)

        # keep a line only if a complete bbox lies between it and the
        # previously kept line; the bottom-most line is always re-appended
        row_lines.sort(key=lambda x: x[0][1])
        threshold = 5
        temp_row_lines = [row_lines[0]]
        for j in range(1, len(row_lines)):
            last_row_h = temp_row_lines[-1][0][1]
            row_h = row_lines[j][0][1]
            for bbox in sub_bbox_list:
                if last_row_h - threshold <= bbox[0][1] <= bbox[2][1] <= row_h + threshold:
                    temp_row_lines.append(row_lines[j])
                    break
        temp_row_lines.append(row_lines[-1])
        row_lines = temp_row_lines

        # make sure the table's top and bottom borders are present
        up_row = [[location[0][0], location[0][1]], [location[1][0], location[0][1]]]
        bottom_row = [[location[0][0], location[1][1]], [location[1][0], location[1][1]]]
        if up_row not in row_lines:
            row_lines.append(up_row)
        if bottom_row not in row_lines:
            row_lines.append(bottom_row)

        new_area_row_lines.append(row_lines)
    return new_area_row_lines
+
+
def merge_lines(lines, axis=0, threshold=5):
    """Collapse near-duplicate parallel lines into one line per cluster.

    Lines are sorted by their position on the perpendicular axis
    (x for axis=0 columns, y for axis=1 rows) and chained into clusters
    where consecutive positions differ by at most *threshold*; only the
    right-most/bottom-most line of each cluster is kept.

    :param lines: lines as [[x0, y0], [x1, y1]]; sorted in place
    :param axis: 0 -> cluster by x coordinate, 1 -> cluster by y coordinate
    :param threshold: maximum gap (px) between consecutive cluster members
    :return: sorted list of the surviving representative lines
    """
    lines.sort(key=lambda x: x[0][1-axis])
    merged = []
    group = []
    for line in lines:
        # start a new cluster when the gap to the previous member is too big
        if group and abs(group[-1][0][1-axis] - line[0][1-axis]) > threshold:
            merged.append(group[-1])
            group = []
        group.append(line)
    if group:
        merged.append(group[-1])

    merged.sort(key=lambda x: x[0][1-axis])
    return merged
+
+
def merge_row_bbox_list(area_row_list):
    """Merge rows that were wrongly split apart, e.g. a multi-line header.

    A middle row is merged with both neighbours when every bbox of the
    previous row lines up horizontally (>= 80% overlap per line_overlap)
    with some bbox of the next row but with none of the middle row, i.e.
    the middle row interrupts columns that clearly continue across it.

    :param area_row_list: per-area list of rows, each row a list of bboxes
    :return: per-area list of rows with such triples merged into one row
    """
    new_area_row_list = []
    for row_list in area_row_list:
        new_row_list = copy.deepcopy(row_list)
        # headers whose lines were split into separate rows
        for i in range(1, len(row_list)-1):
            last_row = row_list[i-1]
            row = row_list[i]
            next_row = row_list[i+1]
            merge_cnt = 0
            for last_b in last_row:
                # does last_b column-align with some bbox two rows below?
                find_flag1 = 0
                for next_b in next_row:
                    if line_overlap(last_b[0][0], last_b[2][0], next_b[0][0], next_b[2][0]) \
                            >= 0.8*min(last_b[2][0] - last_b[0][0], next_b[2][0] - next_b[0][0]):
                        find_flag1 = 1
                        break
                # ...while aligning with no bbox of the middle row?
                find_flag2 = 0
                if find_flag1:
                    for b in row:
                        if line_overlap(last_b[0][0], last_b[2][0], b[0][0], b[2][0]) \
                                >= 0.8*min(last_b[2][0] - last_b[0][0], b[2][0] - b[0][0]):
                            find_flag2 = 1
                            break
                if find_flag1 and not find_flag2:
                    merge_cnt += 1

            # merge only when every bbox of the previous row qualifies
            # NOTE(review): the splice indexes new_row_list with positions of
            # the ORIGINAL row_list; if more than one triple merges, later
            # indices no longer line up -- presumably at most one merge is
            # expected per area. TODO confirm
            if merge_cnt == len(last_row) and merge_cnt > 1:
                new_row_list = new_row_list[:i-1] + [last_row+row+next_row] + new_row_list[i+2:]
        new_area_row_list.append(new_row_list)
    return new_area_row_list
+
+
def count_black(image_np, threshold=150):
    """Return how many pixels have all BGR channels <= *threshold* (dark)."""
    dark_lower = np.array([0, 0, 0])
    dark_upper = np.array([threshold, threshold, threshold])
    dark_mask = cv2.inRange(image_np, dark_lower, dark_upper)
    return np.sum(dark_mask != 0)
+
+
def get_bbox_list_by_lines(img, area_row_lines, area_col_lines, table_location_list, bbox_list, axis=0):
    """Group text bboxes into table rows using the row/col line grid.

    Intersects each area's row and column lines, groups the intersection
    points into rows, and assigns each bbox (by its centre point) to the
    horizontal strip between two consecutive point rows.

    :param img: page image, used to size the rasterisation masks
    :param area_row_lines: per-area horizontal lines [[x0, y], [x1, y]]
    :param area_col_lines: per-area vertical lines [[x, y0], [x, y1]]
    :param table_location_list: per-area table bounds
    :param bbox_list: text bboxes (4 corner points each)
    :param axis: 0 -> group by y (normal rows); 1 swaps the roles of x/y
    :return: per-area list of rows, each row a list of bboxes
    """
    area_row_list = []
    for i in range(len(table_location_list)):
        row_lines = area_row_lines[i]
        col_lines = area_col_lines[i]

        # intersections of the row and column lines
        cross_points = get_points_by_line(img, row_lines, col_lines)

        # Guard: no intersections -> empty area (same as get_table_bbox_list);
        # indexing cross_points[0] below would otherwise raise IndexError.
        if not cross_points:
            area_row_list.append([])
            continue

        # group intersection points into rows (exact coordinate match)
        cross_points.sort(key=lambda x: (x[1-axis], x[axis]))
        row_point_list = []
        current_row = [cross_points[0]]
        for p in cross_points[1:]:
            if current_row[0][1-axis] == p[1-axis]:
                current_row.append(p)
            else:
                row_point_list.append(current_row)
                current_row = [p]
        if current_row:
            row_point_list.append(current_row)

        # assign each bbox (by its centre) to the strip it falls in;
        # each bbox is consumed at most once
        used_bbox_list = []
        row_list = []
        for j in range(1, len(row_point_list)):
            last_row = row_point_list[j-1]
            row = row_point_list[j]
            sub_row_list = []
            for k in range(1, len(row)):
                last_p = last_row[k-1]
                p = row[k]
                for bbox in bbox_list:
                    if bbox in used_bbox_list:
                        continue
                    bbox_h_center = (bbox[0][1-axis]+bbox[2][1-axis]) / 2
                    bbox_w_center = (bbox[0][axis]+bbox[2][axis]) / 2
                    if last_p[1-axis] <= bbox_h_center <= p[1-axis] and last_p[axis] <= bbox_w_center <= p[axis]:
                        sub_row_list.append(bbox)
                        used_bbox_list.append(bbox)
            row_list.append(sub_row_list)

        area_row_list.append(row_list)

    return area_row_list
+
+
def get_table_bbox_list(img, area_row_lines, area_col_lines, table_location_list, bbox_list):
    """Build the full cell grid for each table area and fill it with bboxes.

    Intersects each area's row/column lines, groups the intersection points
    into rows, forms cells from adjacent point rows, and assigns each text
    bbox (by its centre point) to the first cell that contains it.

    :param img: page image, used to size the rasterisation masks
    :param area_row_lines: per-area horizontal lines [[x0, y], [x1, y]]
    :param area_col_lines: per-area vertical lines [[x, y0], [x, y1]]
    :param table_location_list: per-area table bounds
    :param bbox_list: text bboxes (4 corner points each)
    :return: (area_table_bbox_list, area_table_cell_list) -- per area, the
        bboxes grouped per cell and the matching cell corner pairs
        [top_left_point, bottom_right_point]
    """
    area_table_bbox_list = []
    area_table_cell_list = []
    for i in range(len(table_location_list)):
        row_lines = area_row_lines[i]
        col_lines = area_col_lines[i]

        # intersections of the row and column lines
        cross_points = get_points_by_line(img, row_lines, col_lines)

        # group intersection points into rows (exact y match);
        # no intersections -> empty area
        cross_points.sort(key=lambda x: (x[1], x[0]))
        row_point_list = []
        if not cross_points:
            area_table_bbox_list.append([])
            area_table_cell_list.append([])
            continue
        current_row = [cross_points[0]]
        for p in cross_points[1:]:
            if current_row[0][1] == p[1]:
                current_row.append(p)
            else:
                row_point_list.append(current_row)
                current_row = [p]
        if current_row:
            row_point_list.append(current_row)

        # arrange bboxes in table layout: cell (j-1, k-1) spans from point
        # last_row[k-1] (top-left) to row[k] (bottom-right)
        used_bbox_list = []
        row_list = []
        row_cell_list = []
        for j in range(1, len(row_point_list)):
            last_row = row_point_list[j-1]
            row = row_point_list[j]
            col_list = []
            col_cell_list = []
            for k in range(1, len(row)):
                last_p = last_row[k-1]
                p = row[k]
                cell = []
                for bbox in bbox_list:
                    if bbox in used_bbox_list:
                        continue
                    # assign by centre point; each bbox is consumed once
                    bbox_h_center = (bbox[0][1]+bbox[2][1]) / 2
                    bbox_w_center = (bbox[0][0]+bbox[2][0]) / 2
                    if last_p[1] <= bbox_h_center <= p[1] and last_p[0] <= bbox_w_center <= p[0]:
                        cell.append(bbox)
                        used_bbox_list.append(bbox)
                col_list.append(cell)
                col_cell_list.append([last_p, p])
            row_list.append(col_list)
            row_cell_list.append(col_cell_list)

        area_table_bbox_list.append(row_list)
        area_table_cell_list.append(row_cell_list)
    return area_table_bbox_list, area_table_cell_list
+
+
def get_lines_from_img(img):
    """Extract horizontal and vertical line masks from a BGR table image.

    Morphological opening with a 7x1 kernel keeps only horizontal strokes;
    a 1x7 kernel keeps only vertical strokes.

    :param img: BGR image
    :return: (horizontal_mask, vertical_mask) grayscale images
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # opening with a wide flat kernel -> horizontal lines survive
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 1))
    horizontal_mask = cv2.morphologyEx(gray, cv2.MORPH_OPEN, horizontal_kernel)

    # opening with a tall thin kernel -> vertical lines survive
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 7))
    vertical_mask = cv2.morphologyEx(gray, cv2.MORPH_OPEN, vertical_kernel)

    return horizontal_mask, vertical_mask
+
+
def get_bbox_by_img(row_img, col_img):
    """Build a table-cell grid directly from rasterised row/col line images.

    *row_img*/*col_img* are masks of the horizontal and vertical table
    lines.  Their pixel-wise AND yields the grid intersection points; these
    are de-duplicated (5 px tolerance), grouped into rows, and adjacent
    point rows are paired into cells [top_left_point, bottom_right_point].

    Fix vs. original: an unused debug image buffer (``new_img``) allocated
    on the success path has been removed.

    :return: (row_list, standard_flag) where row_list is rows of cells and
        standard_flag is True only when every row has the same cell count,
        i.e. the grid forms a regular table; ([], False) when no grid found
    """
    # intersections of the two line masks
    point_img = np.bitwise_and(row_img, col_img)

    # collect the bright intersection pixels as (x, y) points
    ys, xs = np.where(point_img > 200)
    cross_points = []
    for i in range(len(xs)):
        cross_points.append((xs[i], ys[i]))

    # de-duplicate points closer than 5 px in both axes
    cross_points.sort(key=lambda x: (x[0], x[1]))
    temp_cross_points = []
    for p1 in cross_points:
        find_flag = 0
        for p2 in temp_cross_points:
            if abs(p1[1] - p2[1]) <= 5 and abs(p1[0] - p2[0]) <= 5:
                find_flag = 1
                break
        if not find_flag:
            temp_cross_points.append(p1)
    cross_points = temp_cross_points

    if not cross_points:
        return [], False

    print('cross_points', len(cross_points))

    axis = 0
    # group the intersection points into rows (5 px y tolerance)
    cross_points.sort(key=lambda x: (x[1-axis], x[axis]))
    row_point_list = []
    current_row = [cross_points[0]]
    for p in cross_points[1:]:
        if abs(current_row[0][1-axis] - p[1-axis]) <= 5:
            current_row.append(p)
        else:
            current_row.sort(key=lambda x: x[0])
            row_point_list.append(current_row)
            current_row = [p]
    if current_row:
        current_row.sort(key=lambda x: x[0])
        row_point_list.append(current_row)

    # every point row must hold the same number of points, else no grid
    row_len = len(row_point_list[0])
    for row in row_point_list:
        if row_len != len(row):
            return [], False

    # pair adjacent point rows into cells
    row_list = []
    standard_flag = True
    for j in range(1, len(row_point_list)):
        last_row = row_point_list[j-1]
        row = row_point_list[j]
        sub_row = []
        for k in range(1, len(row)):
            if k-1 >= len(last_row):
                standard_flag = False
                break
            last_p = last_row[k-1]
            p = row[k]
            bbox = [last_p, p]
            sub_row.append(bbox)
        row_list.append(sub_row)

    if not row_list:
        return [], False

    # a regular table needs all cell rows to be equally long
    row_len = len(row_list[0])
    for row in row_list:
        if len(row) != row_len:
            standard_flag = False
            break

    print('standard_flag', standard_flag)
    return row_list, standard_flag
+
+
def get_points_by_line(img, row_lines, col_lines):
    """Rasterise the row/col lines and return their intersection points.

    Each line set is drawn (1 px wide) on its own blank mask sized like
    *img*; the pixel-wise AND of the two masks marks the intersections.

    :param img: page image (only its height/width are used)
    :param row_lines: horizontal lines [[x0, y], [x1, y]]
    :param col_lines: vertical lines [[x, y0], [x, y1]]
    :return: intersection points as (x, y) tuples sorted by (x, y)
    """
    row_mask = np.zeros_like(img[:, :, 0], dtype=np.uint8)
    col_mask = np.zeros_like(img[:, :, 0], dtype=np.uint8)
    for line in row_lines:
        cv2.line(row_mask, line[0], line[1], (255, 255, 255), 1)
    for line in col_lines:
        cv2.line(col_mask, line[0], line[1], (255, 255, 255), 1)

    overlap = np.bitwise_and(row_mask, col_mask)

    # white pixels of the AND mask are the grid intersections
    ys, xs = np.where(overlap > 0)
    points = [(x, y) for x, y in zip(xs, ys)]
    points.sort(key=lambda p: (p[0], p[1]))
    return points
+
+
def merge_text_and_table(text_bbox_list, table_row_list):
    """Place OCR text bboxes into table cells by centre-point containment.

    :param text_bbox_list: text bboxes (4 corner points; [0]=top-left,
        [2]=bottom-right is what matters)
    :param table_row_list: rows of cells, each cell as
        [top_left_point, bottom_right_point]; rows are sorted in place by x
    :return: same row/cell structure with the matching text bboxes inside;
        every text bbox is assigned to at most one cell
    """
    assigned = []
    merged_rows = []
    for table_row in table_row_list:
        table_row.sort(key=lambda cell: cell[0][0])
        merged_cells = []
        for cell in table_row:
            cell_texts = []
            for text_bbox in text_bbox_list:
                if text_bbox in assigned:
                    continue
                center_y = (text_bbox[0][1] + text_bbox[2][1]) / 2
                center_x = (text_bbox[0][0] + text_bbox[2][0]) / 2
                # centre inside the cell rectangle -> it belongs to this cell
                if cell[0][1] <= center_y <= cell[1][1] and cell[0][0] <= center_x <= cell[1][0]:
                    cell_texts.append(text_bbox)
                    assigned.append(text_bbox)
            merged_cells.append(cell_texts)
        merged_rows.append(merged_cells)
    return merged_rows
+
+
def shrink_bbox(img, bbox_list):
    """Tighten each bbox to the smallest rectangle containing dark pixels.

    For every bbox the matching image patch is scanned for "black" pixels
    (all channels <= 150) and the bbox is shrunk to their extent.  Bboxes
    with a degenerate patch or no dark pixel are returned unchanged.

    :param img: BGR page image
    :param bbox_list: bboxes as 4 corner points; [0]=top-left, [2]=bottom-right
    :return: list of (possibly shrunk) bboxes in the same order
    """
    def return_first_black_index(image_np):
        # row/col indices of dark pixels; np.where returns rows ascending
        lower = np.array([0, 0, 0])
        upper = np.array([150, 150, 150])
        mask = cv2.inRange(image_np, lower, upper)
        black_index_list = np.where(mask != 0)
        return black_index_list
    new_bbox_list = []
    for bbox in bbox_list:
        img_bbox = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]

        # degenerate crop (zero width or height): keep the original bbox
        if 0 in img_bbox.shape:
            new_bbox_list.append(bbox)
            continue

        # scan rows: first/last dark row give the vertical extent
        index_list = return_first_black_index(img_bbox[:, :, :])
        if index_list[0].size == 0 or index_list[1].size == 0:
            new_bbox_list.append(bbox)
            continue
        min_h = index_list[0][0]
        max_h = index_list[0][-1]

        # transpose so the same trick gives first/last dark column
        img_bbox1 = np.swapaxes(img_bbox, 0, 1)
        index_list = return_first_black_index(img_bbox1[:, :, :])
        if index_list[0].size == 0 or index_list[1].size == 0:
            new_bbox_list.append(bbox)
            continue
        min_w = index_list[0][0]
        max_w = index_list[0][-1]

        # convert patch offsets back into page coordinates
        real_min_w = bbox[0][0] + min_w
        real_max_w = bbox[0][0] + max_w
        real_min_h = bbox[0][1] + min_h
        real_max_h = bbox[0][1] + max_h
        new_bbox = [[real_min_w, real_min_h], [real_min_w, real_max_h], [real_max_w, real_max_h], [real_max_w, real_min_h]]
        new_bbox_list.append(new_bbox)

        # cv2.imshow('img', img_bbox)
        # cv2.imshow('shrink', img[int(new_bbox[0][1]):int(new_bbox[2][1]), int(new_bbox[0][0]):int(new_bbox[2][0]), :])
        # cv2.waitKey(0)
    return new_bbox_list
+
+
def affinity_propagation(data_list):
    """Cluster *data_list* with Affinity Propagation, retrying on failure.

    Tries an escalating ladder of (damping, convergence_iter) settings; a
    fit is rejected when the first label is -1 (the model did not converge
    and marked samples as noise).  If every attempt fails, all samples are
    assigned to cluster 0.

    :param data_list: list of numeric feature vectors
    :return: list of integer cluster labels, one per sample
    """
    data_np = np.array(data_list)
    random_state = 170

    # same parameter ladder as before; the first entry is the default try
    param_grid = [(0.5, 15), (0.5, 50), (0.7, 15), (0.7, 50)]
    y_pred = None
    for attempt, (damping, convergence_iter) in enumerate(param_grid):
        if attempt > 0:
            print(f'ap dp{damping} ci{convergence_iter}')
        model = AffinityPropagation(damping=damping, convergence_iter=convergence_iter,
                                    random_state=random_state).fit(data_np)
        y_pred = model.labels_
        if y_pred[0] != -1:
            break

    if y_pred[0] == -1:
        # every configuration failed: fall back to a single cluster
        print('all -1')
        y_pred = np.zeros(y_pred.shape[0])

    y_pred = y_pred.tolist()
    return y_pred
+
+
def dbscan(data_list):
    """Cluster *data_list* with DBSCAN (eps=3, min_samples=2).

    :param data_list: list of numeric feature vectors
    :return: list of integer cluster labels (-1 marks noise points)
    """
    model = DBSCAN(eps=3, min_samples=2).fit(np.array(data_list))
    return model.labels_.tolist()
+
+
def test_ocr_model(img_path):
    """Send the image at *img_path* to the remote OCR service; return its JSON reply."""
    with open(img_path, "rb") as f:
        file_base64 = base64.b64encode(f.read())
    file_json = {"data": file_base64, "md5": 0}

    _url = "http://192.168.2.103:17000/ocr"
    # _url = "http://127.0.0.1:17000/ocr"

    return json.loads(request_post(_url, file_json))
+
+
def test_cho_model(text):
    """Send the characters of *text* to the remote cho service.

    Prints and returns the decoded list on success; prints a failure
    message (and returns None) otherwise.
    """
    # text = "巧克力"
    char_list = list(text)
    data_json = {"data": json.dumps(char_list)}
    _url = "http://192.168.2.103:17058/cho"
    result = json.loads(request_post(_url, data_json))
    if not result.get("success"):
        print("failed!")
        return None
    decode_list = result.get("data")
    print("char_list", char_list)
    print("decode_list", decode_list)
    return decode_list
+
+
if __name__ == '__main__':
    # Ad-hoc manual test entry point for the borderless-table pipeline.
    get_table_new()

    # _l = [[18, 0], [0, 0], [14, 0], [0, 0], [12, 0], [0, 0], [14, 0], [2, 0], [15, 0], [0, 0]]
    # # _l = [[27, 0], [26, 0], [17, 0]]
    # print(affinity_propagation(_l))
    # print(dbscan(_l))

    # _img = cv2.imread(r'C:\Users\Administrator\Desktop\111.jpg')
    # shrink_bbox(_img, [[[0, 0], [0, 0], [_img.shape[1], _img.shape[0]], [_img.shape[1], _img.shape[0]]]])

+ 367 - 0
botr/rules/get_table_by_rules.py

@@ -0,0 +1,367 @@
+import copy
+import cv2
+import numpy as np
+from botr.rules.table_utils import shrink_bbox, split_bbox, get_table_bbox_list, count_black
+from botr.utils import line_iou
+# from format_convert.utils import log
+
+
def get_table_by_rule(img, text_list, bbox_list, table_location, is_test=0):
    """Reconstruct a borderless table from OCR boxes using layout heuristics.

    Anchors on the OCR box nearest the table's top-left corner, derives the
    first row / first column from it, projects column and row separator lines,
    nudges column lines off black pixels, then assembles cells from the line
    intersections.

    :param img: BGR image (numpy array) containing the table
    :param text_list: OCR texts, assumed parallel to bbox_list — TODO confirm lengths match
    :param bbox_list: OCR boxes as 4-point quads [[x0,y0],[x0,y1],[x1,y1],[x1,y0]]
    :param table_location: table region as [x_min, y_min, x_max, y_max]
    :param is_test: when truthy, draw debug windows (blocking)
    :return: (line_list, table_cell_list, table_location)
    """
    if not bbox_list:
        return [], [], []

    # Tighten each OCR bbox to the extent of its dark pixels
    bbox_list = shrink_bbox(img, bbox_list)

    # Map str(bbox) -> text (keyed on the *shrunken* boxes)
    bbox_text_dict = {}
    for i in range(len(text_list)):
        bbox_text_dict[str(bbox_list[i])] = text_list[i]

    # print('bbox_text_dict', bbox_text_dict)

    # Lock onto the first bbox: smallest L1 distance to the table's top-left corner
    table_left_up_point = [table_location[0], table_location[1]]
    min_distance = 100000000000
    first_bbox = bbox_list[0]
    for bbox in bbox_list:
        distance = abs(bbox[0][0] - table_left_up_point[0]) + abs(bbox[0][1] - table_left_up_point[1])
        if distance < min_distance:
            min_distance = distance
            first_bbox = bbox

    # print('first_bbox', first_bbox, bbox_text_dict.get(str(first_bbox)))

    # # 对first_bbox预处理
    # # 分割
    # new_bbox_list, bbox_text_dict = split_bbox(img, first_bbox, bbox_text_dict)
    # if new_bbox_list:
    #     if first_bbox in bbox_list:
    #         bbox_list.remove(first_bbox)
    #     bbox_list += new_bbox_list
    #     new_bbox_list.sort(key=lambda x: (x[0][0]))
    #     first_bbox = new_bbox_list[0]

    # First row: every bbox whose vertical span intersects first_bbox's (or lies above it)
    first_row = []
    bbox_list.sort(key=lambda x: (x[0][1], x[0][0]))
    for bbox in bbox_list:
        # vertical spans intersect
        if first_bbox[0][1] <= bbox[0][1] <= first_bbox[2][1] \
                or first_bbox[0][1] <= bbox[2][1] <= first_bbox[2][1] \
                or bbox[0][1] <= first_bbox[0][1] <= bbox[2][1] \
                or bbox[0][1] <= first_bbox[2][1] <= bbox[2][1]:
            first_row.append(bbox)
        # bbox sits entirely above first_bbox
        elif bbox[2][1] <= first_bbox[0][1]:
            first_row.append(bbox)

    # Group the first row into columns by horizontal overlap
    first_row.sort(key=lambda x: (x[0][0], x[0][1]))
    first_row_col = []
    used_bbox = []
    for bbox in first_row:
        if bbox in used_bbox:
            continue
        temp_col = []
        for bbox1 in first_row:
            if bbox1 in used_bbox:
                continue
            if bbox1[0][0] <= bbox[0][0] <= bbox1[2][0] \
                    or bbox1[0][0] <= bbox[2][0] <= bbox1[2][0] \
                    or bbox[0][0] <= bbox1[0][0] <= bbox[2][0] \
                    or bbox[0][0] <= bbox1[2][0] <= bbox[2][0]:
                temp_col.append(bbox1)
                used_bbox.append(bbox1)
        first_row_col.append(temp_col)

    # First column: every bbox whose horizontal span intersects first_bbox's (or lies left of it)
    first_col = []
    bbox_list.sort(key=lambda x: (x[0][0], x[0][1]))
    for bbox in bbox_list:
        # horizontal spans intersect
        if first_bbox[0][0] <= bbox[0][0] <= first_bbox[2][0] \
                or first_bbox[0][0] <= bbox[2][0] <= first_bbox[2][0] \
                or bbox[0][0] <= first_bbox[0][0] <= bbox[2][0] \
                or bbox[0][0] <= first_bbox[2][0] <= bbox[2][0]:
            first_col.append(bbox)
        # bbox sits entirely left of first_bbox
        elif bbox[2][0] <= first_bbox[0][0]:
            first_col.append(bbox)

    # Group the first column into rows by vertical overlap with a running anchor
    first_col.sort(key=lambda x: (x[0][1], x[0][0]))
    first_col_row = []
    current_bbox = first_col[0]
    temp_row = []
    for bbox in first_col:
        if current_bbox[0][1] <= bbox[0][1] <= current_bbox[2][1] \
                or current_bbox[0][1] <= bbox[2][1] <= current_bbox[2][1] \
                or bbox[0][1] <= current_bbox[0][1] <= bbox[2][1] \
                or bbox[0][1] <= current_bbox[2][1] <= bbox[2][1]:
            temp_row.append(bbox)
        else:
            if temp_row:
                temp_row.sort(key=lambda x: x[0][1])
                first_col_row.append(temp_row)
            temp_row = [bbox]
            current_bbox = bbox
    if temp_row:
        temp_row.sort(key=lambda x: x[0][1])
        first_col_row.append(temp_row)

    # print('len(first_row)', len(first_row))
    # print('first_row', [bbox_text_dict.get(str(x)) for x in first_row])
    # print('first_col', [bbox_text_dict.get(str(x)) for x in first_col])
    # print('len(first_col)', len(first_col))
    # print('len(first_row_col)', len(first_row_col))
    # print('len(first_col_row)', len(first_col_row))

    # Draw column separators: two vertical lines per column (left/right extents)
    col_line_list = []
    for col in first_row_col:
        # two lines per column, at the min/max x of its boxes
        min_w, max_w = 1000000, 0
        # print('col', [bbox_text_dict.get(str(x)) for x in col])
        for bbox in col:
            if bbox[0][0] < min_w:
                min_w = bbox[0][0]
            if bbox[2][0] > max_w:
                max_w = bbox[2][0]
        col_line_list.append([min_w, table_location[1], min_w, table_location[3]])
        col_line_list.append([max_w, table_location[1], max_w, table_location[3]])

    # Draw row separators: two horizontal lines per row (top/bottom extents)
    row_line_list = []
    last_max_h = None  # NOTE(review): only used by the commented-out midpoint line below
    for row in first_col_row:
        # lines at the min/max y of the row's boxes
        min_h, max_h = 1000000, 0
        for bbox in row:
            if bbox[0][1] < min_h:
                min_h = bbox[0][1]
            if bbox[2][1] > max_h:
                max_h = bbox[2][1]
        row_line_list.append([table_location[0], min_h, table_location[2], min_h])
        row_line_list.append([table_location[0], max_h, table_location[2], max_h])
        # if last_max_h:
        #     row_line_list.append([table_location[0], int((min_h+last_max_h)/2), table_location[2], int((min_h+last_max_h)/2)])
        last_max_h = max_h

    # print('len(col_line_list)', len(col_line_list))
    # print('col_line_list', col_line_list)
    # print('len(row_line_list)', len(row_line_list))

    # If a column line crosses black pixels, shift it into the nearest blank gap
    temp_list = []
    for i in range(1, len(col_line_list), 2):
        # right edge line of the previous column
        line1 = col_line_list[i]
        line1 = [int(x) for x in line1]
        # left edge line of the next column
        if i+1 >= len(col_line_list):
            break
        line2 = col_line_list[i+1]
        line2 = [int(x) for x in line2]

        max_black_cnt = 10
        black_threshold = 150
        black_cnt1 = count_black(img[line1[1]:line1[3], line1[0]:line1[2]+1, :], threshold=black_threshold)
        black_cnt2 = count_black(img[line2[1]:line2[3], line2[0]:line2[2]+1, :], threshold=black_threshold)
        # print('col black_cnt1', i, black_cnt1)
        # print('col black_cnt2', i, black_cnt2)
        # NOTE(review): the three `if False and ...` branches below are deliberately
        # disabled dead code — only the gap-search `else` ever runs.
        if False and black_cnt2 <= max_black_cnt and black_cnt1 <= max_black_cnt:
            if black_cnt1 >= black_cnt2:
                temp_list.append(line2)
            else:
                temp_list.append(line1)
        elif False and black_cnt2 <= max_black_cnt:
            temp_list.append(line2)
        elif False and black_cnt1 <= max_black_cnt:
            temp_list.append(line1)
        # neither candidate line qualifies
        else:
            # find the nearest bbox to the left so the line never crosses a bbox
            min_distance = 100000
            min_dis_bbox = bbox_list[0]
            for bbox in bbox_list:
                if bbox[2][0] < line2[0]:
                    _dis = line2[0] - bbox[2][0]
                    if _dis < min_distance:
                        min_distance = _dis
                        min_dis_bbox = bbox

            # scan pixel columns from right to left looking for a clear gap
            right_left_index_list = []
            right_left_cnt_list = []
            find_flag = False
            for j in range(line2[0], int(min_dis_bbox[2][0]), -1):
                # require 3 consecutive clear pixel columns
                if len(right_left_index_list) == 3:
                    find_flag = True
                    break
                # NOTE(review): vertical extent uses line1's y-range while x scans
                # from line2 — confirm line1/line2 share the same y span.
                black_cnt = count_black(img[line1[1]:line1[3], j:j+1, :], threshold=black_threshold)
                # print('col black_cnt', black_cnt)
                right_left_cnt_list.append(black_cnt)
                # a pixel column with no black at all
                if black_cnt == 0:
                    right_left_index_list.append(j)
                else:
                    right_left_index_list = []
            if find_flag:
                temp_list.append([right_left_index_list[1], line2[1], right_left_index_list[1], line2[3]])
            else:
                # no fully clear gap: take the position with the smallest
                # black count, smoothed over a window of n neighbours
                n = 1
                min_cnt = 1000000.
                min_cnt_index = 0
                for j, cnt in enumerate(right_left_cnt_list):
                    if j < n or j > len(right_left_cnt_list) - 1 - n:
                        continue
                    # stop early once the count is effectively zero
                    if min_cnt <= 0.001:
                        break
                    last_cnt = right_left_cnt_list[j-1]
                    next_cnt = right_left_cnt_list[j+1]
                    avg_cnt = (last_cnt + cnt + next_cnt) / 3
                    if avg_cnt < min_cnt:
                        min_cnt = avg_cnt
                        min_cnt_index = j
                # convert scan offset back to an absolute x coordinate
                min_cnt_index = line2[0] - min_cnt_index
                temp_list.append([min_cnt_index, line2[1], min_cnt_index, line2[3]])
    col_line_list = temp_list

    # Assign each bbox to a column band between consecutive column lines
    last_line = [0, 0, 0, 0]
    col_bbox_list = []
    # NOTE(review): the sentinel uses img.shape[0] (height) as an x bound —
    # looks like it should be img.shape[1]; works only while width <= height.
    for line in col_line_list + [[img.shape[0], 0, img.shape[0], 0]]:
        col = []
        for bbox in bbox_list:
            iou = line_iou([[last_line[0], 0], [line[0], 0]], [[bbox[0][0], 0], [bbox[2][0], 0]], axis=0)
            if iou >= 0.6:
                col.append(bbox)
        col.sort(key=lambda x: x[0][1])
        col_bbox_list.append(col)
        last_line = line

    # Resolve row lines: decide which row each in-between bbox belongs to
    temp_list = []
    for i in range(1, len(row_line_list), 2):
        # bottom line of the previous row
        line1 = row_line_list[i]
        line1 = [int(x) for x in line1]
        # top line of the next row
        if i+1 >= len(row_line_list):
            break
        line2 = row_line_list[i+1]
        line2 = [int(x) for x in line2]

        # boxes lying between the two candidate row lines
        sub_bbox_list = []
        threshold = 5
        for bbox in bbox_list:
            if line1[1] - threshold <= bbox[0][1] <= bbox[2][1] <= line2[1]+threshold:
                sub_bbox_list.append(bbox)

        # decide per bbox (via its column's vertical neighbours) whether it
        # belongs to the upper (line1) or lower (line2) row
        line1_bbox_list = []
        line2_bbox_list = []
        if sub_bbox_list:
            sub_bbox_list.sort(key=lambda x: x[0][1])
            min_h = sub_bbox_list[0][0][1] - 1
            max_h = sub_bbox_list[-1][2][1] + 1
        for bbox in sub_bbox_list:
            # locate the column this bbox was assigned to
            current_col = None
            for col in col_bbox_list:
                if bbox in col:
                    current_col = copy.deepcopy(col)
                    break
            if current_col:
                # inject the row edges as pseudo-boxes to anchor the distance walk
                line1_bbox = [[0, min_h], [], [0, min_h], []]
                line2_bbox = [[0, max_h], [], [0, max_h], []]
                current_col += [line1_bbox, line2_bbox]
                current_col.sort(key=lambda x: x[0][1])
                bbox_index = current_col.index(bbox)
                line1_bbox_index = current_col.index(line1_bbox)
                line2_bbox_index = current_col.index(line2_bbox)
                # print('current_col', [bbox_text_dict.get(str(x)) for x in current_col])
                # print('line1_bbox_index, bbox_index, line2_bbox_index', line1_bbox_index, bbox_index, line2_bbox_index)
                # distance (centre-to-centre of the last step) walking down from line1
                distance1 = 10000
                for index in range(line1_bbox_index, bbox_index):
                    h1 = (current_col[index][0][1] + current_col[index][2][1]) / 2
                    h2 = (current_col[index+1][0][1] + current_col[index+1][2][1]) / 2
                    # print(bbox_text_dict.get())
                    distance1 = abs(h1 - h2)
                # distance walking up from line2
                distance2 = 10000
                for index in range(line2_bbox_index, bbox_index, -1):
                    h1 = (current_col[index][0][1] + current_col[index][2][1]) / 2
                    h2 = (current_col[index-1][0][1] + current_col[index-1][2][1]) / 2
                    distance2 = abs(h1 - h2)

                # print(bbox_text_dict.get(str(bbox)), distance1, distance2)
                ratio = 1.5
                # clearly closer to the lower row
                if distance1 >= distance2 * ratio or distance1 >= distance2 + 8:
                    line2_bbox_list.append(bbox)
                # clearly closer to the upper row
                elif distance2 >= distance1 * ratio or distance2 >= distance1 + 8:
                    line1_bbox_list.append(bbox)
                else:
                    # ambiguous: neither distance dominates (bbox left unassigned)
                    print('距离不明确,需要nsp模型介入判断')

        if line1_bbox_list:
            # print('line1_bbox_list', [bbox_text_dict.get(str(x)) for x in line1_bbox_list])
            line1_bbox_list.sort(key=lambda x: x[0][1])
            b = line1_bbox_list[-1]
            line1 = [line1[0], b[2][1], line1[2], b[2][1]]
        if line2_bbox_list:
            # print('line2_bbox_list', [bbox_text_dict.get(str(x)) for x in line2_bbox_list])
            line2_bbox_list.sort(key=lambda x: x[0][1])
            b = line2_bbox_list[0]
            line2 = [line2[0], b[0][1], line2[2], b[0][1]]

        # final row separator: midway between the adjusted edge lines
        _line = [line1[0], (line1[1]+line2[1])/2, line1[2], (line1[3]+line2[3])/2]
        _line = [int(x) for x in _line]
        temp_list.append(_line)
    row_line_list = temp_list

    # Add the outer table border (clamped a few pixels inside the image)
    threshold = 5
    min_w = max(table_location[0], 0+threshold)
    max_w = min(table_location[2], img.shape[1]-threshold)
    min_h = max(table_location[1], 0+threshold)
    max_h = min(table_location[3], img.shape[0]-threshold)
    row_line_list.append([min_w, min_h, max_w, min_h])
    row_line_list.append([min_w, max_h, max_w, max_h])
    col_line_list.append([min_w, min_h, min_w, max_h])
    col_line_list.append([max_w, min_h, max_w, max_h])

    # Build row/column-ordered cells from the line grid
    row_line_list = [[int(x[0]), int(x[1]), int(x[2]), int(x[3])] for x in row_line_list]
    col_line_list = [[int(x[0]), int(x[1]), int(x[2]), int(x[3])] for x in col_line_list]
    table_bbox_list, table_cell_list = get_table_bbox_list(img, [row_line_list], [col_line_list], [table_location], bbox_list)

    # merged line list returned to the caller
    line_list = row_line_list + col_line_list

    # show (debug only: blocks on cv2.waitKey)
    if is_test:
        for r in table_cell_list:
            for c in r:
                cv2.rectangle(img, c[0], c[1], (0, 255, 0), 1)
        cv2.imshow('table_cell', img)

        for line in col_line_list:
            cv2.line(img, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (0, 0, 255), 2)
        for line in row_line_list:
            cv2.line(img, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (255, 0, 0), 2)
        cv2.namedWindow('img', cv2.WINDOW_NORMAL)
        cv2.imshow('img', cv2.resize(img, (768, 1024)))
        cv2.waitKey(0)

    return line_list, table_cell_list, table_location

+ 226 - 0
botr/rules/table_utils.py

@@ -0,0 +1,226 @@
+import numpy as np
+import cv2
+
+
def shrink_bbox(img, bbox_list):
    """Tighten each bbox to the extent of its dark (near-black) pixels.

    Boxes that are empty, or that contain no dark pixels at all, are kept
    unchanged. Input/output boxes are 4-point quads
    [[x0, y0], [x0, y1], [x1, y1], [x1, y0]].
    """
    def dark_pixel_indices(image_np):
        # Indices of pixels whose B, G and R are all within [0, 150]
        lower = np.array([0, 0, 0])
        upper = np.array([150, 150, 150])
        mask = cv2.inRange(image_np, lower, upper)
        return np.where(mask != 0)

    shrunk = []
    for bbox in bbox_list:
        crop = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]

        if 0 in crop.shape:
            shrunk.append(bbox)
            continue

        # First/last dark row give the tight vertical extent
        row_hits = dark_pixel_indices(crop[:, :, :])
        if row_hits[0].size == 0 or row_hits[1].size == 0:
            shrunk.append(bbox)
            continue
        min_h, max_h = row_hits[0][0], row_hits[0][-1]

        # Transpose so the same trick yields the horizontal extent
        col_hits = dark_pixel_indices(np.swapaxes(crop, 0, 1)[:, :, :])
        if col_hits[0].size == 0 or col_hits[1].size == 0:
            shrunk.append(bbox)
            continue
        min_w, max_w = col_hits[0][0], col_hits[0][-1]

        left = bbox[0][0] + min_w
        right = bbox[0][0] + max_w
        top = bbox[0][1] + min_h
        bottom = bbox[0][1] + max_h
        shrunk.append([[left, top], [left, bottom], [right, bottom], [right, top]])

        # cv2.imshow('img', crop)
        # cv2.waitKey(0)
    return shrunk
+
+
def split_bbox(img, bbox, bbox_text_dict):
    """Split a wide OCR bbox at blank vertical gaps and apportion its text.

    Scans pixel columns for black-density transitions, splits where a wide
    blank gap exists, then divides the text proportionally to sub-box widths.
    NOTE(review): contains blocking cv2.imshow/waitKey debug calls and raises
    IndexError when no transition is found (split_line_list empty) — currently
    only reachable from commented-out code in get_table_by_rule.

    :return: (split_bbox_list, updated bbox_text_dict)
    """
    text = bbox_text_dict.get(str(bbox))

    sub_img = img[int(bbox[0][1]):int(bbox[2][1]), int(bbox[0][0]):int(bbox[2][0]), :]
    split_line_list = []
    last_i_status = 1
    # scan pixel columns left to right
    for i in range(1, sub_img.shape[1]):
        # column counts as "inked" if enough of its pixels are dark
        if np.where(sub_img[:, i, :] < 200)[0].size > sub_img.shape[0]/5:
            i_status = 0
        else:
            i_status = 1
        # XOR: record every black<->white transition column
        if last_i_status ^ i_status:
            split_line_list.append(int(i))
            last_i_status = i_status

    # drop split lines that are too close together
    min_len = 5
    last_l = split_line_list[0]
    temp_list = [split_line_list[0]]
    for l in split_line_list[1:]:
        if l - last_l > min_len:
            temp_list.append(l)
        last_l = l
    split_line_list = temp_list

    # a pair of lines with (almost) no black pixels between them is a real split
    split_pair_list = []
    last_line = split_line_list[0]
    for line in split_line_list[1:]:
        print('last_line, line', last_line, line, np.where(sub_img[:, last_line:line, :] < 100)[0].size)
        if line - last_line >= 10 and np.where(sub_img[:, last_line:line, :] < 100)[0].size < 10:
            split_pair_list.append([last_line, line])
        last_line = line

    print('split_pair_list', split_pair_list)

    # NOTE(review): debug visualisation — blocks until a key is pressed
    for l in split_line_list:
        l = int(l + bbox[0][0])
        cv2.line(img, (l, int(bbox[0][1])), (l, int(bbox[2][1])), (0, 255, 0), 2)
    cv2.rectangle(img, (int(bbox[0][0]), int(bbox[0][1])), (int(bbox[2][0]), int(bbox[2][1])),
                  (0, 0, 255), 1)
    cv2.imshow('img', img)
    cv2.waitKey(0)

    # cut the original bbox at each gap
    split_bbox_list = []
    if split_pair_list:
        start_line = 0
        for line1, line2 in split_pair_list:
            w1 = start_line + bbox[0][0]
            w2 = line1 + bbox[0][0]
            start_line = line2
            split_bbox_list.append([[w1, bbox[0][1]], [], [w2, bbox[2][1]], []])
        w1 = start_line + bbox[0][0]
        w2 = bbox[2][0]
        split_bbox_list.append([[w1, bbox[0][1]], [], [w2, bbox[2][1]], []])

    print('split_bbox_list', split_bbox_list)

    # average width of a single character across all sub-boxes
    # NOTE(review): the loop variable shadows the `bbox` parameter from here on
    all_len = 0
    bbox_len_list = []
    for bbox in split_bbox_list:
        _len = abs(bbox[2][0] - bbox[0][0])
        all_len += _len
        bbox_len_list.append(_len)
    single_char_len = all_len / len(text)

    # slice the text proportionally to each sub-box's width
    split_text_list = []
    text_start = 0
    for _len in bbox_len_list:
        text_num = int(_len / single_char_len + 0.5)
        text_end = text_start+text_num
        if text_end >= len(text):
            text_end = len(text)
        split_text_list.append(text[text_start:text_end])
        text_start = text_end
    print('split_text_list', split_text_list)

    # register the new sub-boxes with their text slices
    for i, bbox in enumerate(split_bbox_list):
        bbox_text_dict[str(bbox)] = split_text_list[i]

    return split_bbox_list, bbox_text_dict
+
+
def count_black(image_np, threshold=150):
    """Count pixels whose B, G and R values are all <= ``threshold``.

    Pure-NumPy equivalent of the previous ``cv2.inRange`` implementation
    (inclusive bounds on every channel), removing the cv2 dependency for
    this trivial thresholding helper.

    :param image_np: HxWx3 image array
    :param threshold: per-channel inclusive upper bound for "black"
    :return: number of matching pixels
    """
    lower = np.array([0, 0, 0])
    upper = np.array([threshold, threshold, threshold])
    # all three channels must fall inside [lower, upper]
    mask = np.all((image_np >= lower) & (image_np <= upper), axis=-1)
    cnt = np.sum(mask)
    # print("count color ", cnt)
    return cnt
+
+
def get_points_by_line(img, row_lines, col_lines):
    """Return the (x, y) crossings of the row and column lines, sorted by x then y.

    Rasterises both line sets onto blank single-channel canvases the size of
    ``img`` and intersects them with a bitwise AND.
    """
    h_canvas = np.zeros_like(img[:, :, 0], dtype=np.uint8)
    v_canvas = np.zeros_like(img[:, :, 0], dtype=np.uint8)
    for line in row_lines:
        cv2.line(h_canvas, [line[0], line[1]], [line[2], line[3]], (255, 255, 255), 1)
    for line in col_lines:
        cv2.line(v_canvas, [line[0], line[1]], [line[2], line[3]], (255, 255, 255), 1)

    # a pixel lit on both canvases is a row/column crossing
    crossing_img = np.bitwise_and(h_canvas, v_canvas)

    ys, xs = np.where(crossing_img > 0)
    points = [(xs[k], ys[k]) for k in range(len(xs))]
    points.sort(key=lambda p: (p[0], p[1]))
    return points
+
+
def get_table_bbox_list(img, area_row_lines, area_col_lines, table_location_list, bbox_list):
    """Assemble per-cell bbox groups from row/column line intersections.

    For each table area: intersect its row/column lines into grid points,
    group the points into rows, then assign every bbox (by centre point) to
    the grid cell that contains it.

    :param area_row_lines: per-area lists of horizontal lines [x0, y0, x1, y1]
    :param area_col_lines: per-area lists of vertical lines
    :param table_location_list: one location per area (used only for iteration count)
    :param bbox_list: 4-point quads to distribute into cells
    :return: (row-major nested list of bboxes per cell, matching list of
             [top-left, bottom-right] cell corner pairs) — NOTE: only the
             FIRST area's result is returned.
    """
    area_table_bbox_list = []
    area_table_cell_list = []
    for i in range(len(table_location_list)):
        row_lines = area_row_lines[i]
        col_lines = area_col_lines[i]

        # intersect row and column lines into grid points
        cross_points = get_points_by_line(img, row_lines, col_lines)

        # for p in cross_points:
        #     cv2.circle(img, p, 2, (0, 0, 255), 2)
        # cv2.imshow('cross_points', img)

        # group intersection points into rows (points sharing the same y)
        cross_points.sort(key=lambda x: (x[1], x[0]))
        row_point_list = []
        if not cross_points:
            area_table_bbox_list.append([])
            area_table_cell_list.append([])
            continue
        current_row = [cross_points[0]]
        for p in cross_points[1:]:
            if current_row[0][1] == p[1]:
                current_row.append(p)
            else:
                row_point_list.append(current_row)
                current_row = [p]
        if current_row:
            row_point_list.append(current_row)

        # lay bboxes out in table order: cell (j, k) spans from the point
        # above-left (previous row) to the point below-right (current row)
        used_bbox_list = []
        row_list = []
        row_cell_list = []
        for j in range(1, len(row_point_list)):
            last_row = row_point_list[j-1]
            row = row_point_list[j]
            col_list = []
            col_cell_list = []
            for k in range(1, len(row)):
                last_p = last_row[k-1]
                p = row[k]
                cell = []
                for bbox in bbox_list:
                    if bbox in used_bbox_list:
                        continue
                    bbox_h_center = (bbox[0][1]+bbox[2][1]) / 2
                    bbox_w_center = (bbox[0][0]+bbox[2][0]) / 2
                    # a bbox belongs to the cell that contains its centre
                    if last_p[1] <= bbox_h_center <= p[1] and last_p[0] <= bbox_w_center <= p[0]:
                        cell.append(bbox)
                        used_bbox_list.append(bbox)
                col_list.append(cell)
                col_cell_list.append([last_p, p])
            row_list.append(col_list)
            row_cell_list.append(col_cell_list)

        area_table_bbox_list.append(row_list)
        area_table_cell_list.append(row_cell_list)
    return area_table_bbox_list[0], area_table_cell_list[0]

+ 117 - 0
botr/utils.py

@@ -0,0 +1,117 @@
+import json
+import socket
+import traceback
+
+import cv2
+import requests
+from PIL import Image
+import numpy as np
+
+
def request_post(url, param, time_out=1000, use_zlib=False):
    """POST ``param`` to ``url``, retrying once at most.

    :param url: target endpoint
    :param param: form data for the POST body
    :param time_out: per-request timeout in seconds
    :param use_zlib: unused; kept for call-site compatibility
    :return: response text on HTTP 200, otherwise ``json.dumps([-2])``
    """
    fails = 0
    text = json.dumps([-2])
    session = requests.Session()
    while True:
        try:
            # a single failure already exhausts the retry budget
            if fails >= 1:
                break

            result = session.post(url, data=param, timeout=time_out)

            if result.status_code == 200:
                text = result.text
                break
            print('result.status_code', result.status_code)
            print('result.text', result.text)
            fails += 1
        except socket.timeout:
            fails += 1
            print('timeout! fail times:', fails)
        except:
            fails += 1
            print('fail! fail times:', fails)
            traceback.print_exc()
    return text
+
+
def line_iou(line1, line2, axis=0):
    """Overlap of two 1-D segments relative to the SHORTER segment.

    Each line is a pair of points; ``axis`` selects the coordinate (0 = x,
    1 = y). Returns 0. when the shorter segment has zero length. Despite the
    name this is not a true IoU: the denominator is the shorter segment, so
    full containment yields 1.0 and disjoint segments yield a negative value.
    """
    a0, a1 = line1[0][axis], line1[1][axis]
    b0, b1 = line2[0][axis], line2[1][axis]
    inter = min(a1, b1) - max(a0, b0)
    # union = max(line1[1][axis], line2[1][axis]) - min(line1[0][axis], line2[0][axis])
    union = min(abs(a0 - a1), abs(b0 - b1))
    if union in [0, 0.]:
        return 0.
    return inter / union
+
+
def pil_resize(image_np, height, width):
    """Resize a BGR numpy image to (height, width) using PIL bicubic interpolation."""
    rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
    resized = Image.fromarray(rgb).resize((int(width), int(height)), Image.BICUBIC)
    return cv2.cvtColor(np.asarray(resized), cv2.COLOR_RGB2BGR)
+
+
def get_best_predict_size2(image_np, threshold=3000):
    """Scale (h, w) so the longer image side equals ``threshold``, keeping aspect ratio."""
    height, width = image_np.shape[:2]
    ratio = threshold / max(height, width)
    return int(height * ratio), int(width * ratio)
+
+
def line_overlap(a1, a2, b1, b2):
    """Length of the overlap between intervals [a1, a2] and [b1, b2]; 0 when disjoint."""
    overlap = min(a2, b2) - max(a1, b1)
    return overlap if overlap >= 0 else 0
+
+
def get_table_iou(x1_min, y1_min, x1_max, y1_max, x2_min, y2_min, x2_max, y2_max):
    """Intersection over the SMALLER (or contained) box area of two rectangles.

    Unlike standard IoU, the denominator is the contained box's area when one
    rectangle fully contains the other, and otherwise the smaller of the two
    areas — so a box nested inside another scores 1.0.

    Fix: previously raised ZeroDivisionError for degenerate (zero-area)
    boxes; such inputs now return 0.0.

    :return: overlap ratio in [0, 1]
    """
    # areas of both rectangles
    width1 = x1_max - x1_min
    height1 = y1_max - y1_min
    area1 = width1 * height1

    width2 = x2_max - x2_min
    height2 = y2_max - y2_min
    area2 = width2 * height2

    # corners of the intersection rectangle
    x_intersection_min = max(x1_min, x2_min)
    y_intersection_min = max(y1_min, y2_min)
    x_intersection_max = min(x1_max, x2_max)
    y_intersection_max = min(y1_max, y2_max)

    # clamp to 0 when the rectangles do not overlap
    intersection_width = max(0, x_intersection_max - x_intersection_min)
    intersection_height = max(0, y_intersection_max - y_intersection_min)
    intersection_area = intersection_width * intersection_height

    # denominator: contained box when one contains the other, else the smaller box
    if (x1_min <= x2_min) and (y1_min <= y2_min) and (x1_max >= x2_max) and (y1_max >= y2_max):
        union_area = area2
    elif (x2_min <= x1_min) and (y2_min <= y1_min) and (x2_max >= x1_max) and (y2_max >= y1_max):
        union_area = area1
    else:
        # union_area = area1 + area2 - intersection_area
        union_area = min(area1, area2)

    # guard against degenerate boxes (zero reference area)
    if union_area == 0:
        return 0.

    iou = intersection_area / union_area

    return iou
+
+
if __name__ == '__main__':
    # Quick manual check: 3x3 boxes offset by 1 overlap 2x2=4 over area 9
    print(get_table_iou(1, 1, 4, 4, 0, 0, 3, 3))

+ 237 - 0
botr/yolov8/model.py

@@ -0,0 +1,237 @@
+import logging
+import os
+from copy import deepcopy
+import torch
+import numpy as np
+import cv2
+import torch.nn as nn
+from format_convert.utils import log
+from botr.yolov8.module import Conv, Conv2, RepConv, ConvTranspose, DWConv, Detect, parse_model, fuse_conv_and_bn, \
+    fuse_deconv_and_bn
+from botr.yolov8.yolo_utils import yaml_load, initialize_weights, smart_inference_mode, \
+    attempt_load_one_weight, non_max_suppression, scale_boxes, LetterBox, LoadPilAndNumpy
+
+cfg_path = os.path.abspath(os.path.dirname(__file__)) + '/yolov8_model.yaml'
+
+
class DetectionModel(nn.Module):
    """YOLOv8 detection model."""

    def __init__(self, cfg=cfg_path, ch=3):
        """Build the model graph from a YAML config.

        :param cfg: path to the model-definition YAML (defaults to the
            yolov8_model.yaml shipped next to this module)
        :param ch: number of input channels
        """
        super().__init__()
        self.yaml = yaml_load(cfg)  # cfg dict

        # Define model
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch)  # model, savelist
        self.names = {i: f'{i}' for i in range(self.yaml['nc'])}  # default names dict
        self.inplace = True

        # Build strides: run a dummy 256px forward pass through the Detect
        # head to measure each output's downsampling factor
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            self.stride = m.stride
            m.bias_init()  # only run once

        # Init weights, biases
        initialize_weights(self)

    def is_fused(self, thresh=10):
        """
        Check if the model has less than a certain threshold of BatchNorm layers.

        Args:
            thresh (int, optional): The threshold number of BatchNorm layers. Default is 10.

        Returns:
            (bool): True if the number of BatchNorm layers in the model is less than the threshold, False otherwise.
        """
        bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
        return sum(isinstance(v, bn) for v in self.modules()) < thresh  # True if < 'thresh' BatchNorm layers in model

    def fuse(self):
        """
        Fuse the `Conv2d()` and `BatchNorm2d()` layers of the model into a single layer, in order to improve the
        computation efficiency.

        Returns:
            (nn.Module): The fused model is returned.
        """
        if not self.is_fused():
            for m in self.model.modules():
                if isinstance(m, (Conv, Conv2, DWConv)) and hasattr(m, 'bn'):
                    if isinstance(m, Conv2):
                        m.fuse_convs()
                    m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                    delattr(m, 'bn')  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, ConvTranspose) and hasattr(m, 'bn'):
                    m.conv_transpose = fuse_deconv_and_bn(m.conv_transpose, m.bn)
                    delattr(m, 'bn')  # remove batchnorm
                    m.forward = m.forward_fuse  # update forward
                if isinstance(m, RepConv):
                    m.fuse_convs()
                    m.forward = m.forward_fuse  # update forward
        return self

    def _forward_once(self, x):
        """
        Perform a forward pass through the network.

        Args:
            x (torch.Tensor): The input tensor to the model
        Returns:
            (torch.Tensor): The last output of the model.
        """
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
        return x

    def forward(self, x):
        """Run forward pass on input image(s) with optional augmentation and profiling."""
        return self._forward_once(x)  # single-scale inference, train
+
+
class Predictor:
    """
    Predictor

    A class for creating predictors.
    """

    def __init__(self, image_size, device, model):
        """
        Initializes the Predictor with fixed inference settings and loads the model.

        Args:
            image_size: target inference image size passed to LetterBox/LoadPilAndNumpy.
            device: torch device to run inference on.
            model: model weights reference passed to attempt_load_one_weight.
        """
        self.iou = 0.7
        self.agnostic_nms = False
        self.max_det = 300
        self.filter_classes = None
        self.confidence = 0.25  # default conf=0.25

        # Usable if setup is done
        self.model = None
        self.imgsz = image_size
        self.device = device
        self.dataset = None
        self.stride = 32

        # load model weights once at construction time
        self.setup_model(model)
        log('setup model: yolo v8 once!')

    def preprocess(self, im):
        """Prepares input image before inference.

        Args:
            im (torch.Tensor | List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
        """
        im = np.stack(self.pre_transform(im))
        im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
        im = np.ascontiguousarray(im)  # contiguous
        im = torch.from_numpy(im)

        # NOTE: assuming im with (b, 3, h, w) if it's a tensor
        img = im.to(self.device)
        img = img.float()  # uint8 to fp16/32
        img /= 255  # 0 - 255 to 0.0 - 1.0
        return img

    def pre_transform(self, im):
        """Pre-tranform input image before inference.

        Args:
            im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.

        Return: A list of transformed imgs.
        """
        same_shapes = all(x.shape == im[0].shape for x in im)
        auto = same_shapes
        return [LetterBox(self.imgsz, auto=auto, stride=self.stride)(image=x) for x in im]

    def postprocess(self, preds, img, orig_imgs):
        """Apply NMS and rescale boxes back to the original image coordinates."""
        preds = non_max_suppression(preds,
                                    self.confidence,
                                    self.iou,
                                    agnostic=self.agnostic_nms,
                                    max_det=self.max_det,
                                    classes=self.filter_classes)

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            if not isinstance(orig_imgs, torch.Tensor):
                pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(pred)
        return results

    def setup_source(self, source):
        """Sets up source and inference mode."""
        self.dataset = LoadPilAndNumpy(source, imgsz=self.imgsz)

    def setup_model(self, model):
        """Initialize YOLO model with given parameters and set it to evaluation mode."""
        self.model = attempt_load_one_weight(model,
                                             device=self.device,
                                             inplace=True)[0]
        self.model.float().eval()

    @smart_inference_mode()
    def stream_inference(self, source=None):
        """Run inference over every batch built from `source`; returns raw box lists."""
        # Setup model
        # if not self.model:
        #     self.setup_model(model)

        # Setup source every time predict is called
        self.setup_source(source)

        results = []
        for batch in self.dataset:
            path, im0s, vid_cap, s = batch

            # Preprocess
            im = self.preprocess(im0s)

            # Inference
            preds = self.model(im)

            # Postprocess
            result = self.postprocess(preds, im, im0s)
            results.append(result[0].tolist())
            # NOTE(review): leftover debug print on every inference call
            print('stream_inference self.results', result[0].tolist())

        return results

    def predict(self, source=None, show=False):
        """Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
        # source = cv2.imread(source)
        results = self.stream_inference(source)
        if show:
            self.show(source, results[0])
        return results

    def show(self, source, result):
        """Draw each detection (box + confidence) on `source` and display it (blocking)."""
        for r in result:
            bbox = r[:4]
            bbox = [int(x) for x in bbox]
            confidence = r[4]
            cv2.rectangle(source, bbox[:2], bbox[2:4], color=(0, 0, 255), thickness=1)
            cv2.putText(source, str(round(confidence, 2)), (bbox[0], bbox[1]),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        cv2.imshow('result', source)
        cv2.waitKey(0)
+
+

+ 436 - 0
botr/yolov8/module.py

@@ -0,0 +1,436 @@
+import contextlib
+import logging
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+
+from botr.yolov8.yolo_utils import make_anchors, dist2bbox, make_divisible
+
+
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Compute the padding that keeps output shape equal to input shape ('same').

    Args:
        k: kernel size — an int or a per-dimension list of ints.
        p: explicit padding; returned unchanged when given.
        d: dilation factor; inflates the effective kernel size first.

    Returns:
        int or list: padding matching the (effective) kernel size.
    """
    # Effective kernel size once dilation is accounted for.
    if d > 1:
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (x - 1) + 1 for x in k]
    # Caller-supplied padding wins; otherwise pad by half the kernel.
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [x // 2 for x in k]
+
+
class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        # padding defaults to 'same' via autopad(); no conv bias — BN provides the shift
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True -> default SiLU; an nn.Module -> used as-is; anything else -> identity
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation only (used after BN is fused into the conv)."""
        return self.act(self.conv(x))
+
+
class Conv2(Conv):
    """Simplified RepConv module with Conv fusing."""

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
        self.cv2 = nn.Conv2d(c1, c2, 1, s, autopad(1, p, d), groups=g, dilation=d, bias=False)  # add 1x1 conv

    def forward(self, x):
        """Sum the kxk and 1x1 convolution branches, then apply BN and activation."""
        return self.act(self.bn(self.conv(x) + self.cv2(x)))

    def fuse_convs(self):
        """Fold the parallel 1x1 branch into the main kxk conv (in place) and remove it."""
        w = torch.zeros_like(self.conv.weight.data)
        # place the 1x1 kernel at the spatial centre of the kxk kernel
        i = [x // 2 for x in w.shape[2:]]
        w[:, :, i[0]:i[0] + 1, i[1]:i[1] + 1] = self.cv2.weight.data.clone()
        self.conv.weight.data += w
        self.__delattr__('cv2')
+
+
class DWConv(Conv):
    """Depth-wise convolution."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        # groups = gcd(c1, c2): depth-wise when c1 == c2, grouped otherwise
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
+
+
class ConvTranspose(nn.Module):
    """Convolution transpose 2d layer."""
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """Initialize ConvTranspose2d layer with batch normalization and activation function."""
        super().__init__()
        # bias only when BN is disabled (BN would absorb a bias anyway)
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Applies transposed convolution, batch normalization and activation to input."""
        return self.act(self.bn(self.conv_transpose(x)))

    def forward_fuse(self, x):
        """Applies transposed convolution and activation only (after BN fusion)."""
        return self.act(self.conv_transpose(x))
+
+
class RepConv(nn.Module):
    """RepConv is a basic rep-style block, including training and deploy status
    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """Build the training-time branches: a kxk conv, a 1x1 conv and an optional identity BN."""
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

        # identity (BN-only) branch is valid only when input/output shapes match
        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Deploy-time forward: single fused conv (only valid after fuse_convs())."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Training-time forward: sum of the kxk, 1x1 and (optional) identity branches."""
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Return the single 3x3 (kernel, bias) equivalent to all branches combined."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    def _avg_to_3x3_tensor(self, avgp):
        """Express an AvgPool2d as an equivalent conv kernel.  NOTE(review): not called anywhere in this file."""
        channels = self.c1
        groups = self.g
        kernel_size = avgp.kernel_size
        input_dim = channels // groups
        k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
        k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
        return k

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Zero-pad a 1x1 kernel to 3x3 so it can be added to the 3x3 branch."""
        if kernel1x1 is None:
            return 0
        else:
            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's BatchNorm into an explicit (kernel, bias) pair; (0, 0) for a missing branch."""
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            # identity branch: build a one-hot 3x3 kernel once and cache it
            if not hasattr(self, 'id_tensor'):
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """Replace the parallel branches with one fused conv for deployment (idempotent)."""
        if hasattr(self, 'conv'):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(in_channels=self.conv1.conv.in_channels,
                              out_channels=self.conv1.conv.out_channels,
                              kernel_size=self.conv1.conv.kernel_size,
                              stride=self.conv1.conv.stride,
                              padding=self.conv1.conv.padding,
                              dilation=self.conv1.conv.dilation,
                              groups=self.conv1.conv.groups,
                              bias=True).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__('conv1')
        self.__delattr__('conv2')
        if hasattr(self, 'nm'):
            self.__delattr__('nm')
        if hasattr(self, 'bn'):
            self.__delattr__('bn')
        if hasattr(self, 'id_tensor'):
            self.__delattr__('id_tensor')
+
+
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).
    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """Initialize a fixed (non-trainable) 1x1 conv whose weights are 0..c1-1."""
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(c1, dtype=torch.float)
        # weight[i] = i, so the conv computes sum(i * p_i): the expectation over the bins
        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
        self.c1 = c1

    def forward(self, x):
        """Reduce (b, 4*c1, a) distribution logits to (b, 4, a) expected distances via softmax + expectation."""
        b, c, a = x.shape  # batch, channels, anchors
        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
        # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
+
+
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Store the dimension along which forward() concatenates."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenate the list of tensors *x* along the configured dimension."""
        return torch.cat(x, self.d)
+
+
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        """Build the 1x1 reduce conv, the shared max-pool and the 1x1 output conv."""
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Apply the shared pool three times in sequence and concatenate all four stages."""
        x = self.cv1(x)
        y1 = self.m(x)
        y2 = self.m(y1)
        return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
+
+
class Bottleneck(nn.Module):
    """Standard bottleneck."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        """Build two convs; a residual add is enabled only when c1 == c2."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, k[0], 1)
        self.cv2 = Conv(c_, c2, k[1], 1, g=g)
        self.add = shortcut and c1 == c2
    def forward(self, x):
        """Apply both convs, with a residual connection when shapes allow it."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
class C2f(nn.Module):
    """CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        """Build the split conv, n bottleneck blocks and the output conv."""
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))

    def forward(self, x):
        """Forward pass through C2f layer."""
        # split into two halves; each bottleneck feeds on the previous output,
        # and every intermediate is kept for the final concatenation
        y = list(self.cv1(x).chunk(2, 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

    def forward_split(self, x):
        """Forward pass using split() instead of chunk()."""
        y = list(self.cv1(x).split((self.c, self.c), 1))
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))
+
+
class Detect(nn.Module):
    """YOLOv8 Detect head for detection models."""
    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=()):  # detection layer
        """Build the box (cv2) and class (cv3) prediction branches for each feature level."""
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.stride = torch.zeros(self.nl)  # strides computed during build
        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

    def forward(self, x):
        """Concatenates and returns predicted bounding boxes and class probabilities."""
        shape = x[0].shape  # BCHW
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            # rebuild anchors only when the input shape changes
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape

        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
        # NOTE(review): self.format is never assigned in this file; this branch assumes
        # an exporter sets it externally — confirm before relying on export mode.
        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
            box = x_cat[:, :self.reg_max * 4]
            cls = x_cat[:, self.reg_max * 4:]
        else:
            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y if self.export else (y, x)

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        m = self  # self.model[-1]  # Detect() module
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
            a[-1].bias.data[:] = 1.0  # box
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+
+
def fuse_conv_and_bn(conv, bn):
    """Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/.

    Args:
        conv: an nn.Conv2d layer.
        bn: the nn.BatchNorm2d layer that follows it (eval-mode statistics are used).

    Returns:
        nn.Conv2d: a new frozen conv (with bias) equivalent to bn(conv(x)).
    """
    fused = nn.Conv2d(conv.in_channels,
                      conv.out_channels,
                      kernel_size=conv.kernel_size,
                      stride=conv.stride,
                      padding=conv.padding,
                      dilation=conv.dilation,
                      groups=conv.groups,
                      bias=True).requires_grad_(False).to(conv.weight.device)

    # Fold the BN scale into the weights: W' = diag(gamma / sqrt(var + eps)) @ W
    bn_scale = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    conv_w = conv.weight.clone().view(conv.out_channels, -1)
    fused.weight.copy_(torch.mm(bn_scale, conv_w).view(fused.weight.shape))

    # Fold the BN shift into the bias: b' = scale * (b - mean) + beta
    if conv.bias is None:
        conv_b = torch.zeros(conv.weight.size(0), device=conv.weight.device)
    else:
        conv_b = conv.bias
    bn_shift = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fused.bias.copy_(torch.mm(bn_scale, conv_b.reshape(-1, 1)).reshape(-1) + bn_shift)

    return fused
+
+
def fuse_deconv_and_bn(deconv, bn):
    """Fuse ConvTranspose2d() and BatchNorm2d() layers.

    Args:
        deconv: an nn.ConvTranspose2d layer.
        bn: the nn.BatchNorm2d layer that follows it.

    Returns:
        nn.ConvTranspose2d: a new frozen layer with the BN folded in.
    """
    fused = nn.ConvTranspose2d(deconv.in_channels,
                               deconv.out_channels,
                               kernel_size=deconv.kernel_size,
                               stride=deconv.stride,
                               padding=deconv.padding,
                               output_padding=deconv.output_padding,
                               dilation=deconv.dilation,
                               groups=deconv.groups,
                               bias=True).requires_grad_(False).to(deconv.weight.device)

    # Fold the BN scale into the weights (same scheme as fuse_conv_and_bn).
    bn_scale = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    deconv_w = deconv.weight.clone().view(deconv.out_channels, -1)
    fused.weight.copy_(torch.mm(bn_scale, deconv_w).view(fused.weight.shape))

    # Fold the BN shift into the bias.
    if deconv.bias is None:
        deconv_b = torch.zeros(deconv.weight.size(1), device=deconv.weight.device)
    else:
        deconv_b = deconv.bias
    bn_shift = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fused.bias.copy_(torch.mm(bn_scale, deconv_b.reshape(-1, 1)).reshape(-1) + bn_shift)

    return fused
+
+
def parse_model(d, ch):
    """Parse a YOLO model.yaml dictionary *d* into an nn.Sequential model.

    Args:
        d (dict): model definition with 'backbone' and 'head' layer lists,
            plus optional 'nc', 'act', 'scales', 'depth_multiple',
            'width_multiple'.
        ch (int): number of input channels.

    Returns:
        tuple: (nn.Sequential model, sorted list of layer indices whose
        outputs must be saved for later 'from' references).
    """
    import ast

    # Args
    max_channels = float('inf')
    nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
    if scales:
        scale = d.get('scale')
        if not scale:
            scale = tuple(scales.keys())[0]
            logging.warning(f"WARNING ⚠️ no model scale passed. Assuming scale='{scale}'.")
        depth, width, max_channels = scales[scale]

    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()

    ch = [ch]
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = getattr(torch.nn, m[3:]) if 'nn.' in m else globals()[m]  # get module
        # evaluate string args: local names (e.g. 'nc') win over literal values
        for j, a in enumerate(args):
            if isinstance(a, str):
                with contextlib.suppress(ValueError):
                    args[j] = locals()[a] if a in locals() else ast.literal_eval(a)

        n = n_ = max(round(n * depth), 1) if n > 1 else n  # depth gain
        if m in (Conv, ConvTranspose, Bottleneck, SPPF, DWConv, C2f, nn.ConvTranspose2d):
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
                c2 = make_divisible(min(c2, max_channels) * width, 8)

            args = [c1, c2, *args[1:]]
            if m in (C2f,):
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m in (Detect,):
            args.append([ch[x] for x in f])
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        # NOTE(review): this sets .np on the module CLASS m, not the instance m_
        # (mirrors upstream ultralytics) — verify before relying on per-layer counts
        m.np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type

        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel placeholder once layer 0 exists
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)

+ 28 - 0
botr/yolov8/predict.py

@@ -0,0 +1,28 @@
+import os
+import sys
+from glob import glob
+import cv2
+import torch
+sys.path.append(os.path.abspath(os.path.dirname(__file__)) + '/../../')
+from botr.yolov8.model import Predictor
+
+
+ROOT = os.path.abspath(os.path.dirname(__file__)) + '/../../'
+model_path = ROOT + 'botr/yolov8/weights.pt'
+image_size = 640
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
def detect(image_np, predictor=None, show=False):
    """Run the YOLOv8 borderless-table detector on a BGR image array.

    Args:
        image_np: image as a numpy array (as returned by cv2.imread).
        predictor: optional pre-built Predictor; when omitted a new one is
            created from the module-level weights/size/device defaults.
        show: forwarded to Predictor.predict for OpenCV visualisation.

    Returns:
        The list of per-image detections from Predictor.predict.
    """
    if predictor is None:
        # Lazily build a predictor with the module-level defaults.
        predictor = Predictor(image_size, device, model_path)
    return predictor.predict(image_np, show=show)
+
+
if __name__ == '__main__':
    # Ad-hoc manual test: run detection (with visualisation) on every
    # screenshot in the folder.
    p = r'C:\Users\Administrator\Desktop\test_b_table\real2.png'  # NOTE(review): unused — overwritten by the loop below
    paths = glob(r'C:\Users\Administrator\Desktop\test_b_table\*.png')
    for p in paths:
        img = cv2.imread(p)
        detect(img, show=True)

BIN
botr/yolov8/weights.pt


+ 122 - 0
botr/yolov8/yolo_interface.py

@@ -0,0 +1,122 @@
+import base64
+import json
+import os
+import sys
+import traceback
+import torch
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../")
+from botr.yolov8.model import Predictor
+from botr.yolov8.predict import detect
+from format_convert.max_compute_config import max_compute
+MAX_COMPUTE = max_compute
+import time
+import cv2
+from flask import Flask, request
+from format_convert.utils import request_post, log, get_md5_from_bytes, get_platform, bytes2np
+from format_convert import _global
+
+
+ROOT = os.path.abspath(os.path.dirname(__file__)) + '/../../'
+model_path = ROOT + 'botr/yolov8/weights.pt'
+
+# 接口配置
+app = Flask(__name__)
+
+
@app.route('/yolo', methods=['POST'])
def _yolo():
    """Flask endpoint: decode a base64 image and return detected borderless-table boxes.

    Expects form fields ``data`` (base64 image) and ``md5``.  Responds with
    JSON ``{"b_table_list": ...}``; error markers: str([-9]) no form data,
    str([-5]) timeout, str([-1]) unexpected failure.
    """
    _global._init()
    _global.update({"port": globals().get("port")})
    start_time = time.time()

    log("into yolo_interface _yolo")
    try:
        if not request.form:
            log("yolo no data!")
            return json.dumps({"b_table_list": str([-9])})

        # Build the model once per process and cache it in module globals.
        yolo_predictor = globals().get("global_yolo_predictor")
        if yolo_predictor is None:
            image_size = 640
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            # device = 'cpu'
            yolo_predictor = Predictor(image_size, device, model_path)
            globals().update({"global_yolo_predictor": yolo_predictor})

        data = request.form.get("data")
        _md5 = request.form.get("md5")
        _global.update({"md5": _md5})

        b_table_list = yolo(data, yolo_predictor).get('b_table_list')
        return json.dumps({"b_table_list": b_table_list})
    except TimeoutError:
        return json.dumps({"b_table_list": str([-5])})
    except:  # top-level service boundary: report failure, never crash the worker
        traceback.print_exc()
        return json.dumps({"b_table_list": str([-1])})
    finally:
        log("yolo interface finish time " + str(time.time()-start_time))
+
+
def yolo(data, predictor):
    """Decode base64 image *data* and run borderless-table detection.

    Args:
        data: base64-encoded image bytes (as received from the HTTP form).
        predictor: an initialised Predictor instance.

    Returns:
        dict: {"b_table_list": detections} from botr.yolov8.predict.detect.

    Raises:
        TimeoutError: propagated from the detection pipeline.
    """
    log("into yolo_interface yolo")
    try:
        img_data = base64.b64decode(data)
        img = bytes2np(img_data)
        b_table_list = detect(img, predictor)
        return {"b_table_list": b_table_list}
    except TimeoutError:
        # Bare `raise` re-raises the ORIGINAL exception with its traceback;
        # the previous `raise TimeoutError` created a fresh, message-less
        # instance and discarded the real failure location.
        raise
+
+
def test_yolo_model(from_remote=True):
    """Manual smoke test: push a local image through the HTTP service
    (from_remote=True) or the in-process image pipeline (False).

    NOTE(review): despite the name this posts to the /otr endpoint, not /yolo —
    confirm which service is meant.
    """
    _global._init()
    from format_convert.convert_image import get_best_predict_size, image_process
    if get_platform() == "Windows":
        file_path = "C:/Users/Administrator/Desktop/error2.png"
        file_path = "C:/Users/Administrator/Downloads/1652672734044.jpg"
    else:
        file_path = "1.jpg"
    image_np = cv2.imread(file_path)
    # resize to the model's preferred input size and overwrite the source file
    best_h, best_w = get_best_predict_size(image_np)
    image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
    cv2.imwrite(file_path, image_resize)
    with open(file_path, "rb") as f:
        file_bytes = f.read()
    file_base64 = base64.b64encode(file_bytes)
    _md5 = get_md5_from_bytes(file_bytes)[0]

    _global.update({"port": 15010, "md5": _md5})

    if from_remote:
        file_json = {"data": file_base64, "is_from_pdf": False, "md5": _md5}

        # _url = "http://192.168.2.104:18000/otr"
        _url = "http://127.0.0.1:18000/otr"
        r = json.loads(request_post(_url, file_json))
    else:
        # otr_model = OtrModels().get_model()
        # r = otr(file_base64, otr_model, is_from_pdf=False)
        r = image_process(image_resize, file_path)
    print(r)
+
+
if __name__ == '__main__':
    # Port (and optionally GPU index) come from the command line;
    # defaults are for local runs.
    if len(sys.argv) == 2:
        port = int(sys.argv[1])
    elif len(sys.argv) == 3:
        port = int(sys.argv[1])
        using_gpu_index = int(sys.argv[2])
    else:
        port = 18080
        using_gpu_index = 0
    # Log BEFORE starting the server: app.run() blocks until shutdown, so a
    # log call placed after it (as before) would never execute.
    log("YOLO running "+str(port))
    # app.run(host='0.0.0.0', port=port, processes=1, threaded=False, debug=False)
    app.run(host='0.0.0.0', port=port)

    # test_yolo_model(False)

    # print(json.dumps([-2]))

    # otr_model = OtrModels().get_model()
    # otr("11", otr_model)

+ 466 - 0
botr/yolov8/yolo_utils.py

@@ -0,0 +1,466 @@
+import logging
+import math
+import re
+import time
+from pathlib import Path
+from types import SimpleNamespace
+
+import cv2
+import torch
+import torchvision
+import yaml
+import numpy as np
+import torch.nn as nn
+from PIL import Image
+
+
def yaml_load(file='data.yaml', append_filename=False):
    """
    Read a YAML file and return its contents as a dict.

    Args:
        file (str, optional): Path to the YAML file. Default is 'data.yaml'.
        append_filename (bool): If True, record the file name under the
            'yaml_file' key of the returned dict. Default is False.

    Returns:
        dict: Parsed YAML data (optionally including the file name).
    """
    with open(file, errors='ignore', encoding='utf-8') as fh:
        raw = fh.read()

    # Drop characters that are not valid printable YAML content
    if not raw.isprintable():
        raw = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', raw)

    data = yaml.safe_load(raw)
    return {**data, 'yaml_file': str(file)} if append_filename else data
+
+
def smart_inference_mode():
    """Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator."""

    def decorate(fn):
        """Applies appropriate torch decorator for inference mode based on torch version."""
        # Compare (major, minor) as a tuple: the original `major >= 1 and
        # minor >= 9` check wrongly failed for torch 2.x (minor 0 < 9).
        major, minor = (int(v) for v in re.findall(r'\d+', torch.__version__)[:2])
        torch_1_9 = (major, minor) >= (1, 9)
        return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn)

    return decorate
+
+
def make_anchors(feats, strides, grid_cell_offset=0.5):
    """Generate anchor points and per-anchor strides from feature maps.

    Args:
        feats (list[torch.Tensor]): Feature maps, each of shape (B, C, H, W).
        strides (list[int]): Stride of each feature map relative to the input.
        grid_cell_offset (float): Offset added to each grid coordinate;
            0.5 centers anchors inside their cells.

    Returns:
        (torch.Tensor, torch.Tensor): Anchor points of shape (sum(H*W), 2)
            and matching strides of shape (sum(H*W), 1).
    """
    anchor_points, stride_tensor = [], []
    assert feats is not None
    dtype, device = feats[0].dtype, feats[0].device
    # Version check hoisted out of the loop and compared as a (major, minor)
    # tuple: the original `major >= 1 and minor >= 10` wrongly failed for 2.x.
    major, minor = (int(v) for v in re.findall(r'\d+', torch.__version__)[:2])
    torch_1_10 = (major, minor) >= (1, 10)
    for i, stride in enumerate(strides):
        _, _, h, w = feats[i].shape
        sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        # 'indexing' keyword only exists from torch 1.10; older versions
        # already default to 'ij' semantics.
        sy, sx = torch.meshgrid(sy, sx, indexing='ij') if torch_1_10 else torch.meshgrid(sy, sx)
        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
    return torch.cat(anchor_points), torch.cat(stride_tensor)
+
+
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Convert (left, top, right, bottom) distances from anchor points into
    boxes, either as (cx, cy, w, h) or (x1, y1, x2, y2)."""
    left_top, right_bottom = distance.chunk(2, dim)
    top_left = anchor_points - left_top
    bottom_right = anchor_points + right_bottom
    if not xywh:
        return torch.cat((top_left, bottom_right), dim)  # xyxy bbox
    center = (top_left + bottom_right) / 2
    size = bottom_right - top_left
    return torch.cat((center, size), dim)  # xywh bbox
+
+
def attempt_load_one_weight(weight, device=None, inplace=True):
    """Load a single DetectionModel checkpoint and prepare it for inference
    (fused, eval mode, with legacy-module compatibility fixes applied)."""
    from botr.yolov8.module import Detect
    from botr.yolov8.model import DetectionModel

    model = DetectionModel()
    ckpt = model.load_state_dict(torch.load(weight))
    model.to(device).float()
    model = model.fuse().eval()  # fused, inference-only model

    # Backward-compatibility tweaks for modules saved with older torch versions
    for module in model.modules():
        module_type = type(module)
        if module_type in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect):
            module.inplace = inplace  # torch 1.7.0 compatibility
        elif module_type is nn.Upsample and not hasattr(module, 'recompute_scale_factor'):
            module.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model and the load_state_dict result
    return model, ckpt
+
+
def xywh2xyxy(x):
    """
    Convert boxes from (center-x, center-y, width, height) format to corner
    format (x1, y1, x2, y2), where (x1, y1) is the top-left corner and
    (x2, y2) the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): Boxes in xywh format.
    Returns:
        np.ndarray | torch.Tensor: Boxes in xyxy format (same type as input).
    """
    out = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    out[..., 0] = x[..., 0] - half_w  # top left x
    out[..., 1] = x[..., 1] - half_h  # top left y
    out[..., 2] = x[..., 0] + half_w  # bottom right x
    out[..., 3] = x[..., 1] + half_h  # bottom right y
    return out
+
+
def box_iou(box1, box2, eps=1e-7):
    """
    Pairwise intersection-over-union (IoU) of two box sets in
    (x1, y1, x2, y2) format.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): (N, 4) boxes.
        box2 (torch.Tensor): (M, 4) boxes.
        eps (float, optional): Small constant preventing division by zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): (N, M) matrix of pairwise IoU values.
    """
    # Split each set into top-left / bottom-right corners; broadcasting
    # unsqueeze makes the result (N, M, 2) per corner.
    a1, a2 = box1.unsqueeze(1).chunk(2, 2)
    b1, b2 = box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp_(0).prod(2)
    area1 = (a2 - a1).prod(2)
    area2 = (b2 - b1).prod(2)
    # IoU = intersection / union
    return inter / (area1 + area2 - inter + eps)
+
+
def clip_boxes(boxes, shape):
    """
    Clip bounding boxes in place to the boundaries of an image.

    Args:
      boxes (torch.Tensor | np.ndarray): boxes in (x1, y1, x2, y2) format
      shape (tuple): image shape as (height, width)
    """
    height, width = shape[0], shape[1]
    if isinstance(boxes, torch.Tensor):  # per-coordinate clamp is faster for tensors
        boxes[..., 0].clamp_(0, width)   # x1
        boxes[..., 1].clamp_(0, height)  # y1
        boxes[..., 2].clamp_(0, width)   # x2
        boxes[..., 3].clamp_(0, height)  # y2
    else:  # np.array: clip x and y columns in grouped slices
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)   # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)  # y1, y2
+
+
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """
    Rescale xyxy boxes from the shape they were predicted in (img1_shape) to
    the shape of a different image (img0_shape), undoing letterbox gain and
    padding, then clipping to the target image.

    Args:
      img1_shape (tuple): shape the boxes are currently in, (height, width).
      boxes (torch.Tensor): boxes in (x1, y1, x2, y2) format.
      img0_shape (tuple): target image shape, (height, width).
      ratio_pad (tuple): optional ((gain, ...), (pad_x, pad_y)); computed from
                         the two shapes when omitted.

    Returns:
      boxes (torch.Tensor): the rescaled boxes, (x1, y1, x2, y2).
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1][0], ratio_pad[1][1]
    else:
        # gain = old / new; padding centers the resized content
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)

    boxes[..., [0, 2]] -= pad_x  # x padding
    boxes[..., [1, 3]] -= pad_y  # y padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes
+
+
def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nc=0,  # number of classes (optional)
        max_time_img=0.05,
        max_nms=30000,
        max_wh=7680,
):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Arguments:
        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks. The tensor should be in the format
            output by a model, such as YOLO.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
            Valid values are between 0.0 and 1.0.
        classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
        agnostic (bool): If True, the model is agnostic to the number of classes, and all
            classes will be considered as one.
        multi_label (bool): If True, each box may have multiple labels.
        labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
            list contains the apriori labels for a given image. The list should be in the format
            output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
        max_wh (int): The maximum box width and height in pixels

    Returns:
        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
    """

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of extra (mask) channels per box
    mi = 4 + nc  # mask start index
    # Candidate mask: any class score above the confidence threshold
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One (0, 6+nm) placeholder per image so empty results have a stable shape
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        # (channels, boxes) -> (boxes, channels), then keep only candidates
        x = x.transpose(0, -1)[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)
        box = xywh2xyxy(box)  # center_x, center_y, width, height) to (x1, y1, x2, y2)
        if multi_label:
            i, j = (cls > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes

        # Batched NMS: offset boxes by class so different classes never suppress each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy
+
+
def make_divisible(x, divisor):
    """Round ``x`` up to the nearest multiple of ``divisor``."""
    if isinstance(divisor, torch.Tensor):
        divisor = int(divisor.max())  # a tensor divisor contributes its largest value
    quotient = math.ceil(x / divisor)
    return quotient * divisor
+
+
def initialize_weights(model):
    """Set YOLO-default hyper-parameters on normalization/activation modules."""
    for module in model.modules():
        module_type = type(module)
        if module_type is nn.Conv2d:
            # Keep torch's default (kaiming) initialization
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif module_type is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif module_type in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU):
            module.inplace = True
+
+
def get_num_params(model):
    """Total parameter count of a YOLO model."""
    total = 0
    for p in model.parameters():
        total += p.numel()
    return total
+
+
def get_num_gradients(model):
    """Total count of trainable (gradient-carrying) parameters in a YOLO model."""
    trainable = (p.numel() for p in model.parameters() if p.requires_grad)
    return sum(trainable)
+
+
class LetterBox:
    """Resize an image and pad it to a target shape for detection,
    instance segmentation, and pose tasks."""

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
        """Store the target shape and resizing behaviour flags."""
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride

    def __call__(self, labels=None, image=None):
        """Letterbox the image (updating labels when given); returns the
        labels dict when labels are present, otherwise the bare image."""
        labels = {} if labels is None else labels
        img = image if image is not None else labels.get('img')
        shape = img.shape[:2]  # current (height, width)
        target = labels.pop('rect_shape', self.new_shape)
        if isinstance(target, int):
            target = (target, target)

        # Scale ratio (new / old); optionally forbid upscaling (better val mAP)
        scale = min(target[0] / shape[0], target[1] / shape[1])
        if not self.scaleup:
            scale = min(scale, 1.0)

        ratio = scale, scale  # (width, height) ratios
        unpadded = int(round(shape[1] * scale)), int(round(shape[0] * scale))
        pad_w = target[1] - unpadded[0]  # total horizontal padding
        pad_h = target[0] - unpadded[1]  # total vertical padding
        if self.auto:  # pad only up to the nearest stride multiple
            pad_w, pad_h = np.mod(pad_w, self.stride), np.mod(pad_h, self.stride)
        elif self.scaleFill:  # stretch to fill the target, no padding
            pad_w, pad_h = 0.0, 0.0
            unpadded = (target[1], target[0])
            ratio = target[1] / shape[1], target[0] / shape[0]  # width, height ratios

        pad_w /= 2  # split padding evenly between both sides
        pad_h /= 2
        if labels.get('ratio_pad'):
            labels['ratio_pad'] = (labels['ratio_pad'], (pad_w, pad_h))  # for evaluation

        if shape[::-1] != unpadded:  # resize only when needed
            img = cv2.resize(img, unpadded, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))  # grey border

        if not len(labels):
            return img
        labels = self._update_labels(labels, ratio, pad_w, pad_h)
        labels['img'] = img
        labels['resized_shape'] = target
        return labels

    def _update_labels(self, labels, ratio, padw, padh):
        """Scale and shift label instances to match the letterboxed image."""
        labels['instances'].convert_bbox(format='xyxy')
        labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
        labels['instances'].scale(*ratio)
        labels['instances'].add_padding(padw, padh)
        return labels
+
+
class LoadPilAndNumpy:
    """Wrap PIL images / numpy arrays so they can be iterated like a
    single-batch dataloader."""

    def __init__(self, im0, imgsz=640):
        """Normalize the input(s) into a list of BGR numpy arrays."""
        if not isinstance(im0, list):
            im0 = [im0]
        # PIL images may carry a filename; numpy arrays get a synthetic one
        self.paths = [getattr(im, 'filename', f'image{i}.jpg') for i, im in enumerate(im0)]
        self.im0 = [self._single_check(im) for im in im0]
        self.imgsz = imgsz
        self.mode = 'image'
        self.bs = len(self.im0)  # batch size
        self.source_type = ''

    @staticmethod
    def _single_check(im):
        """Validate one input and convert PIL images to contiguous BGR arrays."""
        assert isinstance(im, (Image.Image, np.ndarray)), f'Expected PIL/np.ndarray image type, but got {type(im)}'
        if isinstance(im, Image.Image):
            if im.mode != 'RGB':
                im = im.convert('RGB')
            im = np.asarray(im)[:, :, ::-1]  # RGB -> BGR
            im = np.ascontiguousarray(im)  # contiguous
        return im

    def __len__(self):
        """Number of wrapped images."""
        return len(self.im0)

    def __next__(self):
        """Yield (paths, images, None, '') exactly once — the whole list is one batch."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, None, ''

    def __iter__(self):
        """Reset the single-pass counter and return self."""
        self.count = 0
        return self

+ 46 - 0
botr/yolov8/yolov8_model.yaml

@@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 1  # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768]   # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512]   # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512]   # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)

+ 247 - 88
format_convert/convert_image.py

@@ -4,8 +4,10 @@ import inspect
 import io
 import logging
 import os
+import re
 import sys
 import time
+from glob import glob
 import requests
 import numpy as np
 from PIL import Image
@@ -19,9 +21,11 @@ from format_convert.utils import judge_error_code, add_div, LineTable, get_table
 from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface, from_gpu_interface_redis, \
     from_idc_interface, from_isr_interface
 from format_convert.table_correct import get_rotated_image
+from botr.extract_table import get_table
 
 
-def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, use_ocr=True):
+def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False,
+                  b_table_from_text=False, pdf_obj_list=[], pdf_layout_size=()):
     from format_convert.convert_tree import _Table, _Sentence
 
     def get_cluster(t_list, b_list, axis):
@@ -206,36 +210,125 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         log("otr resize bbox recover " + str(time.time()-start_time))
         return list_line
 
-    def table_process(list_line, text_list, bbox_list):
    def botr_process(_image_np, table_list2, text_list2, box_list2, text_box_list2, obj_in_table_list2,
                     from_pdf=False, pdf_obj_list=[], pdf_layout_size=()):
        # Borderless-table recognition: detect tables without ruling lines and
        # merge the results into the existing table / textbox collections.
        # NOTE(review): mutable default args ([] / ()) — currently safe because
        # neither default is mutated, but fragile; prefer None sentinels.
        if from_pdf:
            # Cross-validate OCR results against pdf objects; for now the
            # pdf-extracted text is used directly
            h_ratio = _image_np.shape[0] / pdf_layout_size[1]
            w_ratio = _image_np.shape[1] / pdf_layout_size[0]
            pdf_text_list = []
            pdf_box_list = []
            for obj in pdf_obj_list:
                # pdf coordinates are vertically flipped (origin at bottom-left)
                obj.bbox = (obj.bbox[0], pdf_layout_size[1]-obj.bbox[1],
                            obj.bbox[2], pdf_layout_size[1]-obj.bbox[3])

                # Scale coordinates by the size ratio between the two pages
                obj.bbox = (obj.bbox[0]*w_ratio, obj.bbox[1]*h_ratio,
                            obj.bbox[2]*w_ratio, obj.bbox[3]*h_ratio)

                # Drop watermark characters (a single very wide glyph)
                text = re.sub('[\n ]', '', obj.get_text())
                if len(text) == 1 and abs(obj.bbox[0] - obj.bbox[2]) >= 70:
                    continue

                # 4-point box layout: only corners [0] and [2] are populated
                pdf_box_list.append([[int(obj.bbox[0]), int(obj.bbox[3])],
                                     [],
                                     [int(obj.bbox[2]), int(obj.bbox[1])],
                                     []
                                     ])
                pdf_text_list.append(text)

            pdf_text_box_list = get_text_box_obj(pdf_text_list, pdf_box_list)

            text_list2 = pdf_text_list
            box_list2 = pdf_box_list
            text_box_list2 = pdf_text_box_list

        _text_box_list, _table_list, _obj_in_table_list = get_table(_image_np, table_list2, text_list2, box_list2, text_box_list2)
        # print('_text_box_list', len(_text_box_list))
        # print('_obj_in_table_list', len(_obj_in_table_list))
        # print('text_box_list2', len(text_box_list2))
        # print('obj_in_table_list2', len(obj_in_table_list2))

        # Save the borderless-table sample for later inspection/training
        if _table_list:
            save_b_table(_image_np, text_box_list2, from_pdf)

        # Merge new results into the existing collections (dedupe textboxes)
        text_box_list2 += _text_box_list
        text_box_list2 = list(set(text_box_list2))
        table_list2 += _table_list
        obj_in_table_list2 = obj_in_table_list2.union(_obj_in_table_list)
        # print('text_box_list2', len(text_box_list2))
        # print('obj_in_table_list2', len(obj_in_table_list2))
        return text_box_list2, table_list2, obj_in_table_list2
+
+    def table_process(list_line, list_text_boxes):
         # 调用现成方法形成表格
         try:
-            from format_convert.convert_tree import TableLine
-            list_lines = []
-            for line in list_line:
-                list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
-            from format_convert.convert_tree import TextBox
-            list_text_boxes = []
-            for i in range(len(bbox_list)):
-                bbox = bbox_list[i]
-                b_text = text_list[i]
-                list_text_boxes.append(TextBox([bbox[0][0], bbox[0][1],
-                                                bbox[2][0], bbox[2][1]], b_text))
-
-            # for _textbox in list_text_boxes:
-            #     print("==",_textbox.get_text())
-            lt = LineTable()
-            # print('text_list', text_list)
-            # print('bbox_list', bbox_list)
-            # print('list_line', list_line)
-            tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
-
-            # 合并同一行textbox
-            list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
-            return list_text_boxes, tables, obj_in_table
+            if list_line:
+                from format_convert.convert_tree import TableLine
+                list_lines = []
+                for line in list_line:
+                    list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
+
+                lt = LineTable()
+                tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
+                if not tables:
+                    return list_text_boxes, tables, obj_in_table
+
+                # 合并同一行textbox
+                # list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
+                return list_text_boxes, tables, obj_in_table
+            else:
+                return list_text_boxes, [], set()
         except:
             traceback.print_exc()
             return [-8], [-8], [-8]
 
+    def get_text_box_obj(_text_list, _bbox_list):
+        from format_convert.convert_tree import TextBox
+        _text_box_list = []
+        for i in range(len(_bbox_list)):
+            bbox = _bbox_list[i]
+            b_text = _text_list[i]
+            _text_box_list.append(TextBox([bbox[0][0], bbox[0][1],
+                                          bbox[2][0], bbox[2][1]], b_text))
+        return _text_box_list
+
    def save_b_table(image_np2, text_box_list2, from_pdf=False):
        # Persist the page image (and, for pdf sources, its text boxes) of
        # pages where a borderless table was detected, as training samples.
        # Side effects only: writes <md5>-<index>.png / .txt under _path.
        _start_time = time.time()
        _path = '/data/fangjiasheng/format_conversion_maxcompute/save_b_table'
        # _path = 'D:/Project/format_conversion_maxcompute/save_b_table'
        max_index = 20000  # stop collecting once this many samples exist
        if os.path.exists(_path):
            file_list = glob(_path + '/*')
            if file_list:
                # File names look like <md5>-<index>.<ext>; continue from the
                # highest existing index
                file_index_list = [int(re.split('[/.\\\\-]', x)[-2]) for x in file_list]
                file_index_list.sort(key=lambda x: x)
                index = file_index_list[-1] + 1
            else:
                index = 0
            if index > max_index:
                return

            # md5 of the source file (set globally for this request)
            from format_convert import _global
            _md5 = _global.get("md5")

            _image_path = _path + '/' + str(_md5) + '-' + str(index) + '.png'
            cv2.imwrite(_image_path, image_np2)
            log('save b_table image success!')

            if from_pdf:
                _file_path = _path + '/' + str(_md5) + '-' + str(index) + '.txt'
                new_text_box_list2 = [str(x) + '\n' for x in text_box_list2]
                with open(_file_path, 'w') as f:
                    f.writelines(new_text_box_list2)
                log('save b_table txt success!')

        log('save_b_table cost: ' + str(time.time()-_start_time))
+
     log("into image_preprocess")
     try:
         if image_np is None:
@@ -243,78 +336,153 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         if image_np.shape[0] <= 20 or image_np.shape[1] <= 20:
             return []
 
-        # 判断是否需要长图分割
-        slice_flag = need_image_slice(image_np)
-        log("need_image_slice " + str(slice_flag) + " " + str(image_np.shape))
-        idc_flag = False
-        image_np_list = [image_np]
-        if slice_flag:
-            # 方向分类
-            image_np = idc_process(image_np)
-            idc_flag = True
-            if isinstance(image_np, list):
-                return image_np
-
-            # 再判断
-            if need_image_slice(image_np):
-                # 长图分割
-                image_np_list = image_slice_new(image_np)
-        if len(image_np_list) < 1:
-            log("image_slice failed!")
+        if not b_table_from_text:
+            # 判断是否需要长图分割
+            slice_flag = need_image_slice(image_np)
+            log("need_image_slice " + str(slice_flag) + " " + str(image_np.shape))
+            idc_flag = False
             image_np_list = [image_np]
-            # return [-10]
-
-        all_obj_list = []
-        _add_y = 0
-        for image_np in image_np_list:
-            # print("sub image shape", image_np.shape)
-            # 整体分辨率限制
-            threshold = 2048
-            if image_np.shape[0] > threshold or image_np.shape[1] > threshold:
-                h, w = get_best_predict_size2(image_np, threshold=threshold)
-                log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
-                image_np = pil_resize(image_np, h, w)
-
-            # 印章去除
-            image_np = isr_process(image_np)
-            if isinstance(image_np, list):
-                return image_np
-
-            # 文字识别
-            text_list, box_list = ocr_process(image_np)
-            if judge_error_code(text_list):
-                return text_list
-
-            # 判断ocr识别是否正确
-            if ocr_cant_read(text_list, box_list) and not idc_flag:
+            if slice_flag:
                 # 方向分类
                 image_np = idc_process(image_np)
-                # cv2.imshow("idc_process", image_np)
-                # cv2.waitKey(0)
+                idc_flag = True
+                if isinstance(image_np, list):
+                    return image_np
+
+                # 再判断
+                if need_image_slice(image_np):
+                    # 长图分割
+                    image_np_list = image_slice_new(image_np)
+            if len(image_np_list) < 1:
+                log("image_slice failed!")
+                image_np_list = [image_np]
+                # return [-10]
+
+            all_obj_list = []
+            _add_y = 0
+            for image_np in image_np_list:
+                # print("sub image shape", image_np.shape)
+                # 整体分辨率限制
+                threshold = 2048
+                if image_np.shape[0] > threshold or image_np.shape[1] > threshold:
+                    h, w = get_best_predict_size2(image_np, threshold=threshold)
+                    log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
+                    image_np = pil_resize(image_np, h, w)
+
+                # 印章去除
+                image_np = isr_process(image_np)
                 if isinstance(image_np, list):
                     return image_np
 
                 # 文字识别
-                text_list1, box_list_1 = ocr_process(image_np)
-                if judge_error_code(text_list1):
-                    return text_list1
+                text_list, box_list = ocr_process(image_np)
+                if judge_error_code(text_list):
+                    return text_list
+
+                # 判断ocr识别是否正确
+                if ocr_cant_read(text_list, box_list) and not idc_flag:
+                    # 方向分类
+                    image_np = idc_process(image_np)
+                    # cv2.imshow("idc_process", image_np)
+                    # cv2.waitKey(0)
+                    if isinstance(image_np, list):
+                        return image_np
+
+                    # 文字识别
+                    text_list1, box_list_1 = ocr_process(image_np)
+                    if judge_error_code(text_list1):
+                        return text_list1
+
+                    # 比较字数
+                    # print("ocr process", len("".join(text_list)), len("".join(text_list1)))
+                    if len("".join(text_list)) < len("".join(text_list1)):
+                        text_list = text_list1
+                        box_list = box_list_1
+
+                # 表格识别
+                line_list = otr_process(image_np)
+                if judge_error_code(line_list):
+                    return line_list
+
+                # 生成TextBox对象
+                text_box_list = get_text_box_obj(text_list, box_list)
+
+                # 表格生成
+                text_box_list, table_list, obj_in_table_list = table_process(line_list, text_box_list)
+                if judge_error_code(table_list):
+                    return table_list
+
+                # 无边框表格识别
+                start_time = time.time()
+                text_box_list, table_list, obj_in_table_list = botr_process(image_np, table_list,
+                                                                            text_list, box_list,
+                                                                            text_box_list,
+                                                                            obj_in_table_list,
+                                                                            b_table_from_text,
+                                                                            pdf_obj_list,
+                                                                            pdf_layout_size,
+                                                                            )
+                log('botr process cost: ' + str(time.time()-start_time))
+
+                # 合并非表格的同一行TextBox
+                text_box_list = merge_textbox(text_box_list, obj_in_table_list)
+
+                # 对象生成
+                obj_list = []
+                for table in table_list:
+                    obj_list.append(_Table(table["table"], table["bbox"]))
+                for text_box in text_box_list:
+                    if text_box not in obj_in_table_list:
+                        obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
+
+                # 修正y
+                if len(image_np_list) > 1:
+                    list_y = []
+                    for obj in obj_list:
+                        obj.y += _add_y
+                        list_y.append(obj.y)
+                    if len(list_y) > 0:
+                        _add_y = max(list_y)
+
+                # 合并
+                all_obj_list += obj_list
 
-                # 比较字数
-                # print("ocr process", len("".join(text_list)), len("".join(text_list1)))
-                if len("".join(text_list)) < len("".join(text_list1)):
-                    text_list = text_list1
-                    box_list = box_list_1
+        else:
+            all_obj_list = []
+            table_list = []
+            text_list = []
+            box_list = []
+            text_box_list = []
+            obj_in_table_list = set()
 
             # 表格识别
             line_list = otr_process(image_np)
             if judge_error_code(line_list):
                 return line_list
 
+            # 生成TextBox对象
+            text_box_list = get_text_box_obj(text_list, box_list)
+
             # 表格生成
-            text_box_list, table_list, obj_in_table_list = table_process(line_list, text_list, box_list)
+            text_box_list, table_list, obj_in_table_list = table_process(line_list, text_box_list)
             if judge_error_code(table_list):
                 return table_list
 
+            # 无边框表格识别
+            start_time = time.time()
+            text_box_list, table_list, obj_in_table_list = botr_process(image_np, table_list,
+                                                                        text_list, box_list,
+                                                                        text_box_list,
+                                                                        obj_in_table_list,
+                                                                        b_table_from_text,
+                                                                        pdf_obj_list,
+                                                                        pdf_layout_size,
+                                                                        )
+            log('botr process cost: ' + str(time.time()-start_time))
+
+            # 合并非表格的同一行TextBox
+            text_box_list = merge_textbox(text_box_list, obj_in_table_list)
+
             # 对象生成
             obj_list = []
             for table in table_list:
@@ -323,15 +491,6 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
                 if text_box not in obj_in_table_list:
                     obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
 
-            # 修正y
-            if len(image_np_list) > 1:
-                list_y = []
-                for obj in obj_list:
-                    obj.y += _add_y
-                    list_y.append(obj.y)
-                if len(list_y) > 0:
-                    _add_y = max(list_y)
-
             # 合并
             all_obj_list += obj_list
 

+ 73 - 9
format_convert/convert_need_interface.py

@@ -1,8 +1,6 @@
 # encoding=utf8
 import base64
-import inspect
 import json
-import logging
 import multiprocessing
 import os
 import pickle
@@ -11,9 +9,11 @@ import sys
 import time
 import uuid
 import cv2
-import redis
+import torch
 from werkzeug.exceptions import NotFound
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from botr.yolov8.yolo_interface import yolo
+from botr.yolov8.model import Predictor
 from atc.atc_interface import AtcModels, atc
 from idc.idc_interface import IdcModels, idc
 from isr.isr_interface import IsrModels, isr
@@ -21,7 +21,7 @@ import traceback
 import requests
 from format_convert import _global
 from format_convert.utils import get_platform, get_sequential_data, judge_error_code, request_post, get_ip_port, \
-    get_intranet_ip, get_logger, log, get_args_from_config, get_using_ip
+    get_intranet_ip, get_logger, log, get_args_from_config, get_using_ip, np2bytes, set_flask_global
 from ocr.ocr_interface import ocr, OcrModels
 from otr.otr_interface import otr, OtrModels
 from format_convert.libreoffice_interface import office_convert
@@ -663,6 +663,64 @@ def from_atc_interface(text, from_remote=FROM_REMOTE):
         return [-11]
 
 
def from_yolo_interface(image_stream, from_remote=FROM_REMOTE):
    """Run the yolov8 borderless-table detector on an image.

    Either posts the base64-encoded image to a remote ``/yolo`` endpoint
    (retrying up to 3 times on other ports when the interface returns an
    error list) or runs a local Predictor that is lazily created once per
    process and cached in ``globals()``.

    :param image_stream: raw image bytes
    :param from_remote: use the HTTP interface instead of the local model
    :return: the detected table-box list on success, or an error-code list
             ([-5] timeout, [-2] connection error, [-11] unexpected error)
    """
    log("into from_yolo_interface")
    start_time = time.time()
    try:
        base64_stream = base64.b64encode(image_stream)

        # 调用接口
        try:
            if from_remote:
                retry_times_1 = 3
                # 重试: a list response is an error code — switch port and retry
                while retry_times_1:
                    ip_port = interface_pool_gunicorn("yolo")
                    if judge_error_code(ip_port):
                        return ip_port
                    _url = ip_port + "/yolo"
                    log('yolo _url ' + _url)
                    r = json.loads(request_post(_url, {"data": base64_stream,
                                                       "md5": _global.get("md5")},
                                                time_out=60))
                    log("get interface return")
                    if isinstance(r, list):
                        # 接口连不上换个端口重试
                        if retry_times_1 <= 1:
                            return r
                        retry_times_1 -= 1
                        log("retry post yolo_interface... left times " + str(retry_times_1))
                        continue
                    if judge_error_code(r):
                        return r
                    break
            else:
                predictor = globals().get("global_yolo_predictor")
                if predictor is None:
                    # lazy one-time model load for this process
                    print("=========== init yolo model ===========")
                    ROOT = os.path.abspath(os.path.dirname(__file__)) + '/../'
                    model_path = ROOT + 'botr/yolov8/weights.pt'
                    image_size = 640
                    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                    predictor = Predictor(image_size, device, model_path)
                    globals().update({"global_yolo_predictor": predictor})
                r = yolo(data=base64_stream, predictor=predictor)
        except TimeoutError:
            return [-5]
        except requests.exceptions.ConnectionError:
            return [-2]

        b_table_list = r.get("b_table_list")
        log("from_yolo_interface cost time " + str(time.time()-start_time))
        return b_table_list
    except Exception:
        log("from_yolo_interface error!")
        traceback.print_exc()
        return [-11]
+
+
 # def from_schedule_interface(interface_type):
 #     try:
 #         _ip = "http://" + get_intranet_ip()
@@ -740,6 +798,7 @@ def interface_pool_gunicorn(interface_type):
     ip_port_dict = _global.get("ip_port")
     try:
         if ip_port_dict is None or ip_port_flag_dict is None:
+            print('_global', _global.get_dict())
             raise NotFound
 
         # 负载均衡, 选取有该接口的ip
@@ -781,7 +840,7 @@ def interface_pool_gunicorn(interface_type):
         log(ip_port)
         return ip_port
     except NotFound:
-        log("ip_flag or ip_port_dict is None! checkout config")
+        log("ip_port or ip_port_dict is None! checkout config")
         return [-2]
     except:
         traceback.print_exc()
@@ -1026,9 +1085,14 @@ def interface_pool_gunicorn_old(interface_type):
 
 
 if __name__ == "__main__":
-    from format_convert.utils import set_flask_global
     _global._init()
     set_flask_global()
-
-    for i in range(10):
-        print("result", interface_pool_gunicorn("otr"))
+    _img = cv2.imread(r"C:/Users/Administrator/Desktop/test_b_table/error11.png")
+    _img_bytes = np2bytes(_img)
+    b_list = from_yolo_interface(_img_bytes, from_remote=True)
+    for l in b_list:
+        for b in l:
+            cv2.rectangle(_img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 0, 255), 2)
+    cv2.namedWindow('img', cv2.WINDOW_NORMAL)
+    cv2.imshow('img', _img)
+    cv2.waitKey(0)

+ 81 - 0
format_convert/convert_pdf.py

@@ -1213,6 +1213,62 @@ class PDFConvert:
 
         return True
 
def judge_b_table(self, lt_text_list):
    """Heuristically decide whether a page layout suggests a borderless table.

    Text objects are first grouped into visual rows by y coordinate
    (tolerance 2pt). A row "looks tabular" when it contains several
    separate text boxes, or a single box whose text has a long internal
    gap (3+ spaces) with CJK words on both sides — i.e. two cells rendered
    into one text object. Three consecutive tabular rows (with up to 2
    tolerated misses in between) set the flag.

    :param lt_text_list: pdfminer text objects exposing .bbox and .get_text()
    :return: True when the page likely contains a borderless table
    """
    # guard: an empty page cannot contain a table (the original code would
    # raise IndexError on lt_text_list[0])
    if not lt_text_list:
        log('pdf is_b_table_flag False')
        return False

    # 先分行: group boxes into rows by y (pdfminer y grows upward)
    lt_text_list.sort(key=lambda x: (x.bbox[1], x.bbox[0]))
    lt_text_row_list = []
    current_h = lt_text_list[0].bbox[1]
    row = []
    threshold = 2
    for lt_text in lt_text_list:
        bbox = lt_text.bbox
        if current_h - threshold <= bbox[1] <= current_h + threshold:
            row.append(lt_text)
        else:
            if row:
                lt_text_row_list.append(row)
            row = [lt_text]
            current_h = lt_text.bbox[1]
    if row:
        lt_text_row_list.append(row)

    # 判断文本中间是否是空格,或一行文本中间有多个
    is_b_table_flag = False
    is_b_table_cnt = 3   # consecutive tabular rows required
    tolerate_cnt = 2     # non-tabular rows tolerated before resetting
    t_cnt = 0
    row_cnt = 0
    for row in lt_text_row_list:
        # 水印行跳过: single one-character text (get_text() presumably ends
        # with '\n', hence the [:-1] — TODO confirm)
        if len(row) == 1 and len(row[0].get_text()[:-1]) == 1:
            continue
        if len(row) == 1:
            text = row[0].get_text()
            match = re.search('[ ]{3,}', text)
            if match and re.search('[\u4e00-\u9fff]{2,}', text[:match.span()[0]]) \
                    and re.search('[\u4e00-\u9fff]{2,}', text[match.span()[1]:]):
                row_cnt += 1
                t_cnt = 0
            else:
                # 容忍: allow a few plain rows without breaking the streak
                if t_cnt < tolerate_cnt:
                    t_cnt += 1
                    continue
                row_cnt = 0
        else:
            # multiple separate boxes on one visual line
            row_cnt += 1
            t_cnt = 0

        if row_cnt >= is_b_table_cnt:
            is_b_table_flag = True
            break
    log('pdf is_b_table_flag ' + str(is_b_table_flag))
    return is_b_table_flag
+
     def convert_page(self, page, page_no):
         # pdf page.annots为None,不经过get_layout,直接ocr
         # if page.annots is None:
@@ -1268,6 +1324,21 @@ class PDFConvert:
             if not self.is_text_legal(lt_text_list, page_no):
                 return
 
+            # 根据text规律,判断该页是否可能有无边框表格
+            start_time = time.time()
+            if self.judge_b_table(lt_text_list):
+                page_image = self.get_page_image(page_no)
+                if judge_error_code(page_image):
+                    self._page.error_code = page_image
+                else:
+                    _image = _Image(page_image[1], page_image[0])
+                    _image.is_from_pdf = True
+                    _image.b_table_from_text = True
+                    _image.b_table_text_obj_list = lt_text_list
+                    _image.b_table_layout_size = (layout.width, layout.height)
+                    self._page.add_child(_image)
+                log('convert_pdf judge_b_table set image cost: ' + str(time.time()-start_time))
+
             try:
                 lt_line_list = self.get_page_lines(layout, page_no)
             except:
@@ -1336,6 +1407,16 @@ class PDFConvert:
             if not self.is_text_legal(lt_text_list, page_no):
                 return
 
+            # 根据text规律,判断该页是否可能有无边框表格
+            if self.judge_b_table(lt_text_list):
+                page_image = self.get_page_image(page_no)
+                if judge_error_code(page_image):
+                    self._page.error_code = page_image
+                else:
+                    _image = _Image(page_image[1], page_image[0])
+                    _image.is_from_pdf = True
+                    self._page.add_child(_image)
+
             # lt_line_list = self.get_text_lines(page, page_no)
             try:
                 lt_line_list = self.get_page_lines(layout, page_no)

+ 8 - 6
format_convert/convert_test.py

@@ -24,10 +24,11 @@ def test_one(p, from_remote=False):
 
     data = {"file": file_base64, "type": p.split(".")[-1], "filemd5": 100}
     if from_remote:
-        _url = 'http://121.46.18.113:15010/convert'
+        # _url = 'http://121.46.18.113:15010/convert'
         # _url = 'http://192.168.2.103:15010/convert'
+        # _url = 'http://192.168.2.102:15011/convert'
         # _url = 'http://172.16.160.65:15010/convert'
-        # _url = 'http://127.0.0.1:15010/convert'
+        _url = 'http://127.0.0.1:15010/convert'
         result = json.loads(request_post(_url, data, time_out=10000))
         text_str = ""
         for t in result.get("result_html"):
@@ -61,10 +62,11 @@ if __name__ == '__main__':
         # file_path = "C:/Users/Administrator/Desktop/test_xls/merge_cell.xlsx"
         # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/20210609202634853485.xlsx"
         # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_ODPS/1624325845476.pdf"
-        # file_path = "C:/Users/Administrator/Downloads/QQ图片20230616105216.jpg"
-        # file_path = "C:/Users/Administrator/Desktop/test_xls/error2.xlsx"
-        # file_path = "C:/Users/Administrator/Desktop/test_image/error9-2.png"
-        file_path = "C:/Users/Administrator/Desktop/test_pdf/直接读表格线error/error51.pdf"
+        # file_path = "C:/Users/Administrator/Downloads/1687842668787.pdf"
+        # file_path = "C:/Users/Administrator/Desktop/test_doc/error8.doc"
+        # file_path = "C:/Users/Administrator/Desktop/test_image/error3.png"
+        file_path = "C:/Users/Administrator/Desktop/test_b_table/error15.png"
+        # file_path = "C:/Users/Administrator/Desktop/test_pdf/直接读中文error/error2.pdf"
     else:
         file_path = "1660296734009.pdf"
     test_one(file_path, from_remote=True)

+ 19 - 4
format_convert/convert_tree.py

@@ -99,6 +99,12 @@ class _Image:
         self.error_code = None
         # objs in tables
         self.in_table_objs = set()
+        # 是否是文本形成的无边框表格
+        self.b_table_from_text = False
+        # pdf读取的文本对象
+        self.b_table_text_obj_list = []
+        # pdf layout的尺寸
+        self.b_table_layout_size = (0, 0)
 
     def add_child(self, child):
         if child.error_code is None:
@@ -128,11 +134,17 @@ class _Image:
         return
 
     def convert(self):
-        # 二进制转numpy
-        # image_np = Image.open(io.BytesIO(self.content))
-        # image_np = cv2.cvtColor(np.asarray(image_np), cv2.COLOR_RGB2BGR)
         image_np = cv2.imread(self.path)
-        obj_list = image_process(image_np, self.path, self.is_from_pdf, self.is_from_docx, use_ocr=True)
+        obj_list = image_process(image_np, self.path, self.is_from_pdf, self.is_from_docx,
+                                 self.b_table_from_text, self.b_table_text_obj_list,
+                                 self.b_table_layout_size)
+        if self.b_table_from_text:
+            temp_list = []
+            for obj in obj_list:
+                if isinstance(obj, _Table):
+                    temp_list.append(obj)
+            obj_list = temp_list
+
         if judge_error_code(obj_list):
             self.error_code = obj_list
             return
@@ -192,6 +204,9 @@ class TextBox:
    def get_text(self):
        # Plain accessor for the recognized text content of this box.
        return self.text
 
+    def __str__(self):
+        return '(%s@#@%s)' % (str(self.text), '@'.join([str(x) for x in self.bbox]))
+
 
 class TableLine:
     def __init__(self, bbox):

+ 9 - 1
format_convert/interface.yml

@@ -36,13 +36,17 @@ MASTER:
     port_start: [18060]
     port_no: [2]
 
+  YOLO:
+    port_start: [18080]
+    port_no: [2]
+
   OFFICE:
     port_start: [16000]
     port_no: [25]
 
 
 SLAVE:
-  ip: ['http://192.168.0.114']
+  ip: []
 
   PATH:
     python: ['/data/anaconda3/envs/tf2/bin/python']
@@ -73,6 +77,10 @@ SLAVE:
     port_start:
     port_no:
 
+  YOLO:
+    port_start:
+    port_no:
+
   OFFICE:
     port_start:
     port_no:

+ 15 - 0
format_convert/monitor_process_config.py

@@ -22,6 +22,7 @@ otr_port_list = get_args_from_config(ip_port_dict, ip, "otr")
 idc_port_list = get_args_from_config(ip_port_dict, ip, "idc")
 isr_port_list = get_args_from_config(ip_port_dict, ip, "isr")
 atc_port_list = get_args_from_config(ip_port_dict, ip, "atc")
+yolo_port_list = get_args_from_config(ip_port_dict, ip, "yolo")
 soffice_port_list = get_args_from_config(ip_port_dict, ip, "office", "MASTER")
 if soffice_port_list:
     soffice_port_list = soffice_port_list[0]
@@ -38,6 +39,7 @@ print("otr_port_list", otr_port_list)
 print("idc_port_list", idc_port_list)
 print("isr_port_list", isr_port_list)
 print("atc_port_list", atc_port_list)
+print("yolo_port_list", yolo_port_list)
 print("soffice_port_list", soffice_port_list)
 
 # 根据port生成gunicorn语句
@@ -46,6 +48,7 @@ otr_comm_list = []
 isr_comm_list = []
 idc_comm_list = []
 atc_comm_list = []
+yolo_comm_list = []
 for i in range(len(ocr_port_list)):
     ocr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(ocr_port_list[i]))
                          + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
@@ -66,6 +69,10 @@ for i in range(len(atc_port_list)):
     atc_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(atc_port_list[i]))
                          + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
                          + project_path_list[i] + "/atc atc_interface:app" + std_out_gpu)
+for i in range(len(yolo_port_list)):
+    yolo_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(yolo_port_list[i]))
+                         + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
+                         + project_path_list[i] + "/botr/yolov8 yolo_interface:app" + std_out_gpu)
 
 convert_comm = "nohup " + gunicorn_path_list[0] + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
                + project_path_list[0] + "/format_convert convert:app" + std_out
@@ -97,6 +104,8 @@ def restart(process_type, port, index=0):
         _comm = re.sub("#", port, isr_comm_list[index])
     elif process_type == "atc":
         _comm = re.sub("#", port, atc_comm_list[index])
+    elif process_type == "yolo":
+        _comm = re.sub("#", port, yolo_comm_list[index])
     else:
         _comm = "netstat -nltp"
         print("no process_type", process_type)
@@ -205,6 +214,12 @@ def monitor():
                 if p not in current_port_list:
                     restart("atc", p, index=j)
 
+    if yolo_port_list:
+        for j in range(len(yolo_port_list)):
+            for p in yolo_port_list[j][:1]:
+                if p not in current_port_list:
+                    restart("yolo", p, index=j)
+
     if soffice_port_list:
         for p in soffice_port_list:
             if p not in current_port_list:

+ 2 - 2
format_convert/utils.py

@@ -1438,7 +1438,7 @@ def get_ip_port(node_type=None, interface_type=None):
         node_type_list = [node_type]
 
     if interface_type is None:
-        interface_type_list = ["convert", "ocr", "otr", "office", "path", "isr", "idc", "atc"]
+        interface_type_list = ["convert", "ocr", "otr", "office", "path", "isr", "idc", "atc", "yolo"]
     else:
         interface_type_list = [interface_type]
 
@@ -1672,7 +1672,7 @@ def set_flask_global():
     ip_port_dict = get_ip_port()
     for _k in ip_port_dict.keys():
         ip_port_flag.update({_k: {}})
-        for interface in ["ocr", "otr", "convert", "idc", "isr", "atc", "office"]:
+        for interface in ["ocr", "otr", "convert", "idc", "isr", "atc", 'yolo', "office"]:
             if ip_port_dict.get(_k).get("MASTER"):
                 if ip_port_dict.get(_k).get("MASTER").get(interface):
                     ip_port_flag[_k][interface] = 0