
Merge branch 'master' of http://192.168.2.103:3000/luojiehua/BIDI_ML_INFO_EXTRACTION

luojiehua committed 3 months ago
commit 7cf3273634

BIN       BiddingKG/dl/table_head/model_40_2_0.959.pth


+ 94 - 0  BiddingKG/dl/table_head/models/model_torch.py

@@ -73,6 +73,100 @@ class TableHeadModel(nn.Module):
         cnn3d_x = torch.permute(cnn3d_x, [2, 3, 1, 0])
         cnn3d_x = cnn3d_x.contiguous().view(row, col, char_num * self.char_embed_expand)
 
+        # dnn
+        x = self.dense3(cnn3d_x)
+        x = self.ln_dnn_2(x)
+        x = self.relu(x)
+        x = self.dense4(x)
+        x = self.sigmoid(x)
+        x = torch.squeeze(x, -1)
+        return x
+
+
+class TableHeadModel2(nn.Module):
+    def __init__(self):
+        super(TableHeadModel2, self).__init__()
+        self.char_num = 20
+        self.char_embed = 60
+        self.char_embed_expand = 128
+
+        self.dense0 = nn.Linear(self.char_embed, self.char_embed_expand)
+
+        self.dense3 = nn.Linear(self.char_num * self.char_embed_expand, 64)
+        self.dense4 = nn.Linear(64, 1)
+
+        self.sigmoid = nn.Sigmoid()
+
+        self.ln_dnn_2 = nn.LayerNorm([64])
+
+        self.device = torch.device("cpu")
+
+        self.relu = nn.LeakyReLU()
+        self.dropout = nn.Dropout(0.6)
+
+        # self.cnn1d_0 = nn.Conv1d(self.char_embed_expand,
+        #                          self.char_embed_expand,
+        #                          (3,), padding=self.get_padding(3))
+        # self.cnn1d_1 = nn.Conv1d(self.char_embed_expand,
+        #                          self.char_embed_expand,
+        #                          (3,), padding=self.get_padding(3))
+
+        encoder_layer1 = nn.TransformerEncoderLayer(d_model=self.char_embed_expand, nhead=2,
+                                                    dim_feedforward=128, batch_first=True)
+        self.transformer1 = nn.TransformerEncoder(encoder_layer1, 2)
+        self.ln_encoder_0 = nn.LayerNorm([self.char_embed_expand])
+
+        self.cnn3d_0 = nn.Conv3d(self.char_embed_expand, self.char_embed_expand,
+                                 (3, 3, 3), padding=self.get_padding(3))
+        self.cnn3d_1 = nn.Conv3d(self.char_embed_expand, self.char_embed_expand,
+                                 (3, 3, 3), padding=self.get_padding(3))
+        # self.cnn3d_2 = nn.Conv3d(self.char_embed, self.char_embed,
+        #                          (3, 3, 3), padding=self.get_padding(3))
+
+    def get_padding(self, kernel_size, stride=1):
+        return (kernel_size - 1) // 2 * stride
+
+    def forward(self, x):
+        batch, row, col, char_num, char_embed = x.shape
+
+        # Embedding
+        x = torch.squeeze(x, 0)
+        x = x.view([row*col, char_num, char_embed])
+        x = self.dense0(x)
+
+        # transformer
+        box_attention = self.transformer1(x)
+        box_attention = self.ln_encoder_0(box_attention)
+        box_attention = torch.permute(box_attention, [0, 2, 1])
+        box_attention = box_attention.contiguous().view(row, col, char_num, self.char_embed_expand)
+        box_attention = torch.unsqueeze(box_attention, 0)
+
+        # cnn1d_x = torch.permute(cnn1d_x, [0, 2, 1])
+        # cnn1d_x = self.cnn1d_0(cnn1d_x)
+        # cnn1d_x = self.relu(cnn1d_x)
+        # cnn1d_x = self.dropout(cnn1d_x)
+        # cnn1d_x = self.cnn1d_1(cnn1d_x)
+        # cnn1d_x = self.relu(cnn1d_x)
+        # cnn1d_x = self.dropout(cnn1d_x)
+        #
+        # cnn1d_x = torch.permute(cnn1d_x, [0, 2, 1])
+        # cnn1d_x = cnn1d_x.contiguous().view(row, col, char_num, self.char_embed_expand)
+        # cnn1d_x = torch.unsqueeze(cnn1d_x, 0)
+        # print(cnn1d_x.shape)
+
+        # cnn 3d
+        cnn3d_x = torch.permute(box_attention, [0, 4, 3, 1, 2])
+        cnn3d_x = self.cnn3d_0(cnn3d_x)
+        cnn3d_x = self.relu(cnn3d_x)
+        cnn3d_x = self.dropout(cnn3d_x)
+        cnn3d_x = self.cnn3d_1(cnn3d_x)
+        cnn3d_x = self.relu(cnn3d_x)
+        cnn3d_x = self.dropout(cnn3d_x)
+
+        cnn3d_x = torch.squeeze(cnn3d_x, 0)
+        cnn3d_x = torch.permute(cnn3d_x, [2, 3, 1, 0])
+        cnn3d_x = cnn3d_x.contiguous().view(row, col, char_num * self.char_embed_expand)
+
         # dnn
         x = self.dense3(cnn3d_x)
         x = self.ln_dnn_2(x)
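
A minimal smoke-test sketch (not part of this commit) for the new TableHeadModel2: it feeds a dummy table tensor in the (batch=1, rows, cols, 20 characters per cell, 60-dim character embedding) layout implied by forward() above and checks that one head score per cell comes back. The table size below is illustrative only.

    import torch
    from BiddingKG.dl.table_head.models.model_torch import TableHeadModel2

    model = TableHeadModel2()
    model.eval()

    rows, cols = 6, 4
    # forward() squeezes the batch dimension before reshaping, so batch must be 1
    dummy_table = torch.rand(1, rows, cols, model.char_num, model.char_embed)

    with torch.no_grad():
        scores = model(dummy_table)

    print(scores.shape)  # expected: torch.Size([6, 4]), one sigmoid head probability per cell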

+ 4 - 1   BiddingKG/dl/table_head/predict_torch.py

@@ -6,10 +6,12 @@ from torch.utils.data import DataLoader
 
 sys.path.append(os.path.abspath(os.path.dirname(__file__) + "/../../../"))
 from BiddingKG.dl.table_head.models.model_torch import TableHeadModel
+# from BiddingKG.dl.table_head.models.model_torch import TableHeadModel2
 from BiddingKG.dl.table_head.pre_process_torch import CustomDatasetTiny40, set_same_table_head, set_label
 
 device = torch.device("cpu")
-model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_0.951.pth'
+model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_0.959.pth'
+# model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_2_0.959.pth'
 batch_size = 1
 
 
@@ -18,6 +20,7 @@ def predict(table_text_list):
         print("="*15, "init table_head model", "="*15)
         # instantiate the model
         model = TableHeadModel()
+        # model = TableHeadModel2()
         model.to(device)
         model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
         # set the model to evaluation mode
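
A hedged usage sketch (not part of this commit) for the prediction entry point updated above. The input format for table_text_list (a nested list of cell strings) and the shape of the returned head labels are assumptions inferred from the function name and the dataset helpers imported above; they are not shown in this diff.

    from BiddingKG.dl.table_head.predict_torch import predict

    # a tiny table; the first row is presumably the header the model should flag
    table_text_list = [
        ["No.", "Item", "Qty"],
        ["1", "Device A", "2"],
        ["2", "Device B", "5"],
    ]

    labels = predict(table_text_list)
    print(labels)  # expectation (unverified): per-cell head / non-head flags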