#! -*- coding:utf-8 -*-
import os, sys
# parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# sys.path.insert(0,parentdir)
# import json
import re                 # used in get_words_matrix(); may also be re-exported by the wildcard import below
import numpy as np
import tensorflow as tf   # used directly (tf.reverse_sequence, tf.Session); may also come from the wildcard import below
# from random import choice
# from tqdm import tqdm
from BiddingKG.dl.common.models import *
from itertools import groupby


def seq_padding(X, padding=0):
    L = [len(x) for x in X]
    ML = max(L)
    return np.array([
        np.concatenate([x, [padding] * (ML - len(x))]) if len(x) < ML else x for x in X
    ])


from keras.layers import *
from keras.models import Model
import keras.backend as K
from keras.callbacks import Callback
from keras.optimizers import Adam

def seq_gather(x):
    """seq has shape [None, seq_len, s_size] and idxs has shape [None, 1];
    for the i-th sequence in seq, select the idxs[i]-th vector,
    giving an output of shape [None, s_size].
    """
    seq, idxs = x
    idxs = K.cast(idxs, 'int32')
    batch_idxs = K.arange(0, K.shape(seq)[0])
    batch_idxs = K.expand_dims(batch_idxs, 1)
    idxs = K.concatenate([batch_idxs, idxs], 1)
    return K.tf.gather_nd(seq, idxs)


def seq_maxpool(x):
    """seq has shape [None, seq_len, s_size] and mask has shape [None, seq_len, 1];
    masked positions are suppressed first, then max pooling is applied over
    the time axis.
    """
    seq, mask = x
    seq -= (1 - mask) * 1e10
    return K.max(seq, 1, keepdims=True)

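# A minimal, commented-out sanity check for the two helpers above (illustrative
# shapes only; assumes the TF1/Keras backend used throughout this file):
# _seq = K.constant(np.random.rand(2, 5, 4))      # [batch, seq_len, s_size]
# _idx = K.constant([[1], [3]])                   # [batch, 1]
# _msk = K.constant(np.ones((2, 5, 1)))           # [batch, seq_len, 1]
# print(K.eval(seq_gather([_seq, _idx])).shape)   # -> (2, 4)
# print(K.eval(seq_maxpool([_seq, _msk])).shape)  # -> (2, 1, 4)
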
def dilated_gated_conv1d(seq, mask, dilation_rate=1):
    """Dilated gated 1D convolution (residual style).
    """
    dim = K.int_shape(seq)[-1]
    h = Conv1D(dim * 2, 3, padding='same', dilation_rate=dilation_rate)(seq)

    def _gate(x):
        dropout_rate = 0.2
        s, h = x
        g, h = h[:, :, :dim], h[:, :, dim:]
        g = K.in_train_phase(K.dropout(g, dropout_rate), g)
        g = K.sigmoid(g)
        return g * s + (1 - g) * h

    seq = Lambda(_gate)([seq, h])
    seq = Lambda(lambda x: x[0] * x[1])([seq, mask])
    return seq

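# dilated_gated_conv1d is currently unused in this module; a stack of these
# residual gated convolutions with growing dilation could serve as an
# alternative sequence encoder to the BiGRU in get_model(), e.g. (hypothetical):
# t = dilated_gated_conv1d(t, mask, dilation_rate=1)
# t = dilated_gated_conv1d(t, mask, dilation_rate=2)
# t = dilated_gated_conv1d(t, mask, dilation_rate=5)
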
class OurLayer(Layer):
    """Custom base Layer that adds a reuse() method, so existing layers can be
    called while defining a new Layer.
    """
    def reuse(self, layer, *args, **kwargs):
        if not layer.built:
            if len(args) > 0:
                inputs = args[0]
            else:
                inputs = kwargs['inputs']
            if isinstance(inputs, list):
                input_shape = [K.int_shape(x) for x in inputs]
            else:
                input_shape = K.int_shape(inputs)
            layer.build(input_shape)
        outputs = layer.call(*args, **kwargs)
        for w in layer.trainable_weights:
            if w not in self._trainable_weights:
                self._trainable_weights.append(w)
        for w in layer.non_trainable_weights:
            if w not in self._non_trainable_weights:
                self._non_trainable_weights.append(w)
        for u in layer.updates:
            if not hasattr(self, '_updates'):
                self._updates = []
            if u not in self._updates:
                self._updates.append(u)
        return outputs

class OurBidirectional(OurLayer):
    """Hand-rolled bidirectional RNN wrapper that accepts a mask, so the
    forward and backward passes stay aligned on padded sequences.
    """
    def __init__(self, layer, **args):
        super(OurBidirectional, self).__init__(**args)
        self.forward_layer = layer.__class__.from_config(layer.get_config())
        self.backward_layer = layer.__class__.from_config(layer.get_config())
        self.forward_layer.name = 'forward_' + self.forward_layer.name
        self.backward_layer.name = 'backward_' + self.backward_layer.name

    def reverse_sequence(self, x, mask):
        """Here mask.shape is [batch_size, seq_len, 1].
        """
        seq_len = K.round(K.sum(mask, 1)[:, 0])
        seq_len = K.cast(seq_len, 'int32')
        return tf.reverse_sequence(x, seq_len, seq_dim=1)

    def call(self, inputs):
        x, mask = inputs
        x_forward = self.reuse(self.forward_layer, x)
        x_backward = self.reverse_sequence(x, mask)
        x_backward = self.reuse(self.backward_layer, x_backward)
        x_backward = self.reverse_sequence(x_backward, mask)
        x = K.concatenate([x_forward, x_backward], -1)
        if K.ndim(x) == 3:
            return x * mask
        else:
            return x

    def compute_output_shape(self, input_shape):
        return input_shape[0][:-1] + (self.forward_layer.units * 2,)

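# OurBidirectional reverses each sample only over its true (unpadded) length before
# the backward pass, so the backward GRU never starts from padding; this is why the
# masked wrapper is preferred over keras' built-in Bidirectional on the GPU path.
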
class Attention(Layer):
    """Multi-head attention.
    """
    def __init__(self, nb_head, size_per_head, **kwargs):
        self.nb_head = nb_head
        self.size_per_head = size_per_head
        self.out_dim = nb_head * size_per_head
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        super(Attention, self).build(input_shape)
        q_in_dim = input_shape[0][-1]
        k_in_dim = input_shape[1][-1]
        v_in_dim = input_shape[2][-1]
        self.q_kernel = self.add_weight(name='q_kernel',
                                        shape=(q_in_dim, self.out_dim),
                                        initializer='glorot_normal')
        self.k_kernel = self.add_weight(name='k_kernel',
                                        shape=(k_in_dim, self.out_dim),
                                        initializer='glorot_normal')
        self.v_kernel = self.add_weight(name='w_kernel',
                                        shape=(v_in_dim, self.out_dim),
                                        initializer='glorot_normal')

    def mask(self, x, mask, mode='mul'):
        if mask is None:
            return x
        else:
            for _ in range(K.ndim(x) - K.ndim(mask)):
                mask = K.expand_dims(mask, K.ndim(mask))
            if mode == 'mul':
                return x * mask
            else:
                return x - (1 - mask) * 1e10

    def call(self, inputs):
        q, k, v = inputs[:3]
        v_mask, q_mask = None, None
        if len(inputs) > 3:
            v_mask = inputs[3]
            if len(inputs) > 4:
                q_mask = inputs[4]
        # linear projections
        qw = K.dot(q, self.q_kernel)
        kw = K.dot(k, self.k_kernel)
        vw = K.dot(v, self.v_kernel)
        # reshape to (batch, seq_len, heads, size_per_head)
        qw = K.reshape(qw, (-1, K.shape(qw)[1], self.nb_head, self.size_per_head))
        kw = K.reshape(kw, (-1, K.shape(kw)[1], self.nb_head, self.size_per_head))
        vw = K.reshape(vw, (-1, K.shape(vw)[1], self.nb_head, self.size_per_head))
        # transpose to (batch, heads, seq_len, size_per_head)
        qw = K.permute_dimensions(qw, (0, 2, 1, 3))
        kw = K.permute_dimensions(kw, (0, 2, 1, 3))
        vw = K.permute_dimensions(vw, (0, 2, 1, 3))
        # scaled dot-product attention
        a = K.batch_dot(qw, kw, [3, 3]) / self.size_per_head ** 0.5
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = self.mask(a, v_mask, 'add')
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = K.softmax(a)
        # assemble the output
        o = K.batch_dot(a, vw, [3, 2])
        o = K.permute_dimensions(o, (0, 2, 1, 3))
        o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
        o = self.mask(o, q_mask, 'mul')
        return o

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.out_dim)

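# Shape summary (illustrative): with nb_head=8 and size_per_head=16, out_dim = 128,
# so Attention(8, 16)([t, t, t, mask]) maps a (batch, seq_len, d) sequence to
# (batch, seq_len, 128); the optional 4th/5th inputs are the value/query masks.
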
def position_id(x):
    if isinstance(x, list) and len(x) == 2:
        x, r = x
    else:
        r = 0
    pid = K.arange(K.shape(x)[1])
    pid = K.expand_dims(pid, 0)
    pid = K.tile(pid, [K.shape(x)[0], 1])
    return K.abs(pid - K.cast(r, 'int32'))

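# position_id(x) numbers the timesteps 0..seq_len-1 for each sample; when called as
# position_id([t, k1]) it instead returns |position - k1|, i.e. each token's distance
# from the predicted subject position, which feeds the shared position embedding
# in get_model().
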
add_dict = load(os.path.dirname(__file__) + '/../relation_extraction/add_words_dict.pkl')
add_words = ['<unk>', '<company/org>', '<location>', '<phone>', '<contact_person>']


def get_words_matrix(words):
    model_w2v = getModel_w2v()
    if words in add_words:
        return add_dict[words]
    else:
        item_not_space = re.sub(r"\s*", "", words)
        if item_not_space in model_w2v.vocab:
            return model_w2v[item_not_space]
        else:
            return add_dict['<unk>']


entity_type_dict = {
    'org': '<company/org>',
    'company': '<company/org>',
    'location': '<location>',
    'phone': '<phone>',
    'person': '<contact_person>'
}

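# entity_type_dict maps the entity types produced upstream onto the placeholder tokens
# that carry their own vectors in add_dict; for ordinary words, get_words_matrix()
# looks the word up in the word2vec vocabulary and falls back to '<unk>' when missing.
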
class Relation_extraction():
    def __init__(self, is_train=False):
        self.is_train = is_train
        # self.words_vocab = load(os.path.dirname(__file__)+'/../relation_extraction/words_vocab.pkl')
        # id2word = {i: j for i, j in enumerate(self.words_vocab)}
        # self.words2id = {j: i for i, j in id2word.items()}
        self.words_size = 128
        self.id2predicate = {
            0: "rel_person",   # company -- contact person
            1: "rel_phone",    # contact person -- phone
            2: "rel_address"   # company -- address
        }
        self.predicate2id = dict({j: i for i, j in self.id2predicate.items()})
        self.num_classes = len(self.id2predicate)
        self.maxlen = 512
        # self.word2vec = None
        # if self.is_train:
        #     self.word2vec = load('words2v_matrix.pkl')
        self.model_path = os.path.dirname(__file__) + '/../relation_extraction/models/my_best_model_oneoutput2.weights'
        self.get_model()
        if self.model_path:
            self.train_model.load_weights(self.model_path)
    def get_model(self):
        words_size = self.words_size
        t2_in = Input(shape=(None, words_size))  # word vectors
        t3_in = Input(shape=(None,))             # mask list
        s1_in = Input(shape=(None,))
        k1_in = Input(shape=(1,))
        o1_in = Input(shape=(None, self.num_classes))
        t2, t3, s1, k1, o1 = t2_in, t3_in, s1_in, k1_in, o1_in
        mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(t3)
        pid = Lambda(position_id)(t2)
        position_embedding = Embedding(self.maxlen, words_size, embeddings_initializer='zeros')
        pv = position_embedding(pid)
        # t2 = Embedding(len(self.words2id), words_size, weights=[self.word2vec] if self.is_train else None, trainable=True,name="words_embedding")(t2)
        t = Add()([t2, pv])
        t = Dropout(0.25)(t)
        t = Lambda(lambda x: x[0] * x[1])([t, mask])
        if K.tensorflow_backend._get_available_gpus():
            # GPU (training): bidirectional RNN with explicit masking
            t = OurBidirectional(CuDNNGRU(64, return_sequences=True))([t, mask])
        else:
            # CPU (inference): keras' built-in bidirectional RNN, no mask
            t = Bidirectional(GRU(64, return_sequences=True, reset_after=True))(t)
        t_dim = K.int_shape(t)[-1]
        pn1 = Dense(words_size, activation='relu')(t)
        pn1 = Dense(1, activation='sigmoid')(pn1)
        h = Attention(8, 16)([t, t, t, mask])
        h = Concatenate()([t, h])
        h = Conv1D(words_size, 3, activation='relu', padding='same')(h)
        ps1 = Dense(1, activation='sigmoid')(h)
        ps1 = Lambda(lambda x: x[0] * x[1])([ps1, pn1])
        self.subject_model = Model([t2_in, t3_in], [ps1])  # model that predicts the subject
        t_max = Lambda(seq_maxpool)([t, mask])
        pc = Dense(words_size, activation='relu')(t_max)
        pc = Dense(self.num_classes, activation='sigmoid')(pc)

        def get_k_inter(x, n=6):
            seq, k1 = x
            # k_inter = [K.round(k1 * a + k2 * (1 - a)) for a in np.arange(n) / (n - 1.)]
            k_inter = [seq_gather([seq, k1])] * 2
            k_inter = [K.expand_dims(k, 1) for k in k_inter]
            k_inter = K.concatenate(k_inter, 1)
            return k_inter

        k = Lambda(get_k_inter, output_shape=(2, t_dim))([t, k1])
        if K.tensorflow_backend._get_available_gpus():
            k = Bidirectional(CuDNNGRU(t_dim))(k)
        else:
            k = Bidirectional(GRU(t_dim, reset_after=True))(k)
        k1v = position_embedding(Lambda(position_id)([t, k1]))
        kv = Concatenate()([k1v, k1v])
        k = Lambda(lambda x: K.expand_dims(x[0], 1) + x[1])([k, kv])
        h = Attention(8, 16)([t, t, t, mask])
        h = Concatenate()([t, h, k])
        h = Conv1D(words_size, 3, activation='relu', padding='same')(h)
        po = Dense(1, activation='sigmoid')(h)
        po1 = Dense(self.num_classes, activation='sigmoid')(h)
        po1 = Lambda(lambda x: x[0] * x[1] * x[2] * x[3])([po, po1, pc, pn1])
        self.object_model = Model([t2_in, t3_in, k1_in], [po1])
        train_model = Model([t2_in, t3_in, s1_in, k1_in, o1_in],
                            [ps1, po1])
        # loss
        s1 = K.expand_dims(s1, 2)
        s1_loss = K.binary_crossentropy(s1, ps1)
        s1_loss = K.sum(s1_loss * mask) / K.sum(mask)
        o1_loss = K.sum(K.binary_crossentropy(o1, po1), 2, keepdims=True)
        o1_loss = K.sum(o1_loss * mask) / K.sum(mask)
        loss = s1_loss + o1_loss
        train_model.add_loss(loss)
        train_model.compile(optimizer=Adam(1e-3))
        # train_model.summary()
        self.train_model = train_model
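        # Three views of one shared graph are kept: subject_model predicts subject
        # positions from (word vectors, mask); object_model predicts, for a given
        # subject position k1, the object positions together with their relation
        # class; train_model ties both heads together under the summed masked loss above.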
    def extract_items(self, text_in, words, rate=0.5):
        text_words = text_in
        R = []
        # _t2 = [self.words2id.get(c, 1) for c in words]
        _t2 = np.zeros((len(words), self.words_size))
        for i in range(len(words)):
            _t2[i] = np.array(get_words_matrix(words[i]))
        _t2 = np.array([_t2])
        _t3 = [1 for _ in words]
        _t3 = np.array([_t3])
        _k1 = self.subject_model.predict([_t2, _t3])
        _k1 = _k1[0, :, 0]
        _k1 = np.where(_k1 > rate)[0]
        _subjects = []
        for i in _k1:
            _subject = text_in[i]
            _subjects.append((_subject, i, i))
        if _subjects:
            _t2 = np.repeat(_t2, len(_subjects), 0)
            _t3 = np.repeat(_t3, len(_subjects), 0)
            _k1, _ = np.array([_s[1:] for _s in _subjects]).T.reshape((2, -1, 1))
            _o1 = self.object_model.predict([_t2, _t3, _k1])
            for i, _subject in enumerate(_subjects):
                _oo1 = np.where(_o1[i] > 0.5)
                for _ooo1, _c1 in zip(*_oo1):
                    _object = text_in[_ooo1]
                    _predicate = self.id2predicate[_c1]
                    R.append((_subject[0], _predicate, _object))
            return R
        else:
            return []

    def predict(self, text, words):
        res = self.extract_items(text, words)
        return res
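    # Minimal usage sketch (hypothetical input; both arguments are equal-length
    # token lists, with entity tokens in `words` replaced by their type tags):
    # model = Relation_extraction()
    # triples = model.predict(text_in.split("||"), words.split("||"))
    # -> [(subject, "rel_person"/"rel_phone"/"rel_address", object), ...]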
    @staticmethod
    def get_predata(entity_list, list_sentence):
        list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
        entity_list = sorted(entity_list, key=lambda x: (x.sentence_index, x.begin_index))
        pre_data = []
        text_data = []
        last_sentence_index = -1
        for key, group in groupby(entity_list, key=lambda x: x.sentence_index):
            if key - last_sentence_index > 1:
                for i in range(last_sentence_index + 1, key):
                    pre_data.extend(list_sentence[i].tokens)
                    text_data.extend([0] * len(list_sentence[i].tokens))
            group = list(group)
            for i in range(len(group)):
                ent = group[i]
                _tokens = list_sentence[key].tokens
                if i == len(group) - 1:
                    if i == 0:
                        pre_data.extend(_tokens[:ent.begin_index])
                        text_data.extend([0] * len(_tokens[:ent.begin_index]))
                        pre_data.append(entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                    else:
                        pre_data.append(entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                if i == 0:
                    pre_data.extend(_tokens[:ent.begin_index])
                    text_data.extend([0] * len(_tokens[:ent.begin_index]))
                    pre_data.append(entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
                else:
                    pre_data.append(entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
            last_sentence_index = key
        return text_data, pre_data
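    # get_predata() returns two aligned lists: pre_data is the token sequence fed to
    # the model, with every entity replaced by its type tag (e.g. '<phone>'), while
    # text_data holds the original entity object at those positions and 0 elsewhere,
    # so predicted subject/object indices can be mapped back to concrete entities.
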
def save_model():
    graph = tf.Graph()
    with graph.as_default() as graph:
        with tf.Session(graph=graph).as_default() as sess:
            test_model = Relation_extraction()
            tf.saved_model.simple_save(sess,
                                       "models2/object_model/",
                                       inputs={"input0": test_model.object_model.input[0],
                                               "input1": test_model.object_model.input[1],
                                               "input2": test_model.object_model.input[2]},
                                       outputs={"outputs": test_model.object_model.output})
            tf.saved_model.simple_save(sess,
                                       "models2/subject_model/",
                                       inputs={"input0": test_model.subject_model.input[0],
                                               "input1": test_model.subject_model.input[1]},
                                       outputs={"outputs": test_model.subject_model.output})

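# The exported SavedModels can later be reloaded in a plain TF1 session, e.g.
# (hypothetical, mirroring the export paths above):
# with tf.Session(graph=tf.Graph()) as sess:
#     tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
#                                "models2/subject_model/")
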
if __name__ == '__main__':
    test_model = Relation_extraction()
    test_model.train_model.summary()
    print("object_model=====================")
    test_model.object_model.summary()
    print("subject_model=======================")
    test_model.subject_model.summary()
    # save_model()
    # ['<pad>','<unk>','<company/org>','<location>','<phone>','<contact_person>']
    # add_words = ['<unk>','<company/org>','<location>','<phone>','<contact_person>']
    # add_dict = dict()
    # for layer in test_model.train_model.layers:
    #     if layer.name=="words_embedding":
    #         save(layer.get_weights()[0],"trained_words.pkl")
    #         for i,j in zip(add_words,layer.get_weights()[0][1:6]):
    #             add_dict[i] = j
    #             print(i,'\n',j)
    #         print(layer.get_weights()[0][1:6])
    #         save(add_dict,"add_words_dict.pkl")
    text_in = "索引||号||:||014583788||/||2018-00038||,||成文||日期||:||2018-11-19||,||关于||国家税务总局都昌县税务局||办公楼||七||楼||会议室||维修||改造||项目||综合||比价||成交||公告||,||关于||国家税务总局都昌县税务局||办公楼七楼会议室||维修||改造||项目||(||比价||编号||:||JXXL2018-JJ-DC001||)||综合||比价||成交||公告||,||江西新立建设管理有限公司九江分公司||受||国家税务总局都昌县税务局||委托||,||就||其||办公楼||七||楼||会议室||维修||改造||项目||(||控制||价||:||294788.86||元||)||进行||综合||比价||方式||,||比价||活动||于||2018年||11月||16日||15:30||在||都昌县万里大道和平宾馆旁三楼||江西新立建设管理有限公司九江分公司||进行||,||经||比价||小组||评审||,||比价人||确定||,||现||将||比价||结果||公式||如下||:||序号||:||1||,||比价||编号||,||JXXL2018-JJ-DC001||,||项目||内容||名称||,||都昌县税务局||办公楼||七||楼||会议室||维修||改造||项目||,||数量||:||1||,||成交||供应商||名称||,||江西芙蓉建筑工程有限公司||,||成交价||(||元||)||,||284687.67||。||一||、||比价||小组||成员||:||杨忠辉||李燕杨瑾||,||本||公告||自||发布||之||日||起||1||个||工作日||内||若||无||异议||,||将||向||中标人||发出||《||成交||通知书||》||,||二||、||联系||方式||,||单位||:||国家税务总局都昌县税务局||,||比价||代理||机构||:||江西新立建设管理有限公司九江分公司||,||联系人||:||詹女士||,||电话||:||15979976088||,||江西新立建设管理有限公司九江分公司"
    words = "索引||号||:||014583788||/||2018-00038||,||成文||日期||:||2018-11-19||,||关于||国家税务总局都昌县税务局||" \
            "办公楼||七||楼||会议室||维修||改造||项目||综合||比价||成交||公告||,||关于||国家税务总局都昌县税务局||办公楼七楼会议室||" \
            "维修||改造||项目||(||比价||编号||:||JXXL2018-JJ-DC001||)||综合||比价||成交||公告||,||<company/org>||" \
            "受||国家税务总局都昌县税务局||委托||,||就||其||办公楼||七||楼||会议室||维修||改造||项目||(||控制||价||:||294788.86||元||)||" \
            "进行||综合||比价||方式||,||比价||活动||于||2018年||11月||16日||15:30||在||都昌县万里大道和平宾馆旁三楼||<company/org>||" \
            "进行||,||经||比价||小组||评审||,||比价人||确定||,||现||将||比价||结果||公式||如下||:||序号||:||1||,||比价||编号||," \
            "||JXXL2018-JJ-DC001||,||项目||内容||名称||,||都昌县税务局||办公楼||七||楼||会议室||维修||改造||项目||,||数量||:||1||,||成交||" \
            "供应商||名称||,||<company/org>||,||成交价||(||元||)||,||284687.67||。||一||、||比价||小组||成员||:||杨忠辉||李燕杨瑾||," \
            "||本||公告||自||发布||之||日||起||1||个||工作日||内||若||无||异议||,||将||向||中标人||发出||《||成交||通知书||》||,||二||、||联系||方式||," \
            "||单位||:||<company/org>||,||比价||代理||机构||:||<company/org>||,||联系人||:||<contact_person>||,||电话||:||<phone>||,||江西新立建设管理有限公司九江分公司"
    # text_in = "索引"
    # words = "索引"
    # res = test_model.predict(text_in.split("||"),words.split("||"))
    # print(res)
    # print(test_model.predict(text_in.split("||"),words.split("||")))