123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import os
- import sys
- import tensorflow as tf
- from keras.callbacks import Callback
- from keras.layers import Layer, warnings
- import numpy as np
- sys.path.append(os.path.dirname(__file__))
- from pre_process import get_best_padding_size
class BatchReshape1(Layer):
    """Fold the table's row and column dimensions into the batch dimension.

    (batch, rows, cols, character_num, character_embed)
        -> (batch*rows*cols, character_num, character_embed)

    rows/cols vary per table, so shapes are taken dynamically via tf.shape.
    """

    def __init__(self, character_num, character_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape1, self).__init__(**kwargs)
        self.character_num = character_num
        self.character_embed = character_embed

    def call(self, inputs, mask=None, **kwargs):
        shape = tf.shape(inputs)
        batch, height, width = shape[0], shape[1], shape[2]
        return tf.reshape(
            inputs,
            (batch * height * width, self.character_num, self.character_embed))

    def compute_output_shape(self, input_shape):
        # Leading dim is dynamic (batch*rows*cols), hence None.
        return None, self.character_num, self.character_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape1, self).get_config()
        config.update({'character_num': self.character_num,
                       'character_embed': self.character_embed})
        return config
class BatchReshape2(Layer):
    """Split the row/col dimensions back out of the batch dimension.

    Inputs are a pair:
        inputs[0]: reference tensor of shape (batch, rows, cols, ...) that
                   supplies the dynamic batch/rows/cols sizes;
        inputs[1]: flat tensor of shape (batch*rows*cols, cell_embed).
    Output: (batch, rows, cols, cell_embed).
    """

    def __init__(self, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape2, self).__init__(**kwargs)
        self.cell_embed = cell_embed

    def call(self, inputs, mask=None, **kwargs):
        reference, flat = inputs[0], inputs[1]
        ref_shape = tf.shape(reference)
        batch, height, width = ref_shape[0], ref_shape[1], ref_shape[2]
        return tf.reshape(flat, (batch, height, width, self.cell_embed))

    def compute_output_shape(self, input_shape):
        # batch/rows/cols are dynamic, hence None.
        return None, None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape2, self).get_config()
        config.update({'cell_embed': self.cell_embed})
        return config
class BatchReshape3(Layer):
    """Fold the table's row dimension into the batch dimension.

    (batch, rows, cols, cell_embed) -> (batch*rows, cols, cell_embed)
    """

    def __init__(self, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape3, self).__init__(**kwargs)
        self.cell_embed = cell_embed

    def call(self, inputs, mask=None, **kwargs):
        shape = tf.shape(inputs)
        batch, height, width = shape[0], shape[1], shape[2]
        return tf.reshape(inputs, (batch * height, width, self.cell_embed))

    def compute_output_shape(self, input_shape):
        # Leading dims are dynamic (batch*rows, cols), hence None.
        return None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape3, self).get_config()
        config.update({'cell_embed': self.cell_embed})
        return config
class BatchReshape4(Layer):
    """Split the row dimension back out of the batch dimension.

    Inputs are a pair:
        inputs[0]: reference tensor of shape (batch, rows, cols, ...) that
                   supplies the dynamic sizes;
        inputs[1]: tensor of shape (batch*rows, cols, cell_embed).
    Output: (batch, rows, cols, cell_embed).
    """

    def __init__(self, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape4, self).__init__(**kwargs)
        # Let masks flow through this layer instead of being dropped.
        self.supports_masking = True
        self.cell_embed = cell_embed

    def compute_mask(self, inputs, mask=None):
        # Pass the incoming mask(s) through unchanged.
        # NOTE(review): the mask is NOT reshaped to match the
        # (batch, rows, cols, ...) output layout — downstream layers receive
        # it in the merged-batch layout. Confirm this is intended before
        # relying on masking past this layer.
        return mask

    def call(self, inputs, mask=None, **kwargs):
        reference, merged = inputs[0], inputs[1]
        ref_shape = tf.shape(reference)
        batch, height, width = ref_shape[0], ref_shape[1], ref_shape[2]
        return tf.reshape(merged, (batch, height, width, self.cell_embed))

    def compute_output_shape(self, input_shape):
        # batch/rows/cols are dynamic, hence None.
        return None, None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape4, self).get_config()
        config.update({'cell_embed': self.cell_embed})
        return config
class BatchReshape5(Layer):
    """Flatten the row/col grid into a single sequence dimension.

    (batch, rows, cols, cell_embed) -> (batch, rows*cols, cell_embed)
    """

    def __init__(self, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape5, self).__init__(**kwargs)
        self.cell_embed = cell_embed

    def call(self, inputs, mask=None, **kwargs):
        shape = tf.shape(inputs)
        batch, height, width = shape[0], shape[1], shape[2]
        return tf.reshape(inputs, (batch, height * width, self.cell_embed))

    def compute_output_shape(self, input_shape):
        # batch and rows*cols are dynamic, hence None.
        return None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape5, self).get_config()
        config.update({'cell_embed': self.cell_embed})
        return config
class BatchReshape6(Layer):
    """Unflatten a sequence dimension back into the row/col grid.

    Inputs are a pair:
        inputs[0]: reference tensor of shape (batch, rows, cols, ...) that
                   supplies the dynamic sizes;
        inputs[1]: tensor of shape (batch, rows*cols, cell_embed).
    Output: (batch, rows, cols, cell_embed).
    """

    def __init__(self, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(BatchReshape6, self).__init__(**kwargs)
        self.cell_embed = cell_embed

    def call(self, inputs, mask=None, **kwargs):
        reference, flat = inputs[0], inputs[1]
        ref_shape = tf.shape(reference)
        batch, height, width = ref_shape[0], ref_shape[1], ref_shape[2]
        return tf.reshape(flat, (batch, height, width, self.cell_embed))

    def compute_output_shape(self, input_shape):
        # batch/rows/cols are dynamic, hence None.
        return None, None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(BatchReshape6, self).get_config()
        config.update({'cell_embed': self.cell_embed})
        return config
class MyPadding(Layer):
    """Zero-pad the row/col dimensions up to a fixed target size.

    (batch, rows, cols, cell_embed) -> (batch, pad_height, pad_width, cell_embed)

    NOTE(review): assumes rows <= pad_height and cols <= pad_width; a larger
    input would produce negative pad amounts and make tf.pad fail — confirm
    callers guarantee this (e.g. via get_best_padding_size).
    """

    def __init__(self, pad_height, pad_width, cell_embed, **kwargs):
        # Forward **kwargs (name, dtype, ...) to the base Layer.
        super(MyPadding, self).__init__(**kwargs)
        self.pad_height = pad_height
        self.pad_width = pad_width
        self.cell_embed = cell_embed

    def call(self, inputs, mask=None, **kwargs):
        shape = tf.shape(inputs)
        height, width = shape[1], shape[2]
        # Pad only at the bottom/right; batch and embed dims untouched.
        return tf.pad(inputs, [[0, 0],
                               [0, self.pad_height - height],
                               [0, self.pad_width - width],
                               [0, 0]])

    def compute_output_shape(self, input_shape):
        return None, None, None, self.cell_embed

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(MyPadding, self).get_config()
        config.update({'pad_height': self.pad_height,
                       'pad_width': self.pad_width,
                       'cell_embed': self.cell_embed})
        return config
class MySplit(Layer):
    """Crop a padded table back to its true size.

    Keeps the first `height` rows and first `width` columns of each sample;
    the inverse of MyPadding.
    """

    def __init__(self, height, width, **kwargs):
        super(MySplit, self).__init__(**kwargs)
        self.height = height
        self.width = width

    def call(self, inputs, mask=None, **kwargs):
        return inputs[:, 0:self.height, 0:self.width]

    def compute_output_shape(self, input_shape):
        return None, None, None

    def get_config(self):
        # Required so model.save()/load_model can round-trip this custom layer.
        config = super(MySplit, self).get_config()
        config.update({'height': self.height, 'width': self.width})
        return config
class MyModelCheckpoint(Callback):
    """ModelCheckpoint variant whose monitored value may average several metrics.

    Unlike keras.callbacks.ModelCheckpoint, ``monitor`` may be either a single
    metric name or a sequence of names; when ``save_best_only`` is set, the
    compared value is the mean of all monitored metrics found in ``logs``.

    Args:
        filepath: save path template; formatted with ``epoch`` and ``logs``.
        monitor: metric name, or list/tuple of metric names to average.
        verbose: 1 to print a message on each save decision.
        save_best_only: only save when the monitored value improves.
        save_weights_only: save weights instead of the full model.
        mode: 'min', 'max', or 'auto' (inferred from the metric names).
        period: number of epochs between checkpoints.
    """

    def __init__(self, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto', period=1):
        super(MyModelCheckpoint, self).__init__()
        self.monitor = monitor
        self.verbose = verbose
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('ModelCheckpoint mode %s is unknown, '
                          'fallback to auto mode.' % (mode),
                          RuntimeWarning)
            mode = 'auto'

        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            # auto: maximize accuracy-like metrics, minimize everything else.
            if any('acc' in name or name.startswith('fmeasure')
                   for name in self._monitor_names()):
                self.monitor_op = np.greater
                self.best = -np.inf
            else:
                self.monitor_op = np.less
                self.best = np.inf

    def _monitor_names(self):
        """Return the monitored metric names as a tuple (string or sequence)."""
        if isinstance(self.monitor, (list, tuple)):
            return tuple(self.monitor)
        return (self.monitor,)

    def _save(self, filepath):
        """Save weights or the full model, per save_weights_only."""
        if self.save_weights_only:
            self.model.save_weights(filepath, overwrite=True)
        else:
            self.model.save(filepath, overwrite=True)

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save < self.period:
            return
        self.epochs_since_last_save = 0
        filepath = self.filepath.format(epoch=epoch + 1, **logs)

        if not self.save_best_only:
            if self.verbose > 0:
                print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
            self._save(filepath)
            return

        # Check availability BEFORE averaging: the original averaged first,
        # which raised TypeError when a metric was missing from logs.
        values = [logs.get(name) for name in self._monitor_names()]
        if any(v is None for v in values):
            warnings.warn('Can save best model only with %s available, '
                          'skipping.' % (self.monitor), RuntimeWarning)
            return
        current = sum(values) / len(values)

        if self.monitor_op(current, self.best):
            if self.verbose > 0:
                print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                      ' saving model to %s'
                      % (epoch + 1, self.monitor, self.best,
                         current, filepath))
            self.best = current
            self._save(filepath)
        else:
            if self.verbose > 0:
                print('\nEpoch %05d: %s did not improve from %0.5f' %
                      (epoch + 1, self.monitor, self.best))
|