Source code for amber.modeler.kerasModeler

from abc import ABC

import tensorflow.keras as keras
from ..architect import Operation
from .dag import get_layer
import numpy as np
from .enasModeler import ModelBuilder
import tensorflow as tf
from .architectureDecoder import MultiIOArchitecture, ResConvNetArchitecture
from tensorflow.keras.layers import Concatenate, Add, Dense, Conv1D, MaxPooling1D, AveragePooling1D, \
    GlobalAveragePooling1D, Flatten, BatchNormalization, LeakyReLU, Dropout, Activation, Lambda
from tensorflow.keras import regularizers
from tensorflow.keras import constraints
from tensorflow.keras.models import Model
import copy
from ..architect.modelSpace import BranchedModelSpace


class KerasModelBuilder(ModelBuilder):
    def __init__(self, inputs_op, output_op, model_compile_dict, model_space=None, gpus=None, **kwargs):
        self.model_compile_dict = model_compile_dict
        self.input_node = inputs_op
        self.output_node = output_op
        self.model_space = model_space
        self.gpus = gpus

    def __call__(self, model_states):
        if self.gpus is None or self.gpus == 1:
            model = build_sequential_model(
                model_states=model_states,
                input_state=self.input_node,
                output_state=self.output_node,
                model_compile_dict=self.model_compile_dict,
                model_space=self.model_space
            )
        elif type(self.gpus) is int:
            model = build_multi_gpu_sequential_model(
                model_states=model_states,
                input_state=self.input_node,
                output_state=self.output_node,
                model_compile_dict=self.model_compile_dict,
                model_space=self.model_space,
                gpus=self.gpus
            )
        elif type(self.gpus) is list:
            mirrored_strategy = tf.distribute.MirroredStrategy(devices=self.gpus)
            with mirrored_strategy.scope():
                model = build_sequential_model(
                    model_states=model_states,
                    input_state=self.input_node,
                    output_state=self.output_node,
                    model_compile_dict=self.model_compile_dict,
                    model_space=self.model_space
                )
        return model
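# A minimal usage sketch for KerasModelBuilder. The input/output Operations and the
# compile settings below are illustrative assumptions (any Operation understood by
# dag.get_layer works); `model_space` is an amber.architect ModelSpace built elsewhere.
def _example_keras_model_builder(model_space, arc_tokens):
    builder = KerasModelBuilder(
        inputs_op=Operation('input', shape=(200, 4)),                  # assumed input spec
        output_op=Operation('dense', units=1, activation='sigmoid'),   # assumed output head
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
        model_space=model_space,
    )
    # arc_tokens is either a list of Operations, or one integer index per layer in model_space
    return builder(model_states=arc_tokens)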
class KerasBranchModelBuilder(ModelBuilder):
    def __init__(self, inputs_op, output_op, model_compile_dict, model_space=None, with_bn=False, **kwargs):
        assert type(model_space) is BranchedModelSpace
        assert len(inputs_op) == len(model_space.subspaces[0])
        self.inputs_op = inputs_op
        self.output_op = output_op
        self.model_space = model_space
        self.model_compile_dict = model_compile_dict
        self.with_bn = with_bn
        self._branch_to_layer = self.model_space.branch_to_layer

    def _build_branch(self, input_op, model_states, model_space):
        if issubclass(type(input_op), Operation):
            inp = get_layer(None, input_op)
        else:
            inp = input_op
        x = inp
        assert len(model_states) > 0
        for i, state in enumerate(model_states):
            if issubclass(type(state), Operation):
                x = get_layer(x, state)
            elif issubclass(type(state), int) or np.issubclass_(type(state), np.integer):
                assert model_space is not None, "if provided integer model_arc, must provide model_space in kwargs"
                x = get_layer(x, model_space[i][state], with_bn=self.with_bn)
            else:
                raise Exception("cannot understand %s of type %s" % (state, type(state)))
        return inp, x

    def __call__(self, model_states, **kwargs):
        inps = []
        branches = []
        # build each branch sequentially
        for i in range(len(self.inputs_op)):
            inp, out = self._build_branch(
                input_op=self.inputs_op[i],
                model_states=[model_states[j] for j in self._branch_to_layer[(0, i)]],
                model_space=self.model_space.subspaces[0][i]
            )
            inps.append(inp)
            branches.append(out)
        # merge branches
        if self.model_space.concat_op == 'concatenate':
            branch_merge = get_layer(x=branches, state=Operation('concatenate'))
        else:
            raise ValueError('Model builder cannot understand model space concat op: %s' % self.model_space.concat_op)
        # build stem
        _, h = self._build_branch(
            input_op=branch_merge,
            model_states=[model_states[j] for j in self._branch_to_layer[(1, None)]],
            model_space=self.model_space.subspaces[1]
        )
        out = get_layer(x=h, state=self.output_op)
        model = Model(inputs=inps, outputs=out)
        model.compile(**self.model_compile_dict)
        return model
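# A usage sketch for KerasBranchModelBuilder, assuming `branched_space` is a
# BranchedModelSpace whose first group of subspaces has one subspace per input;
# the two input Operations and the output head are illustrative assumptions.
def _example_keras_branch_model_builder(branched_space, arc_tokens):
    builder = KerasBranchModelBuilder(
        inputs_op=[Operation('input', shape=(200, 4)), Operation('input', shape=(100, 4))],
        output_op=Operation('dense', units=1, activation='sigmoid'),
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
        model_space=branched_space,
        with_bn=False,
    )
    # arc_tokens are mapped to layers through branched_space.branch_to_layer:
    # first the per-branch layers, then the stem layers after the concatenation.
    return builder(model_states=arc_tokens)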
class KerasResidualCnnBuilder(ModelBuilder):
    """Function class for converting an architecture sequence of tokens to a Keras model

    Parameters
    ----------
    inputs_op : amber.architect.modelSpace.Operation
    output_op : amber.architect.modelSpace.Operation
    fc_units : int
        number of units in the fully-connected layer
    flatten_mode : {'GAP', 'Flatten'}
        the flatten mode used to convert conv layers to fully-connected layers
    model_compile_dict : dict
    model_space : amber.architect.modelSpace.ModelSpace
    dropout_rate : float
        dropout rate; must be in the range [0, 1)
    wsf : int
        width scale factor
    """

    def __init__(self, inputs_op, output_op, fc_units, flatten_mode, model_compile_dict, model_space,
                 dropout_rate=0.2, wsf=1, add_conv1_under_pool=True, verbose=1, **kwargs):
        self.model_compile_dict = model_compile_dict
        self.inputs = inputs_op
        self.outputs = output_op
        self.fc_units = fc_units
        self.verbose = verbose
        assert flatten_mode.lower() in {'gap', 'flatten'}, "Unknown flatten mode: %s" % flatten_mode
        self.flatten_mode = flatten_mode.lower()
        self.model_space = model_space
        self.dropout_rate = dropout_rate
        self.wsf = wsf
        self.add_conv1_under_pool = add_conv1_under_pool
        self.decoder = ResConvNetArchitecture(model_space=model_space)

    def __call__(self, model_states):
        model = self._convert(model_states, verbose=self.verbose)
        if model is not None:
            model.compile(**self.model_compile_dict)
        return model

    def _convert(self, arc_seq, verbose=True):
        out_filters, pool_layers = self.get_out_filters(self.model_space)
        inp = get_layer(x=None, state=self.inputs)
        # this assumes all choices within a layer have the same out_filters
        stem_conv = Operation('conv1d', kernel_size=8, filters=out_filters[0], activation="linear")
        x = self.res_layer(stem_conv, self.wsf, inp, name="stem_conv",
                           add_conv1_under_pool=self.add_conv1_under_pool)

        start_idx = 0
        layers = []
        for layer_id in range(len(self.model_space)):
            if verbose:
                print("start_idx=%i, layer id=%i, out_filters=%i x %i" % (
                    start_idx, layer_id, out_filters[layer_id], self.wsf))
            count = arc_seq[start_idx]
            this_layer = self.model_space[layer_id][count]
            if verbose:
                print(this_layer)
            if layer_id == 0:
                x = self.res_layer(this_layer, self.wsf, x, name="L%i" % layer_id,
                                   add_conv1_under_pool=self.add_conv1_under_pool)
            else:
                x = self.res_layer(this_layer, self.wsf, layers[-1], name="L%i" % layer_id,
                                   add_conv1_under_pool=self.add_conv1_under_pool)

            if layer_id > 0:
                skip = arc_seq[start_idx + 1: start_idx + layer_id + 1]
                skip_layers = [layers[i] for i in range(len(layers)) if skip[i] == 1]
                if verbose:
                    print("skip=%s" % skip)
                if len(skip_layers):
                    skip_layers.append(x)
                    x = Add(name="L%i_resAdd" % layer_id)(skip_layers)
                x = BatchNormalization(name="L%i_resBn" % layer_id)(x)

            if self.dropout_rate != 0:
                x = Dropout(self.dropout_rate, name="L%i_dropout" % layer_id)(x)

            layers.append(x)
            if layer_id in pool_layers:
                pooled_layers = []
                for i, layer in enumerate(layers):
                    pooled_layers.append(
                        self.factorized_reduction_layer(
                            layer,
                            out_filters[layer_id + 1] * self.wsf,
                            name="pool_at_%i_from_%i" % (layer_id, i))
                    )
                if verbose:
                    print("pooled@%i, %s" % (layer_id, pooled_layers))
                layers = pooled_layers

            start_idx += 1 + layer_id
            if verbose:
                print('-' * 80)

        # fully-connected layer
        if self.flatten_mode == 'gap':
            x = GlobalAveragePooling1D()(x)
        elif self.flatten_mode == 'flatten':
            x = Flatten()(x)
        else:
            raise Exception("Unknown flatten mode: %s" % self.flatten_mode)
        if self.dropout_rate != 0:
            x = Dropout(self.dropout_rate)(x)
        x = Dense(units=self.fc_units, activation="relu")(x)

        out = get_layer(x=x, state=self.outputs)
        model = Model(inputs=inp, outputs=out)
        return model

    @staticmethod
    def factorized_reduction_layer(inp, out_filter, name, reduction_factor=4):
        x = Conv1D(out_filter,
                   kernel_size=1,
                   strides=1,
                   kernel_initializer='he_normal',
                   use_bias=False,
                   padding="same",
                   name=name
                   )(inp)
        x = MaxPooling1D(pool_size=reduction_factor, strides=reduction_factor, padding="same")(x)
        return x

    @staticmethod
    def res_layer(layer, width_scale_factor, inputs, l2_reg=5e-7, name="layer", add_conv1_under_pool=True):
        if layer.Layer_type == 'conv1d':
            activation = layer.Layer_attributes['activation']
            num_filters = width_scale_factor * layer.Layer_attributes['filters']
            kernel_size = layer.Layer_attributes['kernel_size']
            if 'dilation' in layer.Layer_attributes:
                dilation = layer.Layer_attributes['dilation']
            else:
                dilation = 1
            x = Conv1D(num_filters,
                       kernel_size=kernel_size,
                       strides=1,
                       padding='same',
                       kernel_initializer='he_normal',
                       kernel_regularizer=regularizers.l2(l2_reg),
                       kernel_constraint=constraints.max_norm(0.9),
                       use_bias=False,
                       name="%s_conv" % name if dilation == 1 else "%s_conv_d%i" % (name, dilation),
                       dilation_rate=dilation
                       )(inputs)
            x = BatchNormalization(name="%s_bn" % name)(x)
            if activation in ("None", "linear"):
                pass
            elif activation in ("relu", "sigmoid", "tanh", "softmax", "elu"):
                x = Activation(activation, name="%s_%s" % (name, activation))(x)
            elif activation == "leaky_relu":
                x = LeakyReLU(alpha=0.2, name="%s_%s" % (name, activation))(x)
            else:
                raise Exception("Unknown activation: %s" % activation)
        elif layer.Layer_type == 'maxpool1d' or layer.Layer_type == 'avgpool1d':
            num_filters = width_scale_factor * layer.Layer_attributes['filters']
            pool_size = layer.Layer_attributes['pool_size']
            if add_conv1_under_pool:
                x = Conv1D(num_filters,
                           kernel_size=1,
                           strides=1,
                           padding='same',
                           kernel_initializer='he_normal',
                           use_bias=False,
                           name="%s_maxpool_conv" % name
                           )(inputs)
                x = BatchNormalization(name="%s_bn" % name)(x)
                x = Activation("relu", name="%s_relu" % name)(x)
            else:
                x = inputs

            if layer.Layer_type == 'maxpool1d':
                x = MaxPooling1D(pool_size=pool_size, strides=1, padding='same', name="%s_maxpool" % name)(x)
            elif layer.Layer_type == 'avgpool1d':
                x = AveragePooling1D(pool_size=pool_size, strides=1, padding='same', name="%s_avgpool" % name)(x)
            else:
                raise Exception("Unknown pool: %s" % layer.Layer_type)
        elif layer.Layer_type == 'identity':
            x = Lambda(lambda t: t, name="%s_id" % name)(inputs)
        else:
            raise Exception("Unknown type: %s" % layer.Layer_type)
        return x

    @staticmethod
    def get_out_filters(model_space):
        out_filters = []
        pool_layers = []
        for layer_id in range(len(model_space)):
            layer = model_space[layer_id]
            this_out_filters = [l.Layer_attributes['filters'] for l in layer]
            assert len(set(this_out_filters)) == 1, \
                "EnasConv1dDAG only supports one identical number of filters per layer, " \
                "but found %i in layer %s" % (len(set(this_out_filters)), layer)
            if len(out_filters) and this_out_filters[0] != out_filters[-1]:
                pool_layers.append(layer_id - 1)
            out_filters.append(this_out_filters[0])
        return out_filters, pool_layers
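# A usage sketch for KerasResidualCnnBuilder; the Operations and compile settings are
# illustrative assumptions. `arc_seq` follows the ResConvNetArchitecture layout consumed
# by _convert: one operation token per layer followed by that layer's binary skip connections.
def _example_keras_residual_cnn_builder(model_space, arc_seq):
    builder = KerasResidualCnnBuilder(
        inputs_op=Operation('input', shape=(1000, 4)),
        output_op=Operation('dense', units=1, activation='sigmoid'),
        fc_units=100,
        flatten_mode='GAP',
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
        model_space=model_space,
        dropout_rate=0.1,
        wsf=1,
    )
    return builder(model_states=arc_seq)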
class KerasMultiIOModelBuilder(ModelBuilder):
    """
    Note: still not working if num_outputs=0
    """
    def __init__(self, inputs_op, output_op, model_compile_dict, model_space,
                 with_input_blocks, with_output_blocks, dropout_rate=0.2, wsf=1, **kwargs):
        self.model_compile_dict = model_compile_dict
        self.inputs = inputs_op
        self.outputs = output_op
        self.model_space = model_space
        self.num_inputs = len(inputs_op) if type(inputs_op) in (list, tuple) else 0
        self.num_outputs = len(output_op) if type(output_op) in (list, tuple) else 0
        assert not (self.num_inputs == 0 and self.num_outputs == 0), \
            "MultiIO cannot have a single input and a single output at the same time"
        self.with_input_blocks = with_input_blocks
        self.with_output_blocks = with_output_blocks
        if self.with_input_blocks:
            assert self.num_inputs > 0, "you specified with_input_blocks=True for " \
                                        "KerasMultiIOModelBuilder, but only provided a single input"
        self.decoder = MultiIOArchitecture(num_layers=len(self.model_space),
                                           num_inputs=self.num_inputs * self.with_input_blocks,
                                           num_outputs=self.num_outputs * self.with_output_blocks)

    def __call__(self, model_states):
        model = self._convert(model_states)
        if model is not None:
            model.compile(**self.model_compile_dict)
        return model

    def _convert(self, arc_seq, with_bn=True, wsf=1):
        inputs = [get_layer(x=None, state=x) for x in self.inputs] if self.num_inputs > 0 \
            else [get_layer(x=None, state=self.inputs)]
        op, inp, skp, out = self.decoder.decode(arc_seq)
        out_rowsum = np.apply_along_axis(np.sum, 1, out)
        out_colsum = np.apply_along_axis(np.sum, 0, out)
        skp_rowsum = np.array([1] + [sum(x) for x in skp])
        with_input_blocks = self.with_input_blocks
        # missing output connection
        if any(out_rowsum == 0):
            print("invalid model: unconnected output")
            return None
        # output connected to a layer that itself has no input
        if self.with_input_blocks is False and any((skp_rowsum == 0) & (out_colsum != 0)):
            print("invalid model: output connected to layer with no input")
            return None
        # Build the model up to the outputs
        prev_layers = []
        for layer_id in range(len(self.model_space)):
            this_op = op[layer_id]
            # Prepare the inputs
            if with_input_blocks:
                this_inputs = [inputs[i] for i in np.where(inp[layer_id])[0]]
            else:
                this_inputs = inputs if layer_id == 0 else []
            if layer_id > 0:
                this_inputs += [prev_layers[i] for i in np.where(skp[layer_id - 1])[0]
                                if prev_layers[i] is not None]

            # Connect tensors
            model_op = copy.deepcopy(self.model_space[layer_id][this_op])
            if 'units' in model_op.Layer_attributes:
                model_op.Layer_attributes['units'] *= wsf
            elif 'filters' in model_op.Layer_attributes:
                model_op.Layer_attributes['filters'] *= wsf
            else:
                raise Exception("Cannot use wsf")
            if len(this_inputs) > 1:
                input_tensor = Concatenate()(this_inputs)
                layer = get_layer(x=input_tensor, state=model_op, with_bn=with_bn)
                prev_layers.append(layer)
            elif len(this_inputs) == 1:
                input_tensor = this_inputs[0]
                layer = get_layer(x=input_tensor, state=model_op, with_bn=with_bn)
                prev_layers.append(layer)
            else:
                prev_layers.append(None)  # skipped a layer

        # Build the outputs
        outputs_inputs = []
        for m, o in enumerate(out):
            idx = [i for i in np.where(o)[0] if prev_layers[i] is not None]
            if len(idx) > 1:
                outputs_inputs.append(Concatenate()([prev_layers[i] for i in idx]))
            elif len(idx) == 1:
                outputs_inputs.append(prev_layers[idx[0]])
            else:
                # raise Exception("Unconnected output %i" % m)
                print("Secondary unconnected output %i" % m)
                return None
        outputs = [get_layer(x=outputs_inputs[i], state=self.outputs[i]) for i in range(self.num_outputs)]
        model = Model(inputs=inputs, outputs=outputs)
        return model
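# A usage sketch for KerasMultiIOModelBuilder, assuming two inputs and two outputs with
# input/output blocks enabled; all Operations shown are illustrative assumptions, and
# `arc_seq` must follow the MultiIOArchitecture encoding expected by decoder.decode.
def _example_keras_multi_io_builder(model_space, arc_seq):
    builder = KerasMultiIOModelBuilder(
        inputs_op=[Operation('input', shape=(100, 4)), Operation('input', shape=(50, 4))],
        output_op=[Operation('dense', units=1, activation='sigmoid'),
                   Operation('dense', units=1, activation='sigmoid')],
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
        model_space=model_space,
        with_input_blocks=True,
        with_output_blocks=True,
    )
    # returns None when the decoded architecture leaves an output unconnected
    return builder(model_states=arc_seq)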
def build_sequential_model(model_states, input_state, output_state, model_compile_dict, **kwargs):
    """
    Parameters
    ----------
    model_states : list
        a list of operations sampled from the operation space
    input_state :
    output_state :
        specifies the output tensor, e.g. Dense(1, activation='sigmoid')
    model_compile_dict : dict
        a dict of `loss`, `optimizer` and `metrics`

    Returns
    -------
    Keras.Model
    """
    inp = get_layer(None, input_state)
    x = inp
    model_space = kwargs.pop("model_space", None)
    for i, state in enumerate(model_states):
        if issubclass(type(state), Operation):
            x = get_layer(x, state)
        elif issubclass(type(state), int) or np.issubclass_(type(state), np.integer):
            assert model_space is not None, "if provided integer model_arc, must provide model_space in kwargs"
            x = get_layer(x, model_space[i][state])
        else:
            raise Exception("cannot understand %s of type %s" % (state, type(state)))
    out = get_layer(x, output_state)
    model = Model(inputs=inp, outputs=out)
    if not kwargs.pop('stop_compile', False):
        model.compile(**model_compile_dict)
    return model
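# A usage sketch for build_sequential_model with explicit Operation states (no model_space
# needed). The layer choices below are illustrative assumptions; they presume dag.get_layer
# understands 'input', 'conv1d', 'flatten' and 'dense' Operations.
def _example_build_sequential_model():
    model_states = [
        Operation('conv1d', filters=16, kernel_size=8, activation='relu'),
        Operation('flatten'),
        Operation('dense', units=32, activation='relu'),
    ]
    return build_sequential_model(
        model_states=model_states,
        input_state=Operation('input', shape=(200, 4)),
        output_state=Operation('dense', units=1, activation='sigmoid'),
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
    )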
def build_multi_gpu_sequential_model(model_states, input_state, output_state, model_compile_dict, gpus=4, **kwargs):
    try:
        from tensorflow.keras.utils import multi_gpu_model
    except Exception as e:
        raise Exception("multi gpu not supported in keras. check your version. Error: %s" % e)
    with tf.device('/cpu:0'):
        vanilla_model = build_sequential_model(model_states, input_state, output_state, model_compile_dict,
                                               stop_compile=True, **kwargs)
    model = multi_gpu_model(vanilla_model, gpus=gpus)
    model.compile(**model_compile_dict)
    return model
def build_sequential_model_from_string(model_states_str, input_state, output_state, state_space, model_compile_dict):
    """build a sequential model from a string of states
    """
    assert len(model_states_str) == len(state_space)
    str_to_state = [[str(state) for state in state_space[i]] for i in range(len(state_space))]
    try:
        model_states = [state_space[i][str_to_state[i].index(model_states_str[i])] for i in range(len(state_space))]
    except ValueError:
        raise Exception("model_states_str not found in state-space")
    return build_sequential_model(model_states, input_state, output_state, model_compile_dict)
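# A usage sketch for build_sequential_model_from_string, assuming `state_space` is a
# ModelSpace-like object indexable per layer; each string token must match str(Operation)
# of the corresponding candidate exactly. The input/output Operations are illustrative.
def _example_build_from_string(state_space, tokens):
    return build_sequential_model_from_string(
        model_states_str=tokens,   # e.g. one string per layer in state_space
        input_state=Operation('input', shape=(200, 4)),
        output_state=Operation('dense', units=1, activation='sigmoid'),
        state_space=state_space,
        model_compile_dict={'loss': 'binary_crossentropy', 'optimizer': 'adam'},
    )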
def build_multi_gpu_sequential_model_from_string(model_states_str, input_state, output_state, state_space,
                                                 model_compile_dict):
    """build a multi-gpu sequential model from a string of states
    """
    assert len(model_states_str) == len(state_space)
    str_to_state = [[str(state) for state in state_space[i]] for i in range(len(state_space))]
    try:
        model_states = [state_space[i][str_to_state[i].index(model_states_str[i])] for i in range(len(state_space))]
    except ValueError:
        raise Exception("model_states_str not found in state-space")
    return build_multi_gpu_sequential_model(model_states, input_state, output_state, model_compile_dict)